feat(ocr): Implement persistent worker pool with SQLite job queue
Major OCR infrastructure improvements:
- Add persistent SQLite-based job queue for OCR tasks
- Implement worker pool with process isolation and auto-restart
- Add OCR engine selector dropdown (Tesseract/PaddleOCR) in upload zone
- Optimize Tesseract preprocessing based on benchmark results (8x faster)
- Add recognize_cif_optimized() with multi-strategy CIF extraction
- Add Romanian CIF checksum validation
- Increase Telegram long polling timeout from 10s to 30s

Squashed commits:
- feat(ocr): Implement persistent worker pool with SQLite job queue
- feat(ocr): Add OCR engine selector dropdown to upload zone
- perf(telegram): Increase long polling timeout from 10s to 30s
- perf(ocr): Optimize Tesseract preprocessing based on benchmark results

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -21,8 +21,8 @@
|
||||
style="width: 50px; height: 50px"
|
||||
strokeWidth="4"
|
||||
/>
|
||||
<p class="processing-text">Se proceseaza imaginea...</p>
|
||||
<p class="processing-subtext">Acest proces poate dura cateva secunde</p>
|
||||
<p class="processing-text">{{ processingMessage }}</p>
|
||||
<p class="processing-subtext">{{ processingSubtext }}</p>
|
||||
</div>
|
||||
|
||||
<div v-else-if="selectedFile" class="file-selected-state">
|
||||
@@ -52,6 +52,15 @@
|
||||
size="small"
|
||||
@click="triggerFileInput"
|
||||
/>
|
||||
<!-- OCR Engine Selector -->
|
||||
<Dropdown
|
||||
v-model="selectedEngine"
|
||||
:options="engineOptions"
|
||||
optionLabel="label"
|
||||
optionValue="value"
|
||||
placeholder="Motor OCR"
|
||||
class="engine-selector dropdown-borderless"
|
||||
/>
|
||||
<Button
|
||||
label="Proceseaza OCR"
|
||||
icon="pi pi-cog"
|
||||
@@ -68,7 +77,8 @@
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { ref } from 'vue'
|
||||
import { ref, computed } from 'vue'
|
||||
import Dropdown from 'primevue/dropdown'
|
||||
import api from '@data-entry/services/api'
|
||||
|
||||
const emit = defineEmits(['ocr-result', 'file-selected', 'error'])
|
||||
@@ -79,6 +89,42 @@ const isDragging = ref(false)
|
||||
const processing = ref(false)
|
||||
const error = ref(null)
|
||||
|
||||
// OCR engine selection.
// selectedEngine is bound to the engine <Dropdown> via v-model and is sent as the
// `engine` query parameter when the OCR job is submitted; it defaults to 'auto'.
|
||||
const selectedEngine = ref('auto')
|
||||
// Choices offered by the dropdown: `label` is the text shown (optionLabel),
// `value` is what ends up in selectedEngine (optionValue).
const engineOptions = [
|
||||
{ label: 'Auto (Recomandat)', value: 'auto' },
|
||||
{ label: 'PaddleOCR', value: 'paddleocr' },
|
||||
{ label: 'Tesseract', value: 'tesseract' }
|
||||
]
|
||||
|
||||
// Job queue state, filled from the submit response and refreshed on every status poll.
|
||||
// Identifier of the submitted job (`job_id` in the API response); null when no job is tracked.
const jobId = ref(null)
|
||||
// `queue_position` as reported by the API; displayed while the job is pending.
const queuePosition = ref(null)
|
||||
// `estimated_wait_seconds` as reported by the API; displayed to the user as seconds.
const estimatedWait = ref(null)
|
||||
// Set to 'submitting' before the upload, then to the server-reported status.
// The code in this file checks for 'pending', 'processing', 'completed' and 'failed'.
const jobStatus = ref(null)
|
||||
// Timer handle for the status-polling interval; null when no interval is active.
// Plain variable (not a ref) because nothing renders from it.
let pollInterval = null
|
||||
|
||||
// Dynamic processing messages
|
||||
/**
 * Headline shown in the processing overlay.
 *
 * - job is being processed        -> "Se proceseaza imaginea..."
 * - job is pending with position>0 -> "In coada... Pozitia N"
 * - anything else (e.g. still submitting) -> "Se trimite..."
 */
const processingMessage = computed(() => {
  const status = jobStatus.value

  if (status === 'pending') {
    // Only read the queue position while pending, so it is tracked as a
    // dependency in exactly the same situations as before.
    const position = queuePosition.value
    if (position > 0) {
      return `In coada... Pozitia ${position}`
    }
  }

  return status === 'processing' ? 'Se proceseaza imaginea...' : 'Se trimite...'
})
|
||||
|
||||
/**
 * Secondary line shown under the processing headline.
 *
 * - job is pending with a positive wait estimate -> "Timp estimat: ~N secunde"
 * - job is being processed                       -> "Extragere date cu OCR"
 * - anything else                                -> "Asteptati..."
 */
const processingSubtext = computed(() => {
  const status = jobStatus.value

  if (status === 'pending') {
    // The estimate is only consulted while the job is still queued.
    const seconds = estimatedWait.value
    if (seconds > 0) {
      return `Timp estimat: ~${seconds} secunde`
    }
  }

  return status === 'processing' ? 'Extragere date cu OCR' : 'Asteptati...'
})
|
||||
|
||||
/**
 * Raise the isDragging flag.
 * NOTE(review): presumably wired to the drop zone's dragover event; the
 * template binding is outside this view, so confirm before relying on it.
 */
const onDragOver = () => { isDragging.value = true }
|
||||
@@ -130,37 +176,130 @@ const processOCR = async () => {
|
||||
|
||||
processing.value = true
|
||||
error.value = null
|
||||
jobId.value = null
|
||||
queuePosition.value = null
|
||||
estimatedWait.value = null
|
||||
jobStatus.value = 'submitting'
|
||||
|
||||
try {
|
||||
const formData = new FormData()
|
||||
formData.append('file', selectedFile.value)
|
||||
|
||||
// Don't set Content-Type header - let browser set it with boundary for multipart/form-data
|
||||
// The API interceptor will add Authorization header automatically
|
||||
const response = await api.post('/ocr/extract', formData, {
|
||||
timeout: 60000, // 60 second timeout for OCR
|
||||
console.log('🔍 OCR Submit:', {
|
||||
fileName: selectedFile.value?.name,
|
||||
fileSize: selectedFile.value?.size,
|
||||
fileType: selectedFile.value?.type,
|
||||
engine: selectedEngine.value
|
||||
})
|
||||
|
||||
if (response.data.success) {
|
||||
// Include the OCR message in the data for debugging
|
||||
const resultData = {
|
||||
...response.data.data,
|
||||
_ocr_message: response.data.message
|
||||
}
|
||||
emit('ocr-result', resultData)
|
||||
} else {
|
||||
error.value = response.data.message || 'OCR processing failed'
|
||||
emit('error', error.value)
|
||||
}
|
||||
// Step 1: Submit job to queue (returns immediately)
|
||||
// Include engine parameter in query string
|
||||
const submitResponse = await api.post(`/ocr/extract?engine=${selectedEngine.value}`, formData, {
|
||||
timeout: 30000, // 30s timeout for submission
|
||||
})
|
||||
|
||||
const job = submitResponse.data
|
||||
jobId.value = job.job_id
|
||||
queuePosition.value = job.queue_position
|
||||
estimatedWait.value = job.estimated_wait_seconds
|
||||
jobStatus.value = job.status
|
||||
|
||||
console.log('📋 OCR Job Created:', job)
|
||||
|
||||
// Step 2: Start polling for result
|
||||
await pollJobStatus(job.job_id)
|
||||
|
||||
} catch (err) {
|
||||
console.error('🔴 OCR Error:', {
|
||||
message: err.message,
|
||||
code: err.code,
|
||||
response: err.response?.data
|
||||
})
|
||||
const message = err.response?.data?.detail || err.message || 'Eroare la procesarea OCR'
|
||||
error.value = message
|
||||
emit('error', message)
|
||||
} finally {
|
||||
processing.value = false
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Poll `/ocr/jobs/:id` until the job reaches a terminal state, keeping the
 * queue refs (jobStatus, queuePosition, estimatedWait) up to date on the way.
 *
 * The returned promise settles only once polling is over, so a caller that
 * awaits it (processOCR) can safely clear its "processing" flag afterwards.
 * Requests are issued strictly one after another: a slow response can never
 * overlap with the next poll, and a result is emitted at most once.
 *
 * Outcomes:
 * - status 'completed' with a result -> emits 'ocr-result' (result plus `_processing_time_ms`)
 * - status 'completed' without result, status 'failed', or attempts exhausted -> emits 'error'
 * - jobId no longer equals `id` (reset() or a newer submission replaced it) -> stops silently
 *
 * @param {string|number} id - Job identifier; must be the value currently stored in `jobId`.
 * @param {object} [options]
 * @param {number} [options.intervalMs=1000] - Pause between two polls, in milliseconds.
 * @param {number} [options.maxAttempts=120] - Maximum number of polls (about 2 minutes with the default interval).
 * @returns {Promise<void>} Resolves when polling has finished; never rejects on poll failures.
 */
const pollJobStatus = async (id, { intervalMs = 1000, maxAttempts = 120 } = {}) => {
  const attemptLimit = Math.max(1, maxAttempts)

  // reset() clears jobId and a new submission overwrites it, so a mismatch
  // means nobody is interested in this job any more.
  const isCancelled = () => jobId.value !== id
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms))

  // Shared terminal-failure path: stop the spinner, store and report the message.
  const fail = (message) => {
    processing.value = false
    error.value = message
    emit('error', message)
  }

  for (let attempt = 1; attempt <= attemptLimit; attempt++) {
    let job = null
    try {
      const response = await api.get(`/ocr/jobs/${id}`)
      job = response.data ?? null
      if (job === null) {
        console.error('🔴 Poll Error:', 'empty job status response')
      }
    } catch (err) {
      // Don't stop on poll errors - network might be flaky
      console.error('🔴 Poll Error:', err.message)
    }

    // The request may have been in flight while the job was reset/replaced.
    if (isCancelled()) return

    if (job !== null) {
      jobStatus.value = job.status
      queuePosition.value = job.queue_position
      estimatedWait.value = job.estimated_wait_seconds

      console.log('📊 OCR Poll:', { status: job.status, position: job.queue_position })

      if (job.status === 'completed') {
        if (job.result) {
          processing.value = false
          console.log('✅ OCR Complete:', job.result)
          emit('ocr-result', {
            ...job.result,
            _processing_time_ms: job.processing_time_ms
          })
        } else {
          fail('OCR completed but no result returned')
        }
        return
      }

      if (job.status === 'failed') {
        fail(job.error || 'OCR processing failed')
        return
      }
    }

    // Still pending/processing (or the poll itself failed): give up once the
    // attempt budget is spent, reporting whichever problem the last poll hit.
    if (attempt === attemptLimit) {
      fail(job !== null
        ? 'Timeout - procesarea a durat prea mult'
        : 'Eroare la verificarea starii job-ului')
      return
    }

    await pause(intervalMs)
    if (isCancelled()) return
  }
}
|
||||
|
||||
const formatFileSize = (bytes) => {
|
||||
if (bytes < 1024) return bytes + ' B'
|
||||
if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB'
|
||||
@@ -170,6 +309,14 @@ const formatFileSize = (bytes) => {
|
||||
const reset = () => {
|
||||
selectedFile.value = null
|
||||
error.value = null
|
||||
jobId.value = null
|
||||
queuePosition.value = null
|
||||
estimatedWait.value = null
|
||||
jobStatus.value = null
|
||||
if (pollInterval) {
|
||||
clearInterval(pollInterval)
|
||||
pollInterval = null
|
||||
}
|
||||
if (fileInput.value) {
|
||||
fileInput.value.value = ''
|
||||
}
|
||||
@@ -262,6 +409,23 @@ defineExpose({ reset, processOCR })
|
||||
gap: 0.5rem;
|
||||
margin-top: 0.75rem;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
/* Engine selector dropdown */
|
||||
.engine-selector {
|
||||
min-width: 150px;
|
||||
}
|
||||
|
||||
.engine-selector:deep(.p-dropdown-label) {
|
||||
padding: 0.5rem 0.75rem !important;
|
||||
font-size: 0.875rem;
|
||||
color: #475569;
|
||||
}
|
||||
|
||||
.engine-selector:deep(.p-dropdown-trigger) {
|
||||
width: 2rem !important;
|
||||
}
|
||||
|
||||
/* Processing state */
|
||||
@@ -276,6 +440,7 @@ defineExpose({ reset, processOCR })
|
||||
font-size: 1rem;
|
||||
color: #475569;
|
||||
margin: 0.5rem 0 0 0;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.processing-subtext {
|
||||
|
||||
Reference in New Issue
Block a user