feat(ocr): Implement persistent worker pool with SQLite job queue
Major OCR infrastructure improvements:

- Add persistent SQLite-based job queue for OCR tasks
- Implement worker pool with process isolation and auto-restart
- Add OCR engine selector dropdown (Tesseract/PaddleOCR) in upload zone
- Optimize Tesseract preprocessing based on benchmark results (8x faster)
- Add recognize_cif_optimized() with multi-strategy CIF extraction
- Add Romanian CIF checksum validation
- Increase Telegram long polling timeout from 10s to 30s

Squashed commits:

- feat(ocr): Implement persistent worker pool with SQLite job queue
- feat(ocr): Add OCR engine selector dropdown to upload zone
- perf(telegram): Increase long polling timeout from 10s to 30s
- perf(ocr): Optimize Tesseract preprocessing based on benchmark results

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -987,33 +987,63 @@ const rescanAttachmentOCR = async (attachment) => {
|
||||
// Create a File object from the blob
|
||||
const file = new File([response.data], attachment.filename, { type: attachment.mime_type })
|
||||
|
||||
// Send to OCR
|
||||
// Send to OCR job queue
|
||||
const formData = new FormData()
|
||||
formData.append('file', file)
|
||||
|
||||
const ocrResponse = await apiService.post('/ocr/extract', formData, {
|
||||
headers: { 'Content-Type': 'multipart/form-data' },
|
||||
timeout: 60000,
|
||||
// Submit job
|
||||
const submitResponse = await apiService.post('/ocr/extract', formData, {
|
||||
timeout: 30000,
|
||||
})
|
||||
|
||||
if (ocrResponse.data.success) {
|
||||
const resultData = {
|
||||
...ocrResponse.data.data,
|
||||
_ocr_message: ocrResponse.data.message
|
||||
const jobId = submitResponse.data.job_id
|
||||
console.log('📋 OCR Rescan Job:', submitResponse.data)
|
||||
|
||||
// Poll for result
|
||||
const maxAttempts = 120
|
||||
let attempts = 0
|
||||
|
||||
while (attempts < maxAttempts) {
|
||||
await new Promise(resolve => setTimeout(resolve, 1000)) // Wait 1 second
|
||||
|
||||
const pollResponse = await apiService.get(`/ocr/jobs/${jobId}`)
|
||||
const job = pollResponse.data
|
||||
|
||||
if (job.status === 'completed') {
|
||||
if (job.result) {
|
||||
ocrData.value = {
|
||||
...job.result,
|
||||
_processing_time_ms: job.processing_time_ms
|
||||
}
|
||||
ocrCollapsed.value = false
|
||||
toast.add({
|
||||
severity: 'success',
|
||||
summary: 'OCR Procesare',
|
||||
detail: 'Datele au fost re-extrase din atasament',
|
||||
life: 3000,
|
||||
})
|
||||
}
|
||||
break
|
||||
}
|
||||
ocrData.value = resultData
|
||||
ocrCollapsed.value = false
|
||||
toast.add({
|
||||
severity: 'success',
|
||||
summary: 'OCR Procesare',
|
||||
detail: 'Datele au fost re-extrase din atasament',
|
||||
life: 3000,
|
||||
})
|
||||
} else {
|
||||
|
||||
if (job.status === 'failed') {
|
||||
toast.add({
|
||||
severity: 'error',
|
||||
summary: 'Eroare OCR',
|
||||
detail: job.error || 'Procesare OCR esuata',
|
||||
life: 5000,
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
attempts++
|
||||
}
|
||||
|
||||
if (attempts >= maxAttempts) {
|
||||
toast.add({
|
||||
severity: 'error',
|
||||
summary: 'Eroare OCR',
|
||||
detail: ocrResponse.data.message || 'Procesare OCR esuata',
|
||||
detail: 'Timeout - procesarea a durat prea mult',
|
||||
life: 5000,
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user