feat(ocr): Add docTR OCR engine with metrics infrastructure
Add docTR as primary OCR engine with 2-tier sequential processing, OCR metrics tracking, and simplified engine selection.

Features:
- docTR OCR engine with light+medium preprocessing tiers
- doctr_plus mode with early exit optimization (~65% fast path)
- OCR metrics dashboard with per-engine statistics
- User OCR preference persistence
- Parallel worker pool for OCR processing
- Cross-validation for extraction quality

Engine options: tesseract, doctr, doctr_plus (recommended), paddleocr

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -263,6 +263,12 @@ const formatDate = (dateStr) => {
|
||||
|
||||
const getEngineClass = (engine) => {
|
||||
if (!engine) return ''
|
||||
// docTR engines
|
||||
if (engine === 'doctr-light') return 'doctr-fast'
|
||||
if (engine === 'doctr-medium') return 'doctr'
|
||||
if (engine === 'doctr-adaptive') return 'doctr-adaptive'
|
||||
if (engine.includes('doctr')) return 'doctr'
|
||||
// PaddleOCR engines
|
||||
if (engine === 'paddle-light') return 'fast'
|
||||
if (engine === 'paddle-adaptive') return 'adaptive'
|
||||
if (engine === 'adaptive-full') return 'full'
|
||||
@@ -273,13 +279,23 @@ const getEngineClass = (engine) => {
|
||||
|
||||
/**
 * Pick the PrimeIcons class shown next to an OCR engine name.
 *
 * @param {string} engine - Engine identifier, e.g. 'doctr-light' or 'adaptive-full'.
 * @returns {string} 'pi pi-bolt' for any docTR engine and for 'paddle-light';
 *   'pi pi-cog' for everything else, including a missing or empty engine.
 */
const getEngineIcon = (engine) => {
  if (!engine) return 'pi pi-cog'
  // Any identifier containing 'doctr' and PaddleOCR's light tier get the bolt icon.
  if (engine.includes('doctr') || engine === 'paddle-light') return 'pi pi-bolt'
  // 'adaptive-full' and unrecognised engines share the generic cog.
  return 'pi pi-cog'
}
|
||||
|
||||
const getEngineLabel = (engine) => {
|
||||
if (!engine) return ''
|
||||
// docTR engines
|
||||
if (engine === 'doctr-light') return 'docTR Fast'
|
||||
if (engine === 'doctr-medium') return 'docTR Medium'
|
||||
if (engine === 'doctr-adaptive') return 'docTR Adaptive'
|
||||
if (engine.includes('doctr')) return 'docTR'
|
||||
// PaddleOCR engines
|
||||
if (engine === 'paddle-light') return 'Fast Mode (PaddleOCR)'
|
||||
if (engine === 'paddle-adaptive') return 'Adaptive (Paddle dual)'
|
||||
if (engine === 'adaptive-full') return 'Full Pipeline'
|
||||
@@ -615,6 +631,22 @@ const formatProcessingTime = (ms) => {
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
/* docTR engine styles */
|
||||
.ocr-engine-badge.doctr {
|
||||
background: #ede9fe;
|
||||
color: #5b21b6;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.doctr-fast {
|
||||
background: #d1fae5;
|
||||
color: #047857;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.doctr-adaptive {
|
||||
background: #e0e7ff;
|
||||
color: #3730a3;
|
||||
}
|
||||
|
||||
.ocr-message-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
|
||||
@@ -60,7 +60,14 @@
|
||||
optionValue="value"
|
||||
placeholder="Motor OCR"
|
||||
class="engine-selector dropdown-borderless"
|
||||
/>
|
||||
>
|
||||
<template #option="{ option }">
|
||||
<div class="engine-option">
|
||||
<span class="engine-label">{{ option.label }}</span>
|
||||
<span class="engine-desc">{{ option.desc }}</span>
|
||||
</div>
|
||||
</template>
|
||||
</Dropdown>
|
||||
<Button
|
||||
label="Proceseaza OCR"
|
||||
icon="pi pi-cog"
|
||||
@@ -77,9 +84,10 @@
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { ref, computed } from 'vue'
|
||||
import { ref, computed, onMounted, watch } from 'vue'
|
||||
import Dropdown from 'primevue/dropdown'
|
||||
import api from '@data-entry/services/api'
|
||||
import { useOCRSettingsStore } from '@data-entry/stores/ocrSettingsStore'
|
||||
|
||||
const emit = defineEmits(['ocr-result', 'file-selected', 'error'])
|
||||
|
||||
@@ -89,20 +97,73 @@ const isDragging = ref(false)
|
||||
const processing = ref(false)
|
||||
const error = ref(null)
|
||||
|
||||
// OCR Engine selection
|
||||
// OCR Settings Store - manages user preferences
|
||||
const ocrStore = useOCRSettingsStore()
|
||||
|
||||
// OCR Engine selection (synced with store)
|
||||
const selectedEngine = ref('auto')
|
||||
const engineOptions = [
|
||||
{ label: 'Auto (Recomandat)', value: 'auto' },
|
||||
{ label: 'PaddleOCR', value: 'paddleocr' },
|
||||
{ label: 'Tesseract', value: 'tesseract' }
|
||||
]
|
||||
|
||||
// Engine config - labels and descriptions for dropdown
|
||||
// Display metadata for each selectable OCR engine, keyed by engine id.
// `label` is the option title; `desc` is the secondary line rendered under it.
const engineConfig = {
  auto: { label: 'Auto', desc: 'docTR→Paddle→Tess · General' },
  doctr: { label: 'docTR', desc: 'Rapid, bună acuratețe' },
  paddleocr: { label: 'PaddleOCR', desc: 'Cea mai bună calitate' },
  tesseract: { label: 'Tesseract', desc: 'Cel mai rapid, calitate redusă' },
  hybrid: { label: 'Hybrid', desc: 'docTR+Tess paralel · Recomandat' },
  'hybrid-quality': { label: 'Hybrid Calitate', desc: 'Paddle→docTR→Tess · Acuratețe max' },
}
|
||||
|
||||
// Compute engine options from store's available engines
|
||||
// Dropdown options derived from the engines the store reports as available.
// Engines without an engineConfig entry fall back to their raw id as the label
// and an empty description.
const engineOptions = computed(() => {
  const toOption = (engine) => {
    const meta = engineConfig[engine]
    return {
      label: meta?.label || engine,
      desc: meta?.desc || '',
      value: engine
    }
  }
  return ocrStore.availableEngines.map(toOption)
})
|
||||
|
||||
// Load user's preferred engine on mount
|
||||
onMounted(async () => {
  try {
    await ocrStore.loadPreference()
  } catch (err) {
    // A failed load must not escape the hook; keep the current default selection.
    console.error('[OCRUploadZone] Failed to load preference:', err)
    return
  }

  // Only adopt the stored value when there is one, so the default is never blanked out.
  const preferred = ocrStore.preferredEngine
  if (preferred) {
    selectedEngine.value = preferred
  }
  console.log('[OCRUploadZone] Loaded user preference:', selectedEngine.value)
})
|
||||
|
||||
// Save preference when user changes engine
|
||||
watch(selectedEngine, async (newEngine, oldEngine) => {
  // Nothing to persist unless there was a previous value and it actually changed.
  const hasChanged = Boolean(oldEngine) && newEngine !== oldEngine
  // Skip saving while the store does not report itself as initialized.
  if (!hasChanged || !ocrStore.initialized) return

  try {
    await ocrStore.setPreference(newEngine)
    console.log('[OCRUploadZone] Saved user preference:', newEngine)
  } catch (err) {
    // Saving is best-effort: log and leave the selection as the user set it.
    console.error('[OCRUploadZone] Failed to save preference:', err)
  }
})
|
||||
|
||||
// Job queue state
|
||||
const jobId = ref(null)
|
||||
const queuePosition = ref(null)
|
||||
const estimatedWait = ref(null)
|
||||
const jobStatus = ref(null)
|
||||
let pollInterval = null
|
||||
|
||||
// Dynamic processing messages
|
||||
const processingMessage = computed(() => {
|
||||
@@ -223,26 +284,36 @@ const processOCR = async () => {
|
||||
}
|
||||
|
||||
const pollJobStatus = async (id) => {
|
||||
const maxAttempts = 120 // 2 minutes max (120 * 1s)
|
||||
let attempts = 0
|
||||
const LONG_POLL_TIMEOUT = 30 // seconds
|
||||
const MAX_TOTAL_TIME = 120 // 2 minutes max
|
||||
const startTime = Date.now()
|
||||
|
||||
const poll = async () => {
|
||||
try {
|
||||
const response = await api.get(`/ocr/jobs/${id}`)
|
||||
const job = response.data
|
||||
// Check if exceeded max total time
|
||||
const elapsed = (Date.now() - startTime) / 1000
|
||||
if (elapsed >= MAX_TOTAL_TIME) {
|
||||
processing.value = false
|
||||
error.value = 'Timeout - procesarea a durat prea mult'
|
||||
emit('error', error.value)
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
// Long-poll with 30s server timeout, 35s axios timeout
|
||||
const response = await api.get(`/ocr/jobs/${id}/wait`, {
|
||||
params: { timeout: LONG_POLL_TIMEOUT },
|
||||
timeout: (LONG_POLL_TIMEOUT + 5) * 1000
|
||||
})
|
||||
|
||||
const job = response.data
|
||||
jobStatus.value = job.status
|
||||
queuePosition.value = job.queue_position
|
||||
estimatedWait.value = job.estimated_wait_seconds
|
||||
|
||||
console.log('📊 OCR Poll:', { status: job.status, position: job.queue_position })
|
||||
console.log('📊 OCR Long-Poll:', { status: job.status, position: job.queue_position })
|
||||
|
||||
if (job.status === 'completed') {
|
||||
// Success! Emit result
|
||||
clearInterval(pollInterval)
|
||||
pollInterval = null
|
||||
processing.value = false
|
||||
|
||||
if (job.result) {
|
||||
console.log('✅ OCR Complete:', job.result)
|
||||
emit('ocr-result', {
|
||||
@@ -257,47 +328,36 @@ const pollJobStatus = async (id) => {
|
||||
}
|
||||
|
||||
if (job.status === 'failed') {
|
||||
// Failed
|
||||
clearInterval(pollInterval)
|
||||
pollInterval = null
|
||||
processing.value = false
|
||||
|
||||
error.value = job.error || 'OCR processing failed'
|
||||
emit('error', error.value)
|
||||
return
|
||||
}
|
||||
|
||||
// Still pending/processing - continue polling
|
||||
attempts++
|
||||
if (attempts >= maxAttempts) {
|
||||
clearInterval(pollInterval)
|
||||
pollInterval = null
|
||||
processing.value = false
|
||||
error.value = 'Timeout - procesarea a durat prea mult'
|
||||
emit('error', error.value)
|
||||
// Still pending/processing - long-poll again
|
||||
if (processing.value) {
|
||||
await poll()
|
||||
}
|
||||
|
||||
} catch (err) {
|
||||
console.error('🔴 Poll Error:', err.message)
|
||||
attempts++
|
||||
// Don't stop on poll errors - network might be flaky
|
||||
if (attempts >= maxAttempts) {
|
||||
clearInterval(pollInterval)
|
||||
pollInterval = null
|
||||
processing.value = false
|
||||
error.value = 'Eroare la verificarea starii job-ului'
|
||||
emit('error', error.value)
|
||||
// Handle timeout (normal for long-poll)
|
||||
if (err.code === 'ECONNABORTED' || err.message?.includes('timeout')) {
|
||||
console.log('⏱️ Long-poll timeout, retrying...')
|
||||
if (processing.value) {
|
||||
await poll()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Real error
|
||||
console.error('🔴 Poll Error:', err.message)
|
||||
processing.value = false
|
||||
error.value = 'Eroare la verificarea starii job-ului'
|
||||
emit('error', error.value)
|
||||
}
|
||||
}
|
||||
|
||||
// Initial poll immediately
|
||||
await poll()
|
||||
|
||||
// Continue polling every 1 second if still processing
|
||||
if (processing.value) {
|
||||
pollInterval = setInterval(poll, 1000)
|
||||
}
|
||||
}
|
||||
|
||||
const formatFileSize = (bytes) => {
|
||||
@@ -313,10 +373,7 @@ const reset = () => {
|
||||
queuePosition.value = null
|
||||
estimatedWait.value = null
|
||||
jobStatus.value = null
|
||||
if (pollInterval) {
|
||||
clearInterval(pollInterval)
|
||||
pollInterval = null
|
||||
}
|
||||
processing.value = false // Stop any ongoing long-poll
|
||||
if (fileInput.value) {
|
||||
fileInput.value.value = ''
|
||||
}
|
||||
@@ -415,7 +472,7 @@ defineExpose({ reset, processOCR })
|
||||
|
||||
/* Engine selector dropdown */
|
||||
.engine-selector {
|
||||
min-width: 150px;
|
||||
min-width: 180px;
|
||||
}
|
||||
|
||||
.engine-selector:deep(.p-dropdown-label) {
|
||||
@@ -428,6 +485,25 @@ defineExpose({ reset, processOCR })
|
||||
width: 2rem !important;
|
||||
}
|
||||
|
||||
/* Engine dropdown option with description */
|
||||
.engine-option {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 2px;
|
||||
padding: 4px 0;
|
||||
}
|
||||
|
||||
.engine-label {
|
||||
font-weight: 500;
|
||||
font-size: 0.875rem;
|
||||
color: #1e293b;
|
||||
}
|
||||
|
||||
.engine-desc {
|
||||
font-size: 0.75rem;
|
||||
color: #64748b;
|
||||
}
|
||||
|
||||
/* Processing state */
|
||||
.processing-state {
|
||||
display: flex;
|
||||
|
||||
Reference in New Issue
Block a user