feat: Improve OCR adaptive pipeline with early exit and better pattern matching

- Add adaptive 3-step OCR pipeline with early exit when all 5 fields found
- Add pattern for "C. I. F." with spaces (OCR artifact from PaddleOCR)
- Add pattern for YYYY. MM. DD date format with spaces (OMV/Petrom receipts)
- Add pattern for "OTAL TAXE" (leading "T" of "TOTAL" cut off by OCR) with the amount in reversed position
- Make TVA rate pattern more flexible (code letter optional, handle "-21%")
- Replace logger.info with print(flush=True) for better debugging visibility
- Improve OCRPreview.vue to show extraction progress and raw OCR text

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-13 01:54:52 +02:00
parent 6c3dd89f6d
commit 9f06482681
9 changed files with 952 additions and 116 deletions

View File

@@ -106,14 +106,27 @@
<!-- Raw Text Toggle -->
<div class="raw-text-section" v-if="data.raw_text">
<Button
:label="showRawText ? 'Ascunde text OCR' : 'Arata text OCR'"
:icon="showRawText ? 'pi pi-eye-slash' : 'pi pi-eye'"
severity="secondary"
size="small"
text
@click="showRawText = !showRawText"
/>
<div class="raw-text-header">
<Button
:label="showRawText ? 'Ascunde text OCR' : 'Arata text OCR'"
:icon="showRawText ? 'pi pi-eye-slash' : 'pi pi-eye'"
severity="secondary"
size="small"
text
@click="showRawText = !showRawText"
/>
<span v-if="data.ocr_engine" class="ocr-engine-badge" :class="getEngineClass(data.ocr_engine)">
<i :class="getEngineIcon(data.ocr_engine)"></i>
{{ getEngineLabel(data.ocr_engine) }}
</span>
<span v-if="data._ocr_message" class="ocr-message-badge" :class="getMessageClass(data._ocr_message)">
{{ data._ocr_message }}
</span>
<span v-if="data.processing_time_ms" class="ocr-time-badge">
<i class="pi pi-clock"></i>
{{ formatProcessingTime(data.processing_time_ms) }}
</span>
</div>
<div v-if="showRawText" class="raw-text">
<pre>{{ data.raw_text }}</pre>
</div>
@@ -168,6 +181,45 @@ const formatDate = (dateStr) => {
year: 'numeric'
})
}
/**
 * Pick the CSS modifier class for the OCR engine badge.
 *
 * Exact pipeline identifiers are checked first; anything else falls back to
 * a substring match on the engine family.
 *
 * @param {string} engine - Engine identifier taken from the OCR result.
 * @returns {string} Modifier class name, or '' when engine is falsy or unknown.
 */
const getEngineClass = (engine) => {
  if (!engine) return ''

  // Known pipeline modes get their own dedicated class.
  switch (engine) {
    case 'paddle-light':
      return 'fast'
    case 'paddle-adaptive':
      return 'adaptive'
    case 'adaptive-full':
      return 'full'
    default:
      break
  }

  // Generic engine families, matched by substring ('paddle' wins over 'tesseract').
  return engine.includes('paddle')
    ? 'paddleocr'
    : engine.includes('tesseract')
      ? 'tesseract'
      : ''
}
/**
 * Pick the PrimeIcons class shown inside the OCR engine badge.
 *
 * Only the light PaddleOCR mode gets the lightning bolt; every other engine
 * (including a missing one and the full pipeline) uses the generic cog.
 *
 * @param {string} engine - Engine identifier taken from the OCR result.
 * @returns {string} Icon class string.
 */
const getEngineIcon = (engine) => (
  engine === 'paddle-light' ? 'pi pi-bolt' : 'pi pi-cog'
)
/**
 * Produce the human-readable label for the OCR engine badge.
 *
 * Exact pipeline identifiers map to dedicated labels; other identifiers are
 * matched by engine family, and unknown identifiers are shown verbatim.
 *
 * @param {string} engine - Engine identifier taken from the OCR result.
 * @returns {string} Display label, or '' when engine is falsy.
 */
const getEngineLabel = (engine) => {
  if (!engine) return ''

  // Known pipeline modes.
  switch (engine) {
    case 'paddle-light':
      return 'Fast Mode (PaddleOCR)'
    case 'paddle-adaptive':
      return 'Adaptive (Paddle dual)'
    case 'adaptive-full':
      return 'Full Pipeline'
    default:
      break
  }

  // Generic engine families; fall back to the raw identifier.
  return engine.includes('paddle')
    ? 'PaddleOCR'
    : engine.includes('tesseract')
      ? 'Tesseract'
      : engine
}
/**
 * Pick the CSS modifier class for the OCR message badge based on phrases
 * found in the message text (case-sensitive substring match).
 *
 * @param {string} message - Status message attached to the OCR result.
 * @returns {string} Modifier class name, or '' when nothing matches.
 */
const getMessageClass = (message) => {
  if (!message) return ''

  // [phrase to look for, class to apply]; the first matching phrase wins.
  const phraseRules = [
    ['fast mode', 'fast-mode'],
    ['full pipeline', 'full-pipeline']
  ]
  const matched = phraseRules.find(([phrase]) => message.includes(phrase))
  return matched ? matched[1] : ''
}
/**
 * Format an OCR processing duration for the time badge.
 *
 * Durations that round to less than one second are shown as whole
 * milliseconds (e.g. "523ms"); longer ones as seconds with one decimal
 * (e.g. "1.5s"). Fractional millisecond values are rounded so the badge
 * never shows something like "523.4567ms".
 *
 * @param {number|string|null|undefined} ms - Duration in milliseconds.
 * @returns {string} Formatted duration, or '' when ms is missing or not a
 *   finite number.
 */
const formatProcessingTime = (ms) => {
  // Missing values produce no text rather than "NaNs" / "undefinedms".
  if (ms == null || ms === '') return ''

  const value = Number(ms)
  if (!Number.isFinite(value)) return ''

  // Decide the unit on the rounded value so 999.6 becomes "1.0s", not "1000ms".
  const wholeMs = Math.round(value)
  if (wholeMs < 1000) return `${wholeMs}ms`
  return `${(value / 1000).toFixed(1)}s`
}
</script>
<style scoped>
@@ -305,6 +357,82 @@ const formatDate = (dateStr) => {
border-top: 1px dashed #86efac;
}
/* Row holding the raw-text toggle button and the OCR status badges. */
.raw-text-header {
  display: flex;
  flex-wrap: wrap;
  align-items: center;
  gap: 0.75rem;
}

/* Shared pill geometry for the engine, message and time badges. */
.ocr-engine-badge,
.ocr-message-badge,
.ocr-time-badge {
  display: inline-flex;
  align-items: center;
  gap: 0.25rem;
  padding: 0.25rem 0.5rem;
  border-radius: 4px;
  font-size: 0.75rem;
}

.ocr-engine-badge,
.ocr-message-badge {
  font-weight: 500;
}

/* Neutral default for messages that match no known mode. */
.ocr-message-badge {
  background: #f1f5f9;
  color: #475569;
}

/* Processing-time badge: slightly bolder, indigo. */
.ocr-time-badge {
  font-weight: 600;
  background: #e0e7ff;
  color: #3730a3;
}

/* Green: fast path. */
.ocr-engine-badge.fast,
.ocr-message-badge.fast-mode {
  background: #dcfce7;
  color: #166534;
}

/* Blue: PaddleOCR family / adaptive mode. */
.ocr-engine-badge.paddleocr,
.ocr-engine-badge.adaptive {
  background: #dbeafe;
  color: #1e40af;
}

/* Amber: Tesseract / full pipeline. */
.ocr-engine-badge.tesseract,
.ocr-engine-badge.full,
.ocr-message-badge.full-pipeline {
  background: #fef3c7;
  color: #92400e;
}
.raw-text {
margin-top: 0.5rem;
padding: 0.75rem;

View File

@@ -143,7 +143,12 @@ const processOCR = async () => {
})
if (response.data.success) {
emit('ocr-result', response.data.data)
// Include the OCR message in the data for debugging
const resultData = {
...response.data.data,
_ocr_message: response.data.message
}
emit('ocr-result', resultData)
} else {
error.value = response.data.message || 'OCR processing failed'
emit('error', error.value)