feat: Improve OCR adaptive pipeline with early exit and better pattern matching
- Add adaptive 3-step OCR pipeline with early exit when all 5 fields found
- Add pattern for "C. I. F." with spaces (OCR artifact from PaddleOCR)
- Add pattern for YYYY. MM. DD date format with spaces (OMV/Petrom receipts)
- Add pattern for "OTAL TAXE" with T cut off and reversed amount position
- Make TVA rate pattern more flexible (code letter optional, handle "-21%")
- Replace logger.info with print(flush=True) for better debugging visibility
- Improve OCRPreview.vue to show extraction progress and raw OCR text

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -106,14 +106,27 @@
|
||||
|
||||
<!-- Raw Text Toggle -->
|
||||
<div class="raw-text-section" v-if="data.raw_text">
|
||||
<Button
|
||||
:label="showRawText ? 'Ascunde text OCR' : 'Arata text OCR'"
|
||||
:icon="showRawText ? 'pi pi-eye-slash' : 'pi pi-eye'"
|
||||
severity="secondary"
|
||||
size="small"
|
||||
text
|
||||
@click="showRawText = !showRawText"
|
||||
/>
|
||||
<div class="raw-text-header">
|
||||
<Button
|
||||
:label="showRawText ? 'Ascunde text OCR' : 'Arata text OCR'"
|
||||
:icon="showRawText ? 'pi pi-eye-slash' : 'pi pi-eye'"
|
||||
severity="secondary"
|
||||
size="small"
|
||||
text
|
||||
@click="showRawText = !showRawText"
|
||||
/>
|
||||
<span v-if="data.ocr_engine" class="ocr-engine-badge" :class="getEngineClass(data.ocr_engine)">
|
||||
<i :class="getEngineIcon(data.ocr_engine)"></i>
|
||||
{{ getEngineLabel(data.ocr_engine) }}
|
||||
</span>
|
||||
<span v-if="data._ocr_message" class="ocr-message-badge" :class="getMessageClass(data._ocr_message)">
|
||||
{{ data._ocr_message }}
|
||||
</span>
|
||||
<span v-if="data.processing_time_ms" class="ocr-time-badge">
|
||||
<i class="pi pi-clock"></i>
|
||||
{{ formatProcessingTime(data.processing_time_ms) }}
|
||||
</span>
|
||||
</div>
|
||||
<div v-if="showRawText" class="raw-text">
|
||||
<pre>{{ data.raw_text }}</pre>
|
||||
</div>
|
||||
@@ -168,6 +181,45 @@ const formatDate = (dateStr) => {
|
||||
year: 'numeric'
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Map an OCR engine identifier to the CSS modifier class used on the
 * engine badge.
 *
 * Exact identifiers are resolved before the generic substring fallbacks,
 * so 'paddle-light' yields 'fast' rather than 'paddleocr'.
 *
 * @param {string} engine - Engine identifier, e.g. 'paddle-light'.
 * @returns {string} The modifier class, or '' when the engine is missing,
 *   not a string, or unrecognized.
 */
const getEngineClass = (engine) => {
  // Guard against non-string values: calling `.includes` on them would
  // throw and abort rendering of the whole component.
  if (typeof engine !== 'string' || engine === '') return ''

  switch (engine) {
    case 'paddle-light':
      return 'fast'
    case 'paddle-adaptive':
      return 'adaptive'
    case 'adaptive-full':
      return 'full'
    default:
      break
  }

  // Generic fallbacks for any other identifier naming a known engine family.
  if (engine.includes('paddle')) return 'paddleocr'
  if (engine.includes('tesseract')) return 'tesseract'
  return ''
}
|
||||
|
||||
/**
 * Pick the icon class shown inside the OCR engine badge.
 *
 * Only 'paddle-light' has a dedicated icon (a lightning bolt); every other
 * value, including a missing engine, uses the generic cog icon.
 *
 * @param {string} engine - Engine identifier, e.g. 'paddle-light'.
 * @returns {string} Icon class string for the badge.
 */
const getEngineIcon = (engine) => {
  // The previous explicit branches for a missing engine and for
  // 'adaptive-full' returned the same cog icon as the default, so a single
  // comparison covers every case with identical results.
  return engine === 'paddle-light' ? 'pi pi-bolt' : 'pi pi-cog'
}
|
||||
|
||||
/**
 * Turn an OCR engine identifier into the text shown in the engine badge.
 *
 * Exact identifiers are resolved first; other identifiers containing
 * 'paddle' or 'tesseract' get the generic engine name; anything else is
 * shown verbatim.
 *
 * @param {string} engine - Engine identifier, e.g. 'paddle-adaptive'.
 * @returns {string} Display label, or '' when the engine is missing or not
 *   a string.
 */
const getEngineLabel = (engine) => {
  // Guard against non-string values: calling `.includes` on them would
  // throw and abort rendering of the whole component.
  if (typeof engine !== 'string' || engine === '') return ''

  switch (engine) {
    case 'paddle-light':
      return 'Fast Mode (PaddleOCR)'
    case 'paddle-adaptive':
      return 'Adaptive (Paddle dual)'
    case 'adaptive-full':
      return 'Full Pipeline'
    default:
      break
  }

  if (engine.includes('paddle')) return 'PaddleOCR'
  if (engine.includes('tesseract')) return 'Tesseract'

  // Unknown engines are displayed as-is rather than hidden.
  return engine
}
|
||||
|
||||
/**
 * Choose the CSS modifier class for the OCR status message badge.
 *
 * The message is searched for the phrases 'fast mode' and 'full pipeline'
 * (in that order); matching ignores letter case.
 *
 * @param {string} message - Status message attached to the OCR result.
 * @returns {string} 'fast-mode', 'full-pipeline', or '' when neither phrase
 *   occurs or the message is missing or not a string.
 */
const getMessageClass = (message) => {
  // Guard against non-string values: calling string methods on them would
  // throw and abort rendering of the whole component.
  if (typeof message !== 'string' || message === '') return ''

  // Normalize case so "Fast mode" and "fast mode" are styled the same way.
  const normalized = message.toLowerCase()
  if (normalized.includes('fast mode')) return 'fast-mode'
  if (normalized.includes('full pipeline')) return 'full-pipeline'
  return ''
}
|
||||
|
||||
/**
 * Format a duration given in milliseconds for the processing-time badge.
 *
 * Durations that round to less than one second are shown as whole
 * milliseconds (e.g. '123ms'); longer ones as seconds with one decimal
 * (e.g. '1.5s').
 *
 * @param {number|string} ms - Duration in milliseconds; numeric strings are
 *   accepted.
 * @returns {string} Formatted duration, or '' when the value is not a
 *   finite number.
 */
const formatProcessingTime = (ms) => {
  const duration = typeof ms === 'number' ? ms : Number.parseFloat(ms)

  // Avoid rendering "NaNs" / "undefinedms" for missing or malformed values.
  if (!Number.isFinite(duration)) return ''

  // Round before choosing the unit so fractional timings do not print long
  // decimal tails and 999.6 is shown as "1.0s" instead of "1000ms".
  const wholeMs = Math.round(duration)
  if (wholeMs < 1000) return `${wholeMs}ms`

  return `${(duration / 1000).toFixed(1)}s`
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
@@ -305,6 +357,82 @@ const formatDate = (dateStr) => {
|
||||
border-top: 1px dashed #86efac;
|
||||
}
|
||||
|
||||
.raw-text-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.ocr-engine-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding: 0.25rem 0.5rem;
|
||||
border-radius: 4px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.paddleocr {
|
||||
background: #dbeafe;
|
||||
color: #1e40af;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.tesseract {
|
||||
background: #fef3c7;
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.fast {
|
||||
background: #dcfce7;
|
||||
color: #166534;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.adaptive {
|
||||
background: #dbeafe;
|
||||
color: #1e40af;
|
||||
}
|
||||
|
||||
.ocr-engine-badge.full {
|
||||
background: #fef3c7;
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
.ocr-message-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding: 0.25rem 0.5rem;
|
||||
border-radius: 4px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
background: #f1f5f9;
|
||||
color: #475569;
|
||||
}
|
||||
|
||||
.ocr-message-badge.fast-mode {
|
||||
background: #dcfce7;
|
||||
color: #166534;
|
||||
}
|
||||
|
||||
.ocr-message-badge.full-pipeline {
|
||||
background: #fef3c7;
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
.ocr-time-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding: 0.25rem 0.5rem;
|
||||
border-radius: 4px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
background: #e0e7ff;
|
||||
color: #3730a3;
|
||||
}
|
||||
|
||||
.raw-text {
|
||||
margin-top: 0.5rem;
|
||||
padding: 0.75rem;
|
||||
|
||||
@@ -143,7 +143,12 @@ const processOCR = async () => {
|
||||
})
|
||||
|
||||
if (response.data.success) {
|
||||
emit('ocr-result', response.data.data)
|
||||
// Include the OCR message in the data for debugging
|
||||
const resultData = {
|
||||
...response.data.data,
|
||||
_ocr_message: response.data.message
|
||||
}
|
||||
emit('ocr-result', resultData)
|
||||
} else {
|
||||
error.value = response.data.message || 'OCR processing failed'
|
||||
emit('error', error.value)
|
||||
|
||||
Reference in New Issue
Block a user