feat: Improve OCR adaptive pipeline with early exit and better pattern matching
- Add adaptive 3-step OCR pipeline with early exit when all 5 fields found
- Add pattern for "C. I. F." with spaces (OCR artifact from PaddleOCR)
- Add pattern for YYYY. MM. DD date format with spaces (OMV/Petrom receipts)
- Add pattern for "OTAL TAXE" with T cut off and reversed amount position
- Make TVA rate pattern more flexible (code letter optional, handle "-21%")
- Replace logger.info with print(flush=True) for better debugging visibility
- Improve OCRPreview.vue to show extraction progress and raw OCR text

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -23,37 +23,57 @@ class ImagePreprocessor:
|
||||
raise ValueError(f"Could not load image: {path}")
|
||||
return image
|
||||
|
||||
def pdf_to_images(self, path: Path, dpi: int = 400) -> List[np.ndarray]:
|
||||
def pdf_to_images(self, path: Path, dpi: int = 300) -> List[np.ndarray]:
|
||||
"""
|
||||
Convert PDF to images with high DPI for better OCR.
|
||||
Convert PDF to images.
|
||||
|
||||
Args:
|
||||
path: Path to PDF file
|
||||
dpi: Resolution (400 recommended for receipts, higher = better quality but slower)
|
||||
dpi: Resolution (300 = fast & good quality, 400 = better but slower)
|
||||
"""
|
||||
if not PDF_AVAILABLE:
|
||||
raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")
|
||||
# Use 400 DPI for better text recognition on thermal receipts
|
||||
images = pdf2image.convert_from_path(str(path), dpi=dpi)
|
||||
return [np.array(img) for img in images]
|
||||
|
||||
def preprocess(self, image: np.ndarray, high_quality: bool = True) -> np.ndarray:
|
||||
"""
|
||||
Apply preprocessing pipeline for thermal receipt images.
|
||||
Apply LIGHT preprocessing - better for clear PDFs.
|
||||
Heavy binarization can destroy text on clear images.
|
||||
"""
|
||||
return self.preprocess_light(image)
|
||||
|
||||
Pipeline:
|
||||
1. Convert to grayscale
|
||||
2. Resize if too small (min 1500px width for high quality)
|
||||
3. Deskew (straighten rotated text)
|
||||
4. Contrast enhancement (CLAHE)
|
||||
5. Denoise (Non-local means)
|
||||
6. Sharpening (for clearer text edges)
|
||||
7. Adaptive thresholding (binarization)
|
||||
8. Morphological operations (connect broken chars)
|
||||
def preprocess_light(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Light preprocessing for CLEAR images (PDFs, good scans).
|
||||
Preserves original quality, only enhances contrast.
|
||||
"""
|
||||
# 1. Grayscale
|
||||
if len(image.shape) == 3:
|
||||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
else:
|
||||
gray = image.copy()
|
||||
|
||||
Args:
|
||||
image: Input image (BGR or grayscale)
|
||||
high_quality: If True, apply more aggressive preprocessing
|
||||
# 2. Resize if too small
|
||||
height, width = gray.shape
|
||||
if width < 1500:
|
||||
scale = 1500 / width
|
||||
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
|
||||
|
||||
# 3. Deskew
|
||||
gray = self._deskew(gray)
|
||||
|
||||
# 4. Light contrast enhancement only
|
||||
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
|
||||
enhanced = clahe.apply(gray)
|
||||
|
||||
# NO binarization, NO morphological ops - preserve original quality
|
||||
return enhanced
|
||||
|
||||
def preprocess_heavy(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Heavy preprocessing for FADED thermal receipts.
|
||||
Aggressive binarization to recover faded text.
|
||||
"""
|
||||
# 1. Grayscale
|
||||
if len(image.shape) == 3:
|
||||
@@ -63,57 +83,48 @@ class ImagePreprocessor:
|
||||
|
||||
# 2. Resize if too small (larger = better OCR)
|
||||
height, width = gray.shape
|
||||
min_width = 1500 if high_quality else 1000
|
||||
if width < min_width:
|
||||
scale = min_width / width
|
||||
gray = cv2.resize(
|
||||
gray, None, fx=scale, fy=scale,
|
||||
interpolation=cv2.INTER_CUBIC
|
||||
)
|
||||
if width < 1500:
|
||||
scale = 1500 / width
|
||||
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
|
||||
|
||||
# 3. Deskew
|
||||
gray = self._deskew(gray)
|
||||
|
||||
# 4. Contrast enhancement with CLAHE (Contrast Limited Adaptive Histogram Equalization)
|
||||
# 4. Contrast enhancement with CLAHE
|
||||
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
||||
enhanced = clahe.apply(gray)
|
||||
|
||||
# 5. Denoise (slightly less aggressive to preserve text details)
|
||||
denoised = cv2.fastNlMeansDenoising(
|
||||
enhanced, h=8, # Lower h = preserve more details
|
||||
templateWindowSize=7,
|
||||
searchWindowSize=21
|
||||
)
|
||||
# 5. Denoise
|
||||
denoised = cv2.fastNlMeansDenoising(enhanced, h=8, templateWindowSize=7, searchWindowSize=21)
|
||||
|
||||
# 6. Sharpening to enhance text edges
|
||||
if high_quality:
|
||||
# Unsharp mask for better text clarity
|
||||
gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
|
||||
sharpened = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
|
||||
else:
|
||||
sharpened = denoised
|
||||
# 6. Sharpening
|
||||
gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
|
||||
sharpened = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
|
||||
|
||||
# 7. Adaptive thresholding with optimized parameters
|
||||
# 7. Adaptive thresholding (binarization)
|
||||
binary = cv2.adaptiveThreshold(
|
||||
sharpened, 255,
|
||||
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||
cv2.THRESH_BINARY,
|
||||
blockSize=11, # Smaller block = better for small text
|
||||
C=5 # Lower C = darker result, better for faded receipts
|
||||
blockSize=11, C=5
|
||||
)
|
||||
|
||||
# 8. Morphological operations
|
||||
# Close small gaps in characters
|
||||
kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
|
||||
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_close)
|
||||
|
||||
# Optional: Remove small noise spots
|
||||
if high_quality:
|
||||
kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
|
||||
result = cv2.morphologyEx(result, cv2.MORPH_OPEN, kernel_open)
|
||||
|
||||
return result
|
||||
|
||||
def get_all_variants(self, image: np.ndarray) -> List[np.ndarray]:
|
||||
"""
|
||||
Generate 2 preprocessing variants for OCR (fast mode).
|
||||
Returns: [light_processed, heavy_processed]
|
||||
"""
|
||||
return [
|
||||
self.preprocess_light(image),
|
||||
self.preprocess_heavy(image),
|
||||
]
|
||||
|
||||
def _deskew(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Correct image rotation/skew using Hough lines."""
|
||||
edges = cv2.Canny(image, 50, 150, apertureSize=3)
|
||||
|
||||
Reference in New Issue
Block a user