feat: Add OCR integration for automatic receipt data extraction
Implement Tesseract-based OCR to automatically extract vendor name, date, total amount, and VAT from uploaded receipt images/PDFs, reducing manual data entry and improving accuracy. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
116
data-entry-app/backend/app/services/image_preprocessor.py
Normal file
116
data-entry-app/backend/app/services/image_preprocessor.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""Image preprocessing for optimal OCR results."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
try:
|
||||
import pdf2image
|
||||
PDF_AVAILABLE = True
|
||||
except ImportError:
|
||||
PDF_AVAILABLE = False
|
||||
|
||||
|
||||
class ImagePreprocessor:
    """Preprocess receipt images for OCR.

    Colour images produced by this class use OpenCV's BGR channel order,
    regardless of whether they come from an image file or a PDF page, so
    that ``preprocess`` can apply a single BGR -> grayscale conversion.
    """

    def load_image(self, path: Path) -> np.ndarray:
        """Load an image from disk with OpenCV.

        Args:
            path: Location of the image file.

        Returns:
            The decoded image as returned by ``cv2.imread``.

        Raises:
            ValueError: If OpenCV returns no image for ``path``.
        """
        image = cv2.imread(str(path))
        if image is None:
            raise ValueError(f"Could not load image: {path}")
        return image

    def pdf_to_images(self, path: Path, dpi: int = 300) -> List[np.ndarray]:
        """Render every page of a PDF to a BGR image array.

        Args:
            path: Location of the PDF file.
            dpi: Rendering resolution passed to ``pdf2image``.

        Returns:
            One array per page, in BGR channel order.

        Raises:
            RuntimeError: If the optional ``pdf2image`` dependency is missing.
        """
        if not PDF_AVAILABLE:
            raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")

        pages = pdf2image.convert_from_path(str(path), dpi=dpi)
        # pdf2image yields PIL images (RGB order). Convert to BGR so the
        # result matches load_image() and the COLOR_BGR2GRAY conversion in
        # preprocess(); otherwise red and blue luminance weights are swapped.
        return [
            cv2.cvtColor(np.array(page.convert("RGB")), cv2.COLOR_RGB2BGR)
            for page in pages
        ]

    def preprocess(self, image: np.ndarray, min_width: int = 1000) -> np.ndarray:
        """Apply the preprocessing pipeline for thermal receipt images.

        Pipeline:
            1. Convert to grayscale
            2. Resize if too small (``min_width`` pixels wide, default 1000)
            3. Deskew (straighten rotated text)
            4. Denoise (Non-local means)
            5. Adaptive thresholding (binarization)
            6. Morphological close (connect broken chars)

        Args:
            image: Grayscale ``(H, W)`` array or BGR colour ``(H, W, C)`` array.
            min_width: Images narrower than this are upscaled to this width.

        Returns:
            The binarized single-channel image.

        Raises:
            ValueError: If ``image`` is ``None``, empty, or not 2-D/3-D.
        """
        if image is None or image.size == 0:
            raise ValueError("Cannot preprocess an empty image")

        # 1. Grayscale
        if image.ndim == 3:
            if image.shape[2] == 1:
                # Already single-channel; just drop the trailing axis.
                gray = image[:, :, 0].copy()
            else:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        elif image.ndim == 2:
            gray = image.copy()
        else:
            raise ValueError(
                f"Expected a 2-D or 3-D image array, got {image.ndim} dimensions"
            )

        # 2. Resize if too small (aspect ratio is preserved)
        width = gray.shape[1]
        if width < min_width:
            scale = min_width / width
            gray = cv2.resize(
                gray, None, fx=scale, fy=scale,
                interpolation=cv2.INTER_CUBIC,
            )

        # 3. Deskew
        gray = self._deskew(gray)

        # 4. Denoise
        denoised = cv2.fastNlMeansDenoising(
            gray, h=10,
            templateWindowSize=7,
            searchWindowSize=21,
        )

        # 5. Adaptive thresholding
        binary = cv2.adaptiveThreshold(
            denoised, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            blockSize=15, C=8,
        )

        # 6. Morphological close
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """Correct image rotation/skew using Hough lines.

        Detects line segments, takes the median angle of those within
        45 degrees of horizontal, and rotates the image by that angle about
        its centre. The input is returned unchanged when no usable segments
        are found or the median skew is below 0.5 degrees.

        Args:
            image: Single-channel image to straighten.

        Returns:
            The rotated image (same size as the input), or the input itself.
        """
        edges = cv2.Canny(image, 50, 150, apertureSize=3)
        lines = cv2.HoughLinesP(
            edges, 1, np.pi / 180,
            threshold=100, minLineLength=100, maxLineGap=10,
        )
        if lines is None:
            return image

        # Each detected segment is (x1, y1, x2, y2); flatten to an (N, 4) table.
        segments = np.asarray(lines, dtype=np.float64).reshape(-1, 4)
        angles = np.degrees(
            np.arctan2(
                segments[:, 3] - segments[:, 1],
                segments[:, 2] - segments[:, 0],
            )
        )
        # Keep only near-horizontal segments; steeper ones are not text lines.
        angles = angles[np.abs(angles) < 45]
        if angles.size == 0:
            return image

        median_angle = float(np.median(angles))
        if abs(median_angle) < 0.5:
            # Skew too small to be worth an interpolating rotation.
            return image

        h, w = image.shape[:2]
        center = (w // 2, h // 2)
        rotation = cv2.getRotationMatrix2D(center, median_angle, 1.0)
        return cv2.warpAffine(
            image, rotation, (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )
|
||||
Reference in New Issue
Block a user