feat: Migrate to ultrathin monolith architecture
Consolidate 3 separate applications (reports-app, data-entry-app, telegram-bot) into a unified
architecture with a single backend and a single frontend:
Backend Changes:
- Unified FastAPI backend at backend/ with modular structure
- Modules: reports, data_entry, telegram in backend/modules/
- Centralized config.py and main.py with all routers registered
- Single worker mode (--workers 1) for Telegram bot compatibility
- Shared Oracle connection pool and JWT authentication
- Unified requirements.txt and environment configuration
Frontend Changes:
- Single Vue.js SPA with module-based routing
- Unified frontend at src/ with modules in src/modules/{reports,data-entry}/
- Shared components and stores in src/shared/
- Error boundaries for module isolation
- Dual API proxy in Vite for module communication
Infrastructure:
- New unified startup scripts: start-prod.sh, start-test.sh, start-backend.sh
- Environment templates: .env.dev.example, .env.test.example, .env.prod.example
- Updated deployment scripts for Windows IIS
- Simplified SSH tunnel management
Documentation:
- Comprehensive CLAUDE.md with architecture overview
- Module-specific docs in docs/{data-entry,telegram}/
- Architecture decision records in docs/ARCHITECTURE-DECISIONS.md
- Deployment guides consolidated in deployment/windows/docs/
This migration reduces complexity, improves maintainability, and enables easier
deployment while maintaining all existing functionality.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
270
backend/modules/data_entry/services/image_preprocessor.py
Normal file
270
backend/modules/data_entry/services/image_preprocessor.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""Image preprocessing for optimal OCR results."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
try:
|
||||
import pdf2image
|
||||
PDF_AVAILABLE = True
|
||||
except ImportError:
|
||||
PDF_AVAILABLE = False
|
||||
|
||||
|
||||
class ImagePreprocessor:
    """Preprocess receipt images for OCR.

    Colour images are handled in OpenCV's BGR channel order throughout
    (this is what ``load_image`` returns and what the grayscale conversion
    in every pipeline expects).
    """

    # Longest side, in pixels, allowed after size normalisation in the
    # light/heavy pipelines (PaddleOCR limit).
    MAX_SIDE = 4000
    # Images narrower than this many pixels are enlarged in the light/heavy
    # pipelines, as far as MAX_SIDE allows.
    MIN_WIDTH = 1500

    def _add_safety_padding(self, image: np.ndarray, padding: int = 50) -> np.ndarray:
        """Surround the image with a white border of ``padding`` pixels.

        The border keeps text close to the edges from being lost when the
        image is rotated during deskew.

        Args:
            image: Grayscale (2-D) or colour (3-D, BGR) image.
            padding: Border width in pixels, applied to all four sides.

        Returns:
            A new, larger image with the original content centred.
        """
        # A scalar fills a single-channel image; colour needs one value per channel.
        white = 255 if len(image.shape) == 2 else (255, 255, 255)
        return cv2.copyMakeBorder(
            image, padding, padding, padding, padding,
            cv2.BORDER_CONSTANT, value=white,
        )

    def load_image(self, path: Path) -> np.ndarray:
        """Load an image from disk with ``cv2.imread``.

        Args:
            path: Location of the image file.

        Returns:
            The decoded image.

        Raises:
            ValueError: If OpenCV could not read or decode the file.
        """
        image = cv2.imread(str(path))
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise ValueError(f"Could not load image: {path}")
        return image

    @staticmethod
    def _pil_to_bgr(page) -> np.ndarray:
        """Convert one rendered PDF page (PIL image) to an OpenCV-style array."""
        pixels = np.array(page)
        # PIL delivers colour pages as RGB; the rest of this class assumes BGR.
        if pixels.ndim == 3 and pixels.shape[2] == 3:
            return cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
        # Anything else (e.g. single-channel pages) is passed through unchanged.
        return pixels

    def pdf_to_images(self, path: Path, dpi: int = 300) -> List[np.ndarray]:
        """Render every page of a PDF to an image array.

        Args:
            path: Path to PDF file.
            dpi: Resolution (300 = fast & good quality, 400 = better but slower).

        Returns:
            One array per page. Three-channel pages are returned in BGR
            order so they match the output of ``load_image``.

        Raises:
            RuntimeError: If the optional ``pdf2image`` dependency is missing.
        """
        if not PDF_AVAILABLE:
            raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")

        pages = pdf2image.convert_from_path(str(path), dpi=dpi)
        return [self._pil_to_bgr(page) for page in pages]

    def preprocess(self, image: np.ndarray, high_quality: bool = True) -> np.ndarray:
        """Apply the default (LIGHT) preprocessing - better for clear PDFs.

        Heavy binarization can destroy text on clear images, so this simply
        delegates to ``preprocess_light``.

        Args:
            image: Grayscale or BGR image.
            high_quality: Accepted for call compatibility; currently has no
                effect on the result.

        Returns:
            The contrast-enhanced grayscale image.
        """
        return self.preprocess_light(image)

    def _padded_gray(self, image: np.ndarray) -> np.ndarray:
        """Add the safety border and reduce the image to a single channel."""
        padded = self._add_safety_padding(image)
        if len(padded.shape) == 3:
            return cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
        return padded.copy()

    def _fit_ocr_size(self, gray: np.ndarray) -> np.ndarray:
        """Bring a grayscale image into the size range used by the OCR pipelines.

        Images whose longest side exceeds ``MAX_SIDE`` are shrunk to fit.
        Images narrower than ``MIN_WIDTH`` are then enlarged, but never so
        far that the longest side would exceed ``MAX_SIDE``.
        """
        height, width = gray.shape
        longest = max(height, width)
        if longest > self.MAX_SIDE:
            scale = self.MAX_SIDE / longest
            gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
            height, width = gray.shape

        if width < self.MIN_WIDTH:
            # Both candidate factors are relative to the CURRENT size, so the
            # smaller one is the largest enlargement that respects MAX_SIDE.
            # (Deriving the cap from the already-upscaled size and applying it
            # to the current image would shrink tall, narrow receipts.)
            scale = min(self.MIN_WIDTH / width, self.MAX_SIDE / max(height, width))
            if scale > 1.0:
                gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

        return gray

    def _prepare_gray(self, image: np.ndarray) -> np.ndarray:
        """Shared front half of the light/heavy pipelines: pad, gray, resize, deskew."""
        gray = self._padded_gray(image)
        gray = self._fit_ocr_size(gray)
        return self._deskew(gray)

    def preprocess_light(self, image: np.ndarray) -> np.ndarray:
        """Light preprocessing for CLEAR images (PDFs, good scans).

        Preserves original quality, only enhances contrast: the image is
        padded, converted to grayscale, size-normalised, deskewed and then
        passed through a mild CLAHE. No binarization and no morphological
        operations are applied.

        Args:
            image: Grayscale or BGR image.

        Returns:
            The contrast-enhanced grayscale image.
        """
        gray = self._prepare_gray(image)

        # Light contrast enhancement only.
        clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
        return clahe.apply(gray)

    def preprocess_heavy(self, image: np.ndarray) -> np.ndarray:
        """Heavy preprocessing for FADED thermal receipts.

        After the same pad / grayscale / resize / deskew steps as the light
        pipeline, applies aggressive cleanup and binarization to recover
        faded text.

        Args:
            image: Grayscale or BGR image.

        Returns:
            A binarized single-channel image.
        """
        gray = self._prepare_gray(image)

        # Contrast enhancement with CLAHE.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Denoise.
        denoised = cv2.fastNlMeansDenoising(enhanced, h=8, templateWindowSize=7, searchWindowSize=21)

        # Sharpening (unsharp mask): 1.5 * image - 0.5 * blurred copy.
        blurred = cv2.GaussianBlur(denoised, (0, 0), 2.0)
        sharpened = cv2.addWeighted(denoised, 1.5, blurred, -0.5, 0)

        # Adaptive thresholding (binarization).
        binary = cv2.adaptiveThreshold(
            sharpened, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            blockSize=11, C=5,
        )

        # Morphological closing with a small rectangular kernel.
        kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_close)

    def preprocess_for_tesseract(self, image: np.ndarray) -> np.ndarray:
        """Tesseract-optimized preprocessing.

        Tesseract works best with:
        - Clean black text on white background (binarized)
        - High DPI (scale up small images)
        - Otsu thresholding (better than adaptive for clean documents)

        Args:
            image: Grayscale or BGR image.

        Returns:
            A binarized single-channel image.
        """
        gray = self._padded_gray(image)

        # Scale for optimal Tesseract: widen to 2000px if narrower, shrink to
        # 3000px if wider; widths in between are left alone.
        _, width = gray.shape
        if width < 2000:
            scale = 2000 / width
            gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        elif width > 3000:
            scale = 3000 / width
            gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        gray = self._deskew(gray)

        # Strong contrast enhancement.
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Denoise before binarization.
        denoised = cv2.fastNlMeansDenoising(enhanced, h=10, templateWindowSize=7, searchWindowSize=21)

        # Otsu binarization (better than adaptive for clean PDFs); the
        # threshold argument 0 is ignored because Otsu picks it automatically.
        _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Light morphological cleanup.
        # NOTE(review): with a 1x1 structuring element this closing should
        # leave the image unchanged; kept as-is to preserve current output.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
        return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    def get_all_variants(self, image: np.ndarray) -> List[np.ndarray]:
        """Generate 2 preprocessing variants for OCR (fast mode).

        Args:
            image: Grayscale or BGR image.

        Returns:
            ``[light_processed, heavy_processed]``
        """
        return [
            self.preprocess_light(image),
            self.preprocess_heavy(image),
        ]

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """Correct image rotation/skew using Hough lines.

        The skew is estimated as the median angle of the roughly horizontal
        line segments found in the image. The rotation is performed on an
        expanded canvas so no content is cut off, preventing left/right
        margin truncation.

        Args:
            image: Grayscale image.

        Returns:
            The rotated image, or the input itself when no lines are found
            or the estimated skew is below 0.5 degrees.
        """
        edges = cv2.Canny(image, 50, 150, apertureSize=3)
        lines = cv2.HoughLinesP(
            edges, 1, np.pi / 180,
            threshold=100, minLineLength=100, maxLineGap=10,
        )

        if lines is None:
            return image

        # Keep only segments within 45 degrees of horizontal; steeper ones
        # are not used for the skew estimate.
        angles = []
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            if abs(angle) < 45:
                angles.append(angle)

        if not angles:
            return image

        median_angle = np.median(angles)
        # Skip the (lossy) resampling for negligible skew.
        if abs(median_angle) < 0.5:
            return image

        h, w = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, median_angle, 1.0)

        # Bounding box of the rotated image, so nothing is clipped.
        cos_angle = abs(np.cos(np.radians(median_angle)))
        sin_angle = abs(np.sin(np.radians(median_angle)))
        new_w = int(h * sin_angle + w * cos_angle)
        new_h = int(h * cos_angle + w * sin_angle)

        # Shift the transform so the content is centred on the larger canvas.
        M[0, 2] += (new_w - w) / 2
        M[1, 2] += (new_h - h) / 2

        return cv2.warpAffine(
            image, M, (new_w, new_h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=255,  # White background (grayscale)
        )
|
||||
Reference in New Issue
Block a user