fix(ocr,gitignore): fix charmap encoding in OCR workers and ignore SQLite WAL files
- Fix UnicodeEncodeError in OCR worker subprocesses on Windows: worker processes started with the 'spawn' context get charmap (CP1252) encoding on stdout/stderr, causing crashes when printing Romanian diacritics (ș, ț, ă) from OCR-extracted text. Fix: reconfigure sys.stdout/sys.stderr to UTF-8 in _worker_initializer().
- Add *.db-wal and *.db-shm patterns (backend/data/cache/) to .gitignore for SQLite WAL mode auxiliary files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -519,6 +519,8 @@ secrets-backup/**/.env.*
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Backend unified data directories (cache, receipts, telegram)
|
# Backend unified data directories (cache, receipts, telegram)
|
||||||
backend/data/cache/*.db
|
backend/data/cache/*.db
|
||||||
|
backend/data/cache/*.db-wal
|
||||||
|
backend/data/cache/*.db-shm
|
||||||
backend/data/receipts/*.db
|
backend/data/receipts/*.db
|
||||||
backend/data/telegram/*.db
|
backend/data/telegram/*.db
|
||||||
backend/data/receipts/uploads/*
|
backend/data/receipts/uploads/*
|
||||||
|
|||||||
@@ -367,6 +367,17 @@ def _worker_initializer() -> None:
|
|||||||
|
|
||||||
Total warmup time = max(engine_times) instead of sum(engine_times).
|
Total warmup time = max(engine_times) instead of sum(engine_times).
|
||||||
"""
|
"""
|
||||||
|
import io
|
||||||
|
|
||||||
|
# Fix stdout/stderr encoding in spawned worker process.
|
||||||
|
# On Windows, 'spawn' context creates a subprocess with charmap (CP1252) encoding.
|
||||||
|
# Romanian diacritics (ș, ț, ă, â, î) cannot be encoded in charmap, causing
|
||||||
|
# UnicodeEncodeError when print() is called with OCR-extracted text.
|
||||||
|
if sys.stdout and hasattr(sys.stdout, 'buffer'):
|
||||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
|
||||||
|
if sys.stderr and hasattr(sys.stderr, 'buffer'):
|
||||||
|
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
|
||||||
|
|
||||||
global _paddle_engine, _tesseract_engine, _doctr_engine, _worker_initialized
|
global _paddle_engine, _tesseract_engine, _doctr_engine, _worker_initialized
|
||||||
|
|
||||||
if _worker_initialized:
|
if _worker_initialized:
|
||||||
|
|||||||
Reference in New Issue
Block a user