From c2f37ddb8fc8094b792b354cde5237e2bf76322c Mon Sep 17 00:00:00 2001 From: Marius Mutu Date: Wed, 25 Mar 2026 15:34:12 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20md=5Fto=5Fpdf=20=E2=80=94=20parallel=20c?= =?UTF-8?q?onversion,=20fix=20tabele=20largi,=20fix=20underscores?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Conversie paralelă cu ProcessPoolExecutor (4 workers default) - Tabele: font 8.5pt, padding compact, word-break pentru text lung - code-friendly extra previne pierderea _ din nume de fișiere - find_files caută toate *.md din summaries/, nu doar MODUL* - .gitignore: adaugă .claude/ (local state) Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 3 +++ md_to_pdf.py | 46 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index bedd8e6..6bcabcc 100644 --- a/.gitignore +++ b/.gitignore @@ -33,5 +33,8 @@ __pycache__/ .venv/ .venv_pdf/ +# Claude Code local state +.claude/ + # Logs *.log diff --git a/md_to_pdf.py b/md_to_pdf.py index 926572e..f3390e9 100644 --- a/md_to_pdf.py +++ b/md_to_pdf.py @@ -5,6 +5,7 @@ import argparse import glob import os import sys +from concurrent.futures import ProcessPoolExecutor, as_completed import markdown2 from weasyprint import HTML @@ -91,21 +92,25 @@ table { width: 100%; border-collapse: collapse; margin: 0.5em 0; - font-size: 10pt; + font-size: 8.5pt; page-break-inside: avoid; + table-layout: fixed; + word-wrap: break-word; + overflow-wrap: break-word; } th { background-color: #e8e8e8; font-weight: bold; text-align: left; - padding: 5pt 8pt; + padding: 3pt 4pt; border: 0.5pt solid #bbb; } td { - padding: 4pt 8pt; + padding: 3pt 4pt; border: 0.5pt solid #ccc; + word-break: break-all; } tr:nth-child(even) td { @@ -147,14 +152,15 @@ blockquote { """ -def md_to_pdf(md_path, pdf_path): - """Convert a single Markdown file to PDF.""" +def convert_one(args): + """Convert a single Markdown file to PDF. Designed for parallel execution.""" + md_path, pdf_path = args with open(md_path, "r", encoding="utf-8") as f: md_text = f.read() html_body = markdown2.markdown( md_text, - extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline"], + extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline", "code-friendly"], ) html_doc = f""" @@ -169,19 +175,20 @@ def md_to_pdf(md_path, pdf_path): """ HTML(string=html_doc).write_pdf(pdf_path) - print(f" {os.path.basename(md_path)} -> {os.path.basename(pdf_path)}") + return os.path.basename(md_path), os.path.basename(pdf_path) def find_files(modules=None): - """Find MODUL*_*.md files, optionally filtered by module numbers.""" - pattern = os.path.join(SUMMARIES_DIR, "MODUL*_*.md") + """Find all .md files in summaries/, optionally filtered by module numbers.""" + pattern = os.path.join(SUMMARIES_DIR, "*.md") files = sorted(glob.glob(pattern)) if modules: filtered = [] for f in files: basename = os.path.basename(f) - # Extract module number from MODUL{N}_... + if not basename.startswith("MODUL"): + continue try: num = int(basename.split("_")[0].replace("MODUL", "")) if num in modules: @@ -212,6 +219,9 @@ def main(): parser.add_argument( "--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'" ) + parser.add_argument( + "--workers", "-w", type=int, default=4, help="Parallel workers (default: 4)" + ) args = parser.parse_args() os.makedirs(PDF_DIR, exist_ok=True) @@ -226,11 +236,23 @@ def main(): print("No MD files found to convert.") sys.exit(1) - print(f"Converting {len(md_files)} file(s) to PDF...") + jobs = [] for md_path in md_files: basename = os.path.splitext(os.path.basename(md_path))[0] pdf_path = os.path.join(PDF_DIR, basename + ".pdf") - md_to_pdf(md_path, pdf_path) + jobs.append((md_path, pdf_path)) + + print(f"Converting {len(jobs)} file(s) to PDF with {args.workers} workers...") + + with ProcessPoolExecutor(max_workers=args.workers) as pool: + futures = {pool.submit(convert_one, job): job for job in jobs} + for future in as_completed(futures): + try: + src, dst = future.result() + print(f" {src} -> {dst}") + except Exception as e: + md_path = futures[future][0] + print(f" ERROR {os.path.basename(md_path)}: {e}", file=sys.stderr) print(f"Done. PDFs saved to {PDF_DIR}")