fix: md_to_pdf — parallel conversion, fix tabele largi, fix underscores

- Conversie paralelă cu ProcessPoolExecutor (4 workers default)
- Tabele: font 8.5pt, padding compact, word-break pentru text lung
- code-friendly extra previne pierderea _ din nume de fișiere
- find_files caută toate *.md din summaries/, nu doar MODUL*
- .gitignore: adaugă .claude/ (local state)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 15:34:12 +02:00
parent 2c5e140bd1
commit c2f37ddb8f
2 changed files with 37 additions and 12 deletions

3
.gitignore vendored
View File

@@ -33,5 +33,8 @@ __pycache__/
.venv/ .venv/
.venv_pdf/ .venv_pdf/
# Claude Code local state
.claude/
# Logs # Logs
*.log *.log

View File

@@ -5,6 +5,7 @@ import argparse
import glob import glob
import os import os
import sys import sys
from concurrent.futures import ProcessPoolExecutor, as_completed
import markdown2 import markdown2
from weasyprint import HTML from weasyprint import HTML
@@ -91,21 +92,25 @@ table {
width: 100%; width: 100%;
border-collapse: collapse; border-collapse: collapse;
margin: 0.5em 0; margin: 0.5em 0;
font-size: 10pt; font-size: 8.5pt;
page-break-inside: avoid; page-break-inside: avoid;
table-layout: fixed;
word-wrap: break-word;
overflow-wrap: break-word;
} }
th { th {
background-color: #e8e8e8; background-color: #e8e8e8;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
padding: 5pt 8pt; padding: 3pt 4pt;
border: 0.5pt solid #bbb; border: 0.5pt solid #bbb;
} }
td { td {
padding: 4pt 8pt; padding: 3pt 4pt;
border: 0.5pt solid #ccc; border: 0.5pt solid #ccc;
word-break: break-all;
} }
tr:nth-child(even) td { tr:nth-child(even) td {
@@ -147,14 +152,15 @@ blockquote {
""" """
def md_to_pdf(md_path, pdf_path): def convert_one(args):
"""Convert a single Markdown file to PDF.""" """Convert a single Markdown file to PDF. Designed for parallel execution."""
md_path, pdf_path = args
with open(md_path, "r", encoding="utf-8") as f: with open(md_path, "r", encoding="utf-8") as f:
md_text = f.read() md_text = f.read()
html_body = markdown2.markdown( html_body = markdown2.markdown(
md_text, md_text,
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline"], extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline", "code-friendly"],
) )
html_doc = f"""<!DOCTYPE html> html_doc = f"""<!DOCTYPE html>
@@ -169,19 +175,20 @@ def md_to_pdf(md_path, pdf_path):
</html>""" </html>"""
HTML(string=html_doc).write_pdf(pdf_path) HTML(string=html_doc).write_pdf(pdf_path)
print(f" {os.path.basename(md_path)} -> {os.path.basename(pdf_path)}") return os.path.basename(md_path), os.path.basename(pdf_path)
def find_files(modules=None): def find_files(modules=None):
"""Find MODUL*_*.md files, optionally filtered by module numbers.""" """Find all .md files in summaries/, optionally filtered by module numbers."""
pattern = os.path.join(SUMMARIES_DIR, "MODUL*_*.md") pattern = os.path.join(SUMMARIES_DIR, "*.md")
files = sorted(glob.glob(pattern)) files = sorted(glob.glob(pattern))
if modules: if modules:
filtered = [] filtered = []
for f in files: for f in files:
basename = os.path.basename(f) basename = os.path.basename(f)
# Extract module number from MODUL{N}_... if not basename.startswith("MODUL"):
continue
try: try:
num = int(basename.split("_")[0].replace("MODUL", "")) num = int(basename.split("_")[0].replace("MODUL", ""))
if num in modules: if num in modules:
@@ -212,6 +219,9 @@ def main():
parser.add_argument( parser.add_argument(
"--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'" "--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'"
) )
parser.add_argument(
"--workers", "-w", type=int, default=4, help="Parallel workers (default: 4)"
)
args = parser.parse_args() args = parser.parse_args()
os.makedirs(PDF_DIR, exist_ok=True) os.makedirs(PDF_DIR, exist_ok=True)
@@ -226,11 +236,23 @@ def main():
print("No MD files found to convert.") print("No MD files found to convert.")
sys.exit(1) sys.exit(1)
print(f"Converting {len(md_files)} file(s) to PDF...") jobs = []
for md_path in md_files: for md_path in md_files:
basename = os.path.splitext(os.path.basename(md_path))[0] basename = os.path.splitext(os.path.basename(md_path))[0]
pdf_path = os.path.join(PDF_DIR, basename + ".pdf") pdf_path = os.path.join(PDF_DIR, basename + ".pdf")
md_to_pdf(md_path, pdf_path) jobs.append((md_path, pdf_path))
print(f"Converting {len(jobs)} file(s) to PDF with {args.workers} workers...")
with ProcessPoolExecutor(max_workers=args.workers) as pool:
futures = {pool.submit(convert_one, job): job for job in jobs}
for future in as_completed(futures):
try:
src, dst = future.result()
print(f" {src} -> {dst}")
except Exception as e:
md_path = futures[future][0]
print(f" ERROR {os.path.basename(md_path)}: {e}", file=sys.stderr)
print(f"Done. PDFs saved to {PDF_DIR}") print(f"Done. PDFs saved to {PDF_DIR}")