fix: md_to_pdf — parallel conversion, fix tabele largi, fix underscores

- Conversie paralelă cu ProcessPoolExecutor (implicit 4 workeri, configurabil cu --workers/-w)
- Tabele: font 8.5pt, padding compact, word-break pentru text lung
- Extra-ul code-friendly (markdown2) dezactivează emfaza cu _ și __, ceea ce previne pierderea _ din nume de fișiere
- find_files caută toate *.md din summaries/, nu doar MODUL*_*.md; filtrul --modules se aplică în continuare doar fișierelor MODUL*
- .gitignore: adaugă .claude/ (local state)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 15:34:12 +02:00
parent 2c5e140bd1
commit c2f37ddb8f
2 changed files with 37 additions and 12 deletions

3
.gitignore vendored
View File

@@ -33,5 +33,8 @@ __pycache__/
.venv/
.venv_pdf/
# Claude Code local state
.claude/
# Logs
*.log

View File

@@ -5,6 +5,7 @@ import argparse
import glob
import os
import sys
from concurrent.futures import ProcessPoolExecutor, as_completed
import markdown2
from weasyprint import HTML
@@ -91,21 +92,25 @@ table {
width: 100%;
border-collapse: collapse;
margin: 0.5em 0;
font-size: 10pt;
font-size: 8.5pt;
page-break-inside: avoid;
table-layout: fixed;
word-wrap: break-word;
overflow-wrap: break-word;
}
th {
background-color: #e8e8e8;
font-weight: bold;
text-align: left;
padding: 5pt 8pt;
padding: 3pt 4pt;
border: 0.5pt solid #bbb;
}
td {
padding: 4pt 8pt;
padding: 3pt 4pt;
border: 0.5pt solid #ccc;
word-break: break-all;
}
tr:nth-child(even) td {
@@ -147,14 +152,15 @@ blockquote {
"""
def md_to_pdf(md_path, pdf_path):
"""Convert a single Markdown file to PDF."""
def convert_one(args):
"""Convert a single Markdown file to PDF. Designed for parallel execution."""
md_path, pdf_path = args
with open(md_path, "r", encoding="utf-8") as f:
md_text = f.read()
html_body = markdown2.markdown(
md_text,
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline"],
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline", "code-friendly"],
)
html_doc = f"""<!DOCTYPE html>
@@ -169,19 +175,20 @@ def md_to_pdf(md_path, pdf_path):
</html>"""
HTML(string=html_doc).write_pdf(pdf_path)
print(f" {os.path.basename(md_path)} -> {os.path.basename(pdf_path)}")
return os.path.basename(md_path), os.path.basename(pdf_path)
def find_files(modules=None):
"""Find MODUL*_*.md files, optionally filtered by module numbers."""
pattern = os.path.join(SUMMARIES_DIR, "MODUL*_*.md")
"""Find all .md files in summaries/, optionally filtered by module numbers."""
pattern = os.path.join(SUMMARIES_DIR, "*.md")
files = sorted(glob.glob(pattern))
if modules:
filtered = []
for f in files:
basename = os.path.basename(f)
# Extract module number from MODUL{N}_...
if not basename.startswith("MODUL"):
continue
try:
num = int(basename.split("_")[0].replace("MODUL", ""))
if num in modules:
@@ -212,6 +219,9 @@ def main():
parser.add_argument(
"--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'"
)
parser.add_argument(
"--workers", "-w", type=int, default=4, help="Parallel workers (default: 4)"
)
args = parser.parse_args()
os.makedirs(PDF_DIR, exist_ok=True)
@@ -226,11 +236,23 @@ def main():
print("No MD files found to convert.")
sys.exit(1)
print(f"Converting {len(md_files)} file(s) to PDF...")
jobs = []
for md_path in md_files:
basename = os.path.splitext(os.path.basename(md_path))[0]
pdf_path = os.path.join(PDF_DIR, basename + ".pdf")
md_to_pdf(md_path, pdf_path)
jobs.append((md_path, pdf_path))
print(f"Converting {len(jobs)} file(s) to PDF with {args.workers} workers...")
with ProcessPoolExecutor(max_workers=args.workers) as pool:
futures = {pool.submit(convert_one, job): job for job in jobs}
for future in as_completed(futures):
try:
src, dst = future.result()
print(f" {src} -> {dst}")
except Exception as e:
md_path = futures[future][0]
print(f" ERROR {os.path.basename(md_path)}: {e}", file=sys.stderr)
print(f"Done. PDFs saved to {PDF_DIR}")