fix: md_to_pdf — parallel conversion, fix tabele largi, fix underscores
- Conversie paralelă cu ProcessPoolExecutor (4 workers default)
- Tabele: font 8.5pt, padding compact, word-break pentru text lung
- code-friendly extra previne pierderea _ din nume de fișiere
- find_files caută toate *.md din summaries/, nu doar MODUL*
- .gitignore: adaugă .claude/ (local state)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -33,5 +33,8 @@ __pycache__/
|
||||
.venv/
|
||||
.venv_pdf/
|
||||
|
||||
# Claude Code local state
|
||||
.claude/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
46
md_to_pdf.py
46
md_to_pdf.py
@@ -5,6 +5,7 @@ import argparse
|
||||
import glob
|
||||
import os
|
||||
import sys
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
|
||||
import markdown2
|
||||
from weasyprint import HTML
|
||||
@@ -91,21 +92,25 @@ table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin: 0.5em 0;
|
||||
font-size: 10pt;
|
||||
font-size: 8.5pt;
|
||||
page-break-inside: avoid;
|
||||
table-layout: fixed;
|
||||
word-wrap: break-word;
|
||||
overflow-wrap: break-word;
|
||||
}
|
||||
|
||||
th {
|
||||
background-color: #e8e8e8;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
padding: 5pt 8pt;
|
||||
padding: 3pt 4pt;
|
||||
border: 0.5pt solid #bbb;
|
||||
}
|
||||
|
||||
td {
|
||||
padding: 4pt 8pt;
|
||||
padding: 3pt 4pt;
|
||||
border: 0.5pt solid #ccc;
|
||||
word-break: break-all;
|
||||
}
|
||||
|
||||
tr:nth-child(even) td {
|
||||
@@ -147,14 +152,15 @@ blockquote {
|
||||
"""
|
||||
|
||||
|
||||
def md_to_pdf(md_path, pdf_path):
|
||||
"""Convert a single Markdown file to PDF."""
|
||||
def convert_one(args):
|
||||
"""Convert a single Markdown file to PDF. Designed for parallel execution."""
|
||||
md_path, pdf_path = args
|
||||
with open(md_path, "r", encoding="utf-8") as f:
|
||||
md_text = f.read()
|
||||
|
||||
html_body = markdown2.markdown(
|
||||
md_text,
|
||||
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline"],
|
||||
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline", "code-friendly"],
|
||||
)
|
||||
|
||||
html_doc = f"""<!DOCTYPE html>
|
||||
@@ -169,19 +175,20 @@ def md_to_pdf(md_path, pdf_path):
|
||||
</html>"""
|
||||
|
||||
HTML(string=html_doc).write_pdf(pdf_path)
|
||||
print(f" {os.path.basename(md_path)} -> {os.path.basename(pdf_path)}")
|
||||
return os.path.basename(md_path), os.path.basename(pdf_path)
|
||||
|
||||
|
||||
def find_files(modules=None):
|
||||
"""Find MODUL*_*.md files, optionally filtered by module numbers."""
|
||||
pattern = os.path.join(SUMMARIES_DIR, "MODUL*_*.md")
|
||||
"""Find all .md files in summaries/, optionally filtered by module numbers."""
|
||||
pattern = os.path.join(SUMMARIES_DIR, "*.md")
|
||||
files = sorted(glob.glob(pattern))
|
||||
|
||||
if modules:
|
||||
filtered = []
|
||||
for f in files:
|
||||
basename = os.path.basename(f)
|
||||
# Extract module number from MODUL{N}_...
|
||||
if not basename.startswith("MODUL"):
|
||||
continue
|
||||
try:
|
||||
num = int(basename.split("_")[0].replace("MODUL", ""))
|
||||
if num in modules:
|
||||
@@ -212,6 +219,9 @@ def main():
|
||||
parser.add_argument(
|
||||
"--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workers", "-w", type=int, default=4, help="Parallel workers (default: 4)"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
os.makedirs(PDF_DIR, exist_ok=True)
|
||||
@@ -226,11 +236,23 @@ def main():
|
||||
print("No MD files found to convert.")
|
||||
sys.exit(1)
|
||||
|
||||
print(f"Converting {len(md_files)} file(s) to PDF...")
|
||||
jobs = []
|
||||
for md_path in md_files:
|
||||
basename = os.path.splitext(os.path.basename(md_path))[0]
|
||||
pdf_path = os.path.join(PDF_DIR, basename + ".pdf")
|
||||
md_to_pdf(md_path, pdf_path)
|
||||
jobs.append((md_path, pdf_path))
|
||||
|
||||
print(f"Converting {len(jobs)} file(s) to PDF with {args.workers} workers...")
|
||||
|
||||
with ProcessPoolExecutor(max_workers=args.workers) as pool:
|
||||
futures = {pool.submit(convert_one, job): job for job in jobs}
|
||||
for future in as_completed(futures):
|
||||
try:
|
||||
src, dst = future.result()
|
||||
print(f" {src} -> {dst}")
|
||||
except Exception as e:
|
||||
md_path = futures[future][0]
|
||||
print(f" ERROR {os.path.basename(md_path)}: {e}", file=sys.stderr)
|
||||
|
||||
print(f"Done. PDFs saved to {PDF_DIR}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user