fix: md_to_pdf — parallel conversion, fix tabele largi, fix underscores
- Conversie paralelă cu ProcessPoolExecutor (4 workers default)
- Tabele: font 8.5pt, padding compact, word-break pentru text lung
- code-friendly extra previne pierderea _ din nume de fișiere
- find_files caută toate *.md din summaries/, nu doar MODUL*
- .gitignore: adaugă .claude/ (local state)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -33,5 +33,8 @@ __pycache__/
|
|||||||
.venv/
|
.venv/
|
||||||
.venv_pdf/
|
.venv_pdf/
|
||||||
|
|
||||||
|
# Claude Code local state
|
||||||
|
.claude/
|
||||||
|
|
||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
|
|||||||
46
md_to_pdf.py
46
md_to_pdf.py
@@ -5,6 +5,7 @@ import argparse
|
|||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||||
|
|
||||||
import markdown2
|
import markdown2
|
||||||
from weasyprint import HTML
|
from weasyprint import HTML
|
||||||
@@ -91,21 +92,25 @@ table {
|
|||||||
width: 100%;
|
width: 100%;
|
||||||
border-collapse: collapse;
|
border-collapse: collapse;
|
||||||
margin: 0.5em 0;
|
margin: 0.5em 0;
|
||||||
font-size: 10pt;
|
font-size: 8.5pt;
|
||||||
page-break-inside: avoid;
|
page-break-inside: avoid;
|
||||||
|
table-layout: fixed;
|
||||||
|
word-wrap: break-word;
|
||||||
|
overflow-wrap: break-word;
|
||||||
}
|
}
|
||||||
|
|
||||||
th {
|
th {
|
||||||
background-color: #e8e8e8;
|
background-color: #e8e8e8;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
text-align: left;
|
text-align: left;
|
||||||
padding: 5pt 8pt;
|
padding: 3pt 4pt;
|
||||||
border: 0.5pt solid #bbb;
|
border: 0.5pt solid #bbb;
|
||||||
}
|
}
|
||||||
|
|
||||||
td {
|
td {
|
||||||
padding: 4pt 8pt;
|
padding: 3pt 4pt;
|
||||||
border: 0.5pt solid #ccc;
|
border: 0.5pt solid #ccc;
|
||||||
|
word-break: break-all;
|
||||||
}
|
}
|
||||||
|
|
||||||
tr:nth-child(even) td {
|
tr:nth-child(even) td {
|
||||||
@@ -147,14 +152,15 @@ blockquote {
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def md_to_pdf(md_path, pdf_path):
|
def convert_one(args):
|
||||||
"""Convert a single Markdown file to PDF."""
|
"""Convert a single Markdown file to PDF. Designed for parallel execution."""
|
||||||
|
md_path, pdf_path = args
|
||||||
with open(md_path, "r", encoding="utf-8") as f:
|
with open(md_path, "r", encoding="utf-8") as f:
|
||||||
md_text = f.read()
|
md_text = f.read()
|
||||||
|
|
||||||
html_body = markdown2.markdown(
|
html_body = markdown2.markdown(
|
||||||
md_text,
|
md_text,
|
||||||
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline"],
|
extras=["tables", "fenced-code-blocks", "header-ids", "break-on-newline", "code-friendly"],
|
||||||
)
|
)
|
||||||
|
|
||||||
html_doc = f"""<!DOCTYPE html>
|
html_doc = f"""<!DOCTYPE html>
|
||||||
@@ -169,19 +175,20 @@ def md_to_pdf(md_path, pdf_path):
|
|||||||
</html>"""
|
</html>"""
|
||||||
|
|
||||||
HTML(string=html_doc).write_pdf(pdf_path)
|
HTML(string=html_doc).write_pdf(pdf_path)
|
||||||
print(f" {os.path.basename(md_path)} -> {os.path.basename(pdf_path)}")
|
return os.path.basename(md_path), os.path.basename(pdf_path)
|
||||||
|
|
||||||
|
|
||||||
def find_files(modules=None):
|
def find_files(modules=None):
|
||||||
"""Find MODUL*_*.md files, optionally filtered by module numbers."""
|
"""Find all .md files in summaries/, optionally filtered by module numbers."""
|
||||||
pattern = os.path.join(SUMMARIES_DIR, "MODUL*_*.md")
|
pattern = os.path.join(SUMMARIES_DIR, "*.md")
|
||||||
files = sorted(glob.glob(pattern))
|
files = sorted(glob.glob(pattern))
|
||||||
|
|
||||||
if modules:
|
if modules:
|
||||||
filtered = []
|
filtered = []
|
||||||
for f in files:
|
for f in files:
|
||||||
basename = os.path.basename(f)
|
basename = os.path.basename(f)
|
||||||
# Extract module number from MODUL{N}_...
|
if not basename.startswith("MODUL"):
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
num = int(basename.split("_")[0].replace("MODUL", ""))
|
num = int(basename.split("_")[0].replace("MODUL", ""))
|
||||||
if num in modules:
|
if num in modules:
|
||||||
@@ -212,6 +219,9 @@ def main():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'"
|
"--modules", "-m", help="Module filter, e.g. '1-3' or '2,4,5'"
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--workers", "-w", type=int, default=4, help="Parallel workers (default: 4)"
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
os.makedirs(PDF_DIR, exist_ok=True)
|
os.makedirs(PDF_DIR, exist_ok=True)
|
||||||
@@ -226,11 +236,23 @@ def main():
|
|||||||
print("No MD files found to convert.")
|
print("No MD files found to convert.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"Converting {len(md_files)} file(s) to PDF...")
|
jobs = []
|
||||||
for md_path in md_files:
|
for md_path in md_files:
|
||||||
basename = os.path.splitext(os.path.basename(md_path))[0]
|
basename = os.path.splitext(os.path.basename(md_path))[0]
|
||||||
pdf_path = os.path.join(PDF_DIR, basename + ".pdf")
|
pdf_path = os.path.join(PDF_DIR, basename + ".pdf")
|
||||||
md_to_pdf(md_path, pdf_path)
|
jobs.append((md_path, pdf_path))
|
||||||
|
|
||||||
|
print(f"Converting {len(jobs)} file(s) to PDF with {args.workers} workers...")
|
||||||
|
|
||||||
|
with ProcessPoolExecutor(max_workers=args.workers) as pool:
|
||||||
|
futures = {pool.submit(convert_one, job): job for job in jobs}
|
||||||
|
for future in as_completed(futures):
|
||||||
|
try:
|
||||||
|
src, dst = future.result()
|
||||||
|
print(f" {src} -> {dst}")
|
||||||
|
except Exception as e:
|
||||||
|
md_path = futures[future][0]
|
||||||
|
print(f" ERROR {os.path.basename(md_path)}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
print(f"Done. PDFs saved to {PDF_DIR}")
|
print(f"Done. PDFs saved to {PDF_DIR}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user