#!/usr/bin/env python3
"""
Generate PDF from markdown content.
Outputs PDF to stdout as binary.
Simple, robust approach focusing on text content.

Input is a JSON object read from stdin. Its ``markdown`` key holds the
markdown text to render. A ``filename`` key is accepted but not used here,
because the PDF bytes are written to stdout rather than to a file.

On any failure a JSON object of the form ``{"error": "<message>"}`` is
written to stderr and the process exits with status 1.
"""

import sys
import json
import re
from pathlib import Path

DEJAVU_DIR = Path("/usr/share/fonts/truetype/dejavu")
BODY_SIZE = 10
LINE_HEIGHT = 5

# Line kind -> (font size, cell height, vertical gap after the heading).
HEADING_STYLES = {
    "h1": (16, 7, 1),
    "h2": (12, 6, 0.5),
    "h3": (11, 5, 0.3),
}

# [label](target) -> label
LINK_RE = re.compile(r'\[(.*?)\]\(.*?\)')

# Inline emphasis. Each alternative has exactly one named group so that
# ``match.lastgroup`` identifies which kind of marker matched.
#  * Content must be non-empty and must not start or end with whitespace,
#    so "2 * 3 * 4" and a lone "**" stay literal.
#  * Underscore markers must not touch a word character on the outside,
#    so snake_case identifiers are not turned into italics.
INLINE_MARKUP_RE = re.compile(
    r'\*\*(?=\S)(?P<bold_star>.+?)(?<=\S)\*\*'
    r'|(?<!\w)__(?=\S)(?P<bold_under>.+?)(?<=\S)__(?!\w)'
    r'|\*(?=\S)(?P<ital_star>.+?)(?<=\S)\*'
    r'|(?<!\w)_(?=\S)(?P<ital_under>.+?)(?<=\S)_(?!\w)'
)

NUMBERED_RE = re.compile(r'^\s*(\d+\.)\s(.*)')


def latin1_safe(text):
    """Return *text* with every character outside Latin-1 replaced by '?'.

    Used only on the Helvetica fallback path, so that text containing
    characters the fallback font cannot encode still produces a PDF
    instead of aborting the whole document.
    """
    return text.encode("latin-1", "replace").decode("latin-1")


def split_inline_markup(text):
    """Split *text* into ``(style, chunk)`` segments.

    Markdown links are first reduced to their label. ``style`` is ``""``
    for plain text, ``"B"`` for ``**bold**`` / ``__bold__`` and ``"I"`` for
    ``*italic*`` / ``_italic_``. The markers themselves are not included
    in ``chunk``. Empty segments are never returned.
    """
    text = LINK_RE.sub(r'\1', text)
    segments = []
    cursor = 0
    for match in INLINE_MARKUP_RE.finditer(text):
        if match.start() > cursor:
            segments.append(("", text[cursor:match.start()]))
        group = match.lastgroup
        style = "B" if group.startswith("bold") else "I"
        segments.append((style, match.group(group)))
        cursor = match.end()
    if cursor < len(text):
        segments.append(("", text[cursor:]))
    return segments


def classify_line(line):
    """Classify one markdown line as ``(kind, prefix, text)``.

    ``kind`` is one of ``"blank"``, ``"h1"``, ``"h2"``, ``"h3"``,
    ``"bullet"``, ``"numbered"`` or ``"text"``. ``prefix`` is the literal
    list marker to print before the text (``"- "`` or e.g. ``"3. "``) and
    is empty for every other kind. Headings are only recognised at the
    very start of the line; list items may be indented.
    """
    stripped = line.strip()
    if not stripped:
        return ("blank", "", "")
    if line.startswith('# '):
        return ("h1", "", line[2:].strip())
    if line.startswith('## '):
        return ("h2", "", line[3:].strip())
    if line.startswith('### '):
        return ("h3", "", line[4:].strip())
    if stripped.startswith(('- ', '* ')):
        return ("bullet", "- ", stripped[2:].strip())
    numbered = NUMBERED_RE.match(line)
    if numbered:
        return ("numbered", numbered.group(1) + ' ', numbered.group(2))
    return ("text", "", stripped)


def render_rich_text(pdf, text, use_dejavu, size):
    """Render text with inline **bold** and *italic* formatting.

    Args:
        pdf: FPDF-like object providing ``set_font(family, style, size)``
            and ``write(height, text)``.
        text: One line of markdown inline content.
        use_dejavu: True when the DejaVu family is loaded; otherwise the
            Helvetica fallback is used and the text is made Latin-1 safe.
        size: Font size in points.

    The regular style is restored after every styled segment.
    """
    font_name = "DejaVu" if use_dejavu else "Helvetica"
    if not use_dejavu:
        text = latin1_safe(text)

    for style, chunk in split_inline_markup(text):
        if style == "I":
            try:
                pdf.set_font(font_name, "I", size)
            except Exception:
                # Italic face not available: keep the current font.
                pass
        elif style == "B":
            pdf.set_font(font_name, "B", size)
        pdf.write(LINE_HEIGHT, chunk)
        if style:
            pdf.set_font(font_name, "", size)


def configure_fonts(pdf):
    """Select the document font and return True when DejaVu is in use.

    DejaVu (regular and bold required, oblique faces optional) is
    preferred for Romanian support. If it cannot be loaded for any reason
    the built-in Helvetica font is selected instead.
    """
    try:
        regular = DEJAVU_DIR / "DejaVuSans.ttf"
        if not regular.exists():
            raise FileNotFoundError("DejaVu font not found")
        pdf.add_font("DejaVu", "", str(regular))
        pdf.add_font("DejaVu", "B", str(DEJAVU_DIR / "DejaVuSans-Bold.ttf"))
        optional_faces = (
            ("I", "DejaVuSans-Oblique.ttf"),
            ("BI", "DejaVuSans-BoldOblique.ttf"),
        )
        for style, file_name in optional_faces:
            face = DEJAVU_DIR / file_name
            if face.exists():
                pdf.add_font("DejaVu", style, str(face))
        pdf.set_font("DejaVu", "", BODY_SIZE)
        return True
    except Exception:
        # Deliberate best-effort: any font problem falls back to Helvetica.
        pdf.set_font("Helvetica", "", BODY_SIZE)
        return False


def build_pdf(markdown_content, fpdf_class):
    """Render *markdown_content* into a new ``fpdf_class`` document.

    Returns the populated PDF object; the caller serialises it.
    """
    pdf = fpdf_class(format='A4')
    pdf.add_page()
    pdf.set_margins(12, 12, 12)

    use_dejavu = configure_fonts(pdf)
    font_name = "DejaVu" if use_dejavu else "Helvetica"

    # splitlines() also handles CRLF input, so no stray '\r' reaches the PDF.
    for line in markdown_content.splitlines():
        kind, prefix, text = classify_line(line)

        if kind == "blank":
            # Skip empty lines but add spacing.
            pdf.ln(2)
            continue

        if kind in HEADING_STYLES:
            font_size, cell_height, gap = HEADING_STYLES[kind]
            heading = text if use_dejavu else latin1_safe(text)
            pdf.set_font(font_name, "B", font_size)
            # NOTE(review): fpdf2 documents `ln=` as deprecated in favour of
            # new_x/new_y; kept so the call also works on older releases.
            # Confirm the deployed fpdf2 version before switching.
            pdf.multi_cell(0, cell_height, heading, ln=True)
            pdf.ln(gap)
            pdf.set_font(font_name, "", BODY_SIZE)
            continue

        # Bullet, numbered item or regular paragraph line.
        if prefix:
            pdf.write(LINE_HEIGHT, prefix)
        render_rich_text(pdf, text, use_dejavu, BODY_SIZE)
        pdf.ln(LINE_HEIGHT)

    return pdf


def main():
    """Read the JSON request from stdin and write the PDF to stdout.

    Returns the process exit status: 0 on success, 1 on failure. Every
    failure, including malformed input, is reported as JSON on stderr.
    """
    try:
        input_data = json.load(sys.stdin)
        if not isinstance(input_data, dict):
            raise ValueError("input must be a JSON object")
        markdown_content = input_data.get('markdown') or ''
        if not isinstance(markdown_content, str):
            raise TypeError("'markdown' must be a string")

        # Imported here so a missing dependency is reported as JSON too.
        from fpdf import FPDF

        pdf = build_pdf(markdown_content, FPDF)
        pdf_bytes = pdf.output()
        sys.stdout.buffer.write(pdf_bytes)
        return 0
    except Exception as e:
        error_json = json.dumps({'error': str(e)})
        sys.stderr.write(error_json)
        return 1


if __name__ == "__main__":
    sys.exit(main())