From 51af0918a43732c251c521c0778e8320097e4240 Mon Sep 17 00:00:00 2001 From: Marius Mutu Date: Wed, 22 Apr 2026 07:50:40 +0000 Subject: [PATCH] feat(email): send attachments as WhatsApp documents, fix forward sender - Add /send-document endpoint to WhatsApp bridge (base64 document send) - save_email_as_note() now saves attachment files to disk alongside note - email_digest: extract original sender for Fwd: emails so header shows the real author, not the forwarder; send attachment files after summary - email_forward: send attachment files as documents after text parts - Add extract_original_sender() and save_email_attachment_files() helpers Co-Authored-By: Claude Sonnet 4.6 --- bridge/whatsapp/index.js | 23 +++++++++++++++++++ tools/email_digest.py | 44 +++++++++++++++++++++++++++++------- tools/email_forward.py | 40 +++++++++++++++++++++++++++++++- tools/email_process.py | 49 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 145 insertions(+), 11 deletions(-) diff --git a/bridge/whatsapp/index.js b/bridge/whatsapp/index.js index b815914..af17e71 100644 --- a/bridge/whatsapp/index.js +++ b/bridge/whatsapp/index.js @@ -187,6 +187,29 @@ app.post('/send', async (req, res) => { } }); +app.post('/send-document', async (req, res) => { + const { to, filename, mimetype, data_base64, caption } = req.body || {}; + if (!to || !filename || !data_base64) { + return res.status(400).json({ ok: false, error: 'missing "to", "filename", or "data_base64"' }); + } + if (!connected || !sock) { + return res.status(503).json({ ok: false, error: 'not connected to WhatsApp' }); + } + try { + const buffer = Buffer.from(data_base64, 'base64'); + const result = await sock.sendMessage(to, { + document: buffer, + fileName: filename, + mimetype: mimetype || 'application/octet-stream', + caption: caption || '', + }); + res.json({ ok: true, id: result.key.id }); + } catch (err) { + console.error('[whatsapp] Send document failed:', err.message); + res.status(500).json({ ok: false, error: err.message }); + } +}); + app.post('/react', async (req, res) => { const { to, id, emoji, fromMe, participant } = req.body || {}; diff --git a/tools/email_digest.py b/tools/email_digest.py index f866b8b..c212777 100644 --- a/tools/email_digest.py +++ b/tools/email_digest.py @@ -8,6 +8,7 @@ Usage: """ import sys +import base64 import subprocess import requests from pathlib import Path @@ -15,7 +16,7 @@ from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(PROJECT_ROOT)) -from tools.email_process import save_unread_emails +from tools.email_process import save_unread_emails, extract_original_sender from src.config import Config BRIDGE_URL = "http://127.0.0.1:8098" @@ -35,6 +36,8 @@ def generate_summary(filepath: str, subject: str, from_full: str, date: str) -> except Exception as e: return f"[Eroare la citirea fișierului: {e}]" + display_from = extract_original_sender(subject, email_content, from_full) + prompt = f"""Mai jos este conținutul unui email. Scrie un rezumat factual pentru WhatsApp. EMAIL: @@ -43,18 +46,14 @@ EMAIL: Instrucțiuni: - Începe cu header-ul fix (fără modificări): SUBIECT: {subject} - De la: {from_full} + De la: {display_from} Primit: {date} --- -- Dacă emailul este un forward (subiect începe cu Fwd:/Fw: sau conține "---------- Forwarded message"): - * Ignoră complet persoana care a forwardat. Nu o menționez în rezumat. - * Identifică expeditorul original din corpul emailului (câmpurile From/De la din headerul forwarded). - * Rezumatul trebuie să fie despre mesajul original, ca și cum ar fi fost primit direct de la acel expeditor. +- Ignoră complet orice persoană care a forwardat emailul. Nu o menționă în rezumat. - Scrie rezumatul în stil briefing: factual, clar, persoana a 3-a. * Prima propoziție: cine a trimis mesajul original, ce, cui. - * Ce conține mesajul — concret și direct. Omite politețuri, scuze și amabilități; include doar faptele. + * Ce conține mesajul — concret și direct. Omite politețuri și amabilități; include doar faptele. * Dacă există termene, date, locuri sau acțiuni cerute — menționează-le explicit. - * Dacă există atașamente — listează-le la final: "Atașat: ..." * Dacă există linkuri acționabile (formulare, documente), adaugă o secțiune LINKURI la final. - Nu adăuga secțiuni goale sau care nu se aplică emailului. - Plain text, fără markdown. Fără emoji. @@ -89,6 +88,24 @@ def send_whatsapp(to: str, text: str) -> bool: return False +def send_whatsapp_document(to: str, filepath: str) -> bool: + """Trimite un fișier ca document WhatsApp prin bridge.""" + try: + path = Path(filepath) + data_b64 = base64.b64encode(path.read_bytes()).decode() + import mimetypes + mimetype = mimetypes.guess_type(path.name)[0] or "application/octet-stream" + resp = requests.post( + f"{BRIDGE_URL}/send-document", + json={"to": to, "filename": path.name, "mimetype": mimetype, "data_base64": data_b64}, + timeout=30, + ) + return resp.json().get("ok", False) + except Exception as e: + print(f"[eroare send-document] {e}", file=sys.stderr) + return False + + def run_digest(): print("📬 Verific emailuri necitite...") saved = save_unread_emails() @@ -110,6 +127,7 @@ def run_digest(): subject = result["subject"] from_full = result.get("from_full", result.get("from", "")) date = result.get("date", "") + attachment_paths = result.get("attachment_paths", []) print(f"📧 Procesez: {subject}") summary = generate_summary(filepath, subject, from_full, date) @@ -117,6 +135,8 @@ def run_digest(): if DRY_RUN: print("\n--- REZUMAT (dry-run) ---") print(summary) + if attachment_paths: + print(f"Atașamente: {attachment_paths}") print("------------------------\n") else: ok = send_whatsapp(owner_jid, summary) @@ -125,6 +145,14 @@ def run_digest(): else: print(f"❌ Trimitere eșuată: {subject}") + for att_path in attachment_paths: + ok_att = send_whatsapp_document(owner_jid, att_path) + name = Path(att_path).name + if ok_att: + print(f"✅ Atașament trimis: {name}") + else: + print(f"❌ Atașament eșuat: {name}") + if __name__ == "__main__": run_digest() diff --git a/tools/email_forward.py b/tools/email_forward.py index c3c44b4..f8f290c 100644 --- a/tools/email_forward.py +++ b/tools/email_forward.py @@ -9,6 +9,8 @@ Usage: import sys import re +import base64 +import mimetypes import requests from pathlib import Path @@ -115,6 +117,21 @@ def send_whatsapp(to: str, text: str) -> bool: return False +def send_whatsapp_document(to: str, filename: str, data: bytes) -> bool: + try: + mimetype = mimetypes.guess_type(filename)[0] or "application/octet-stream" + resp = requests.post( + f"{BRIDGE_URL}/send-document", + json={"to": to, "filename": filename, "mimetype": mimetype, + "data_base64": base64.b64encode(data).decode()}, + timeout=30, + ) + return resp.json().get("ok", False) + except Exception as e: + print(f"[eroare send-document] {e}", file=sys.stderr) + return False + + def fetch_unread_emails(): """Preia emailurile necitite din inbox fără a le salva sau marca ca citite.""" mail = imaplib.IMAP4_SSL(IMAP_SERVER, IMAP_PORT) @@ -138,12 +155,24 @@ def fetch_unread_emails(): if sender_email not in WHITELIST: continue + # Extract attachment data (name → bytes) + att_data = {} + if msg.is_multipart(): + for part in msg.walk(): + fname = part.get_filename() + if fname: + fname = decode_mime_header(fname) + payload = part.get_payload(decode=True) + if payload: + att_data[fname] = payload + results.append({ 'subject': decode_mime_header(msg['Subject']), 'from_full': from_addr, 'date': msg['Date'], 'body': get_email_body(msg), - 'attachments': get_email_attachments(msg), + 'attachments': list(att_data.keys()), + 'attachment_data': att_data, }) mail.logout() @@ -172,6 +201,8 @@ def run_forward(): print(f"\n--- FORWARD {i+1}/{len(parts)} (dry-run) ---") print(part) print("------------------------\n") + if em.get('attachment_data'): + print(f"Atașamente: {list(em['attachment_data'].keys())}") else: for part in parts: ok = send_whatsapp(owner_jid, part) @@ -181,6 +212,13 @@ def run_forward(): else: print(f"Trimis pe WhatsApp ({len(parts)} mesaje): {subject}") + for fname, fdata in em.get('attachment_data', {}).items(): + ok_att = send_whatsapp_document(owner_jid, fname, fdata) + if ok_att: + print(f"Atașament trimis: {fname}") + else: + print(f"Atașament eșuat: {fname}") + if __name__ == "__main__": run_forward() diff --git a/tools/email_process.py b/tools/email_process.py index d8757b0..18f76ca 100755 --- a/tools/email_process.py +++ b/tools/email_process.py @@ -93,6 +93,46 @@ def get_email_attachments(msg) -> list: attachments.append(f"[{part.get_content_type()}]") return attachments +def save_email_attachment_files(msg, dest_dir: Path) -> list: + """Save attachment files from email to dest_dir. Returns list of saved file paths.""" + saved = [] + if not msg.is_multipart(): + return saved + dest_dir.mkdir(parents=True, exist_ok=True) + for part in msg.walk(): + filename = part.get_filename() + if not filename: + continue + filename = decode_mime_header(filename) + payload = part.get_payload(decode=True) + if payload is None: + continue + dest = dest_dir / filename + # Avoid overwriting — append counter if needed + counter = 1 + while dest.exists(): + stem, suffix = Path(filename).stem, Path(filename).suffix + dest = dest_dir / f"{stem}_{counter}{suffix}" + counter += 1 + dest.write_bytes(payload) + saved.append(dest) + return saved + +def extract_original_sender(subject: str, body_content: str, from_full: str) -> str: + """If email is a forward, extract original sender from body.""" + if not re.match(r'^(fwd?|fw)\s*[:\s]', subject, re.IGNORECASE): + return from_full + match = re.search( + r'(?:De la|From):\s*(.+?)(?:\n|$)', + body_content, re.IGNORECASE | re.MULTILINE + ) + if match: + candidate = match.group(1).strip() + # Skip blank or markdown artifacts + if candidate and not candidate.startswith('**') and '@' in candidate or len(candidate) > 3: + return candidate + return from_full + def extract_sender_email(from_header: str) -> str: """Extract just the email address from From header""" match = re.search(r'<([^>]+)>', from_header) @@ -204,11 +244,15 @@ def save_email_as_note(eid: str) -> dict: KB_PATH.mkdir(parents=True, exist_ok=True) filepath.write_text(content, encoding='utf-8') - + + # Save attachment files next to the note + att_dir = KB_PATH / f"{date_prefix}_{slug}_attachments" + attachment_paths = save_email_attachment_files(msg, att_dir) + # Mark as seen mail.store(eid.encode(), '+FLAGS', '\\Seen') mail.logout() - + return { 'ok': True, 'file': str(filepath), @@ -216,6 +260,7 @@ def save_email_as_note(eid: str) -> dict: 'from': sender_email, 'from_full': from_addr, 'date': date_str, + 'attachment_paths': [str(p) for p in attachment_paths], } def save_unread_emails():