Files
clawd/tools/lead-gen/find_leads.py
Echo ead8132d23 feat(A0): add minimal lead generation system
- tools/lead-gen/find_leads.py - searches for companies hiring accountants
- tools/lead-gen/templates/ - email templates for outreach
- Uses Brave Search API to find leads from job sites
- Outputs CSV for manual review and tracking

First run found 19 companies for potential outreach.
2026-02-04 07:55:55 +00:00

230 lines
7.1 KiB
Python

#!/usr/bin/env python3
"""
Lead Generator Minimal - finds companies that need ERP/accounting solutions.

Uses the Brave Search API to find companies that are hiring
accountants/economists.

Output: leads.csv with companies for manual review

Usage:
    python find_leads.py [--limit N]

Requires: BRAVE_API_KEY in the environment or ~/.clawdbot/clawdbot.json
"""
import os
import re
import csv
import json
import argparse
from datetime import datetime
from pathlib import Path
import requests
# Directory that receives the generated CSV; it lives next to this script.
# NOTE: the directory is created as a side effect of importing this module.
OUTPUT_DIR = Path(__file__).parent / "output"
OUTPUT_DIR.mkdir(exist_ok=True)
def get_brave_api_key():
    """Return the Brave Search API key, or "" when none is configured.

    Lookup order:
      1. ``tools.web.search.apiKey`` in ``~/.clawdbot/clawdbot.json``
      2. ``brave.apiKey`` in the same file
      3. the ``BRAVE_API_KEY`` environment variable

    The environment variable is consulted whenever the config file does not
    yield a key (missing file, unreadable/malformed JSON, or no key inside),
    so an existing but unrelated config file no longer hides the variable.
    """
    config_path = Path.home() / ".clawdbot" / "clawdbot.json"
    config = {}
    if config_path.exists():
        try:
            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            # A broken config must not block the environment fallback.
            print(f"[!] Could not read {config_path}: {e}")
            config = {}

    # Candidate locations of the key inside the config, in priority order.
    key_paths = (
        ("tools", "web", "search", "apiKey"),  # clawdbot format
        ("brave", "apiKey"),                   # legacy fallback
    )
    for key_path in key_paths:
        node = config
        for key in key_path:
            # Stop descending as soon as an intermediate node is not a dict
            # (e.g. null or a scalar) instead of raising AttributeError.
            node = node.get(key) if isinstance(node, dict) else None
        if node:
            return node

    return os.getenv("BRAVE_API_KEY", "")
def search_brave(query, count=10):
    """Run one web search against the Brave Search API.

    Args:
        query: Search string sent as the ``q`` parameter.
        count: Number of results requested (sent as the ``count`` parameter).

    Returns:
        The list found under ``web.results`` in the JSON response, or an
        empty list when no API key is available, the request fails, the
        server answers with an HTTP error status, or the payload does not
        have the expected shape. Failures are reported on stdout; this
        function never raises for network/HTTP/JSON problems.
    """
    api_key = get_brave_api_key()
    if not api_key:
        print("[!] Nu am găsit Brave API key")
        return []

    url = "https://api.search.brave.com/res/v1/web/search"
    headers = {
        "X-Subscription-Token": api_key,
        "Accept": "application/json",
    }
    params = {
        "q": query,
        "count": count,
    }

    try:
        resp = requests.get(url, headers=headers, params=params, timeout=15)
        # Without this, 401/422/429 responses parse as JSON that simply has
        # no "web" key and the caller sees "0 results" with no explanation.
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"[!] Brave search error: {e}")
        return []

    # Be tolerant of unexpected payload shapes (e.g. "web": null).
    if not isinstance(data, dict):
        return []
    web = data.get("web")
    if not isinstance(web, dict):
        return []
    return web.get("results") or []
def extract_companies_from_results(results):
    """Collect candidate company names from a list of search results.

    For every result the title and description are joined into one string
    and scanned with each name pattern in turn. Every hit whose
    whitespace-normalised length is strictly between 5 and 80 characters is
    recorded, so the same name may appear several times (once per matching
    pattern); duplicates are not removed here.

    Args:
        results: Iterable of dicts; the ``title``, ``description`` and
            ``url`` keys are read, each defaulting to "" when absent.

    Returns:
        A list of dicts with the keys ``company``, ``source_url`` and
        ``context`` (the first 200 characters of the scanned text).
    """
    # Regexes for Romanian company names, each ending in a legal-form suffix.
    name_patterns = tuple(
        re.compile(source)
        for source in (
            r'([A-Z][A-Z\s\-\.&]+(?:S\.R\.L\.|SRL|S\.A\.|SA|S\.C\.))',  # COMPANY S.R.L.
            r'(SC\s+[A-Z][A-Z\s\-\.&]+(?:S\.R\.L\.|SRL|S\.A\.|SA))',  # SC COMPANY SRL
            r'([A-Z][a-zA-Z\s\-\.&]{2,30}(?:S\.R\.L\.|SRL|S\.A\.|SA))',  # Mixed case
        )
    )

    found = []
    for item in results:
        blob = f"{item.get('title', '')} {item.get('description', '')}"
        for regex in name_patterns:
            for hit in regex.finditer(blob):
                # Trim, then collapse internal whitespace runs to one space.
                name = re.sub(r'\s+', ' ', hit.group(1).strip())
                if not 5 < len(name) < 80:
                    continue
                found.append({
                    "company": name,
                    "source_url": item.get("url", ""),
                    "context": blob[:200],
                })
    return found
def clean_company_name(name):
    """Strip leading noise that the extraction regexes tend to capture.

    Removes job-title words, currency codes, a few city names and leading
    numbers from the start of ``name`` (case-insensitively), then drops a
    trailing dash and collapses whitespace.

    The prefix patterns are applied repeatedly until the name stops
    changing, so stacked prefixes in any order ("Bucuresti Contabil X",
    "Senior Contabil X") are removed completely rather than only the first
    one that happens to match.

    Args:
        name: Raw company name.

    Returns:
        The cleaned name (possibly an empty string).
    """
    # Remove common prefixes that get caught by regex
    prefixes_to_remove = [
        r'^(?:Senior|Junior|Contabil|Economist|Director\s+Economic|Expert|Specialist)\s+',
        r'^(?:RON|EUR|USD)\s+',
        r'^(?:Bucuresti|Cluj|Iasi|Brasov|Constanta)\s+',
        r'^\d+[\s\-]+',
    ]
    result = name.strip()

    # Every substitution only ever shortens the string, so this terminates.
    previous = None
    while previous != result:
        previous = result
        for pattern in prefixes_to_remove:
            result = re.sub(pattern, '', result, flags=re.IGNORECASE)

    # Clean trailing garbage
    result = re.sub(r'\s*-\s*$', '', result)
    result = re.sub(r'\s+', ' ', result).strip()
    return result
def deduplicate(leads):
    """Drop repeated and obviously bogus leads, keeping first occurrences.

    Each lead's ``company`` field is rewritten in place with its cleaned
    form (this happens even for leads that end up being discarded). Two
    leads count as the same company when their names are equal after
    lower-casing and removing everything except ``a-z`` and ``0-9``.

    Args:
        leads: Iterable of dicts, each with a ``company`` key.

    Returns:
        A new list containing the surviving lead dicts in original order.
    """
    # Names matching any of these are job-board or job-title noise.
    junk_patterns = tuple(
        re.compile(source, re.IGNORECASE)
        for source in (
            r'^emea\s',
            r'^staff\s',
            r'accountant',
            r'^bestjobs',
            r'^ejobs',
            r'^hipo',
        )
    )

    seen_keys = set()
    kept = []
    for entry in leads:
        entry["company"] = clean_company_name(entry["company"])
        name = entry["company"]

        # Comparison key: alphanumerics only, lower-cased.
        key = re.sub(r'[^a-z0-9]', '', name.lower())

        if len(key) < 5:
            continue  # too short to be a plausible company name
        if any(rx.search(name) for rx in junk_patterns):
            continue
        if key in seen_keys:
            continue

        seen_keys.add(key)
        kept.append(entry)
    return kept
def enrich_leads(leads):
    """Add the tracking columns to every lead, in place.

    Sets ``found_date`` to today's date (``YYYY-MM-DD``), ``status`` to
    ``"new"`` and the remaining tracking fields to empty strings,
    overwriting any values already present under those keys.

    Args:
        leads: Iterable of lead dicts to mutate.

    Returns:
        The same ``leads`` object that was passed in.
    """
    for entry in leads:
        entry.update({
            "found_date": datetime.now().isoformat()[:10],
            "cui": "",
            "email": "",
            "website": "",
            "phone": "",
            # new, researched, contacted, replied, converted, rejected
            "status": "new",
            "notes": "",
            "industry": "",
        })
    return leads
def save_leads(leads, filename="leads.csv"):
    """Write the leads to a CSV file inside ``OUTPUT_DIR`` for manual review.

    The file is overwritten if it already exists. The ``context`` key is
    removed from every lead dict as a side effect; any other key that is
    not one of the CSV columns is ignored when writing.

    Args:
        leads: List of lead dicts.
        filename: Name of the CSV file to create under ``OUTPUT_DIR``.

    Returns:
        The path of the written file.
    """
    target = OUTPUT_DIR / filename
    columns = ["company", "industry", "source_url", "found_date",
               "cui", "email", "website", "phone", "status", "notes"]

    # "context" is only needed during extraction, so drop it from the dicts.
    for row in leads:
        row.pop("context", None)

    with target.open("w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=columns, extrasaction='ignore')
        out.writeheader()
        for row in leads:
            out.writerow(row)
    return target
def main():
    """Command-line entry point: search, extract, deduplicate, save, report.

    Parses ``--limit`` (results requested per search query, default 10),
    runs a fixed list of job-site queries through ``search_brave``, extracts
    company names from the results, deduplicates and enriches them, writes
    them with ``save_leads`` and prints a summary with suggested next steps.

    Returns:
        The list of enriched lead dicts that was written to the CSV.
    """
    parser = argparse.ArgumentParser(description="Lead Generator Minimal")
    parser.add_argument("--limit", type=int, default=10, help="Results per search query")
    args = parser.parse_args()

    print("🔍 Căutare leads via Brave Search...")

    # Search queries - companies that are hiring accountants
    queries = [
        'site:ejobs.ro contabil angajare 2026',
        'site:ejobs.ro economist angajare',
        'site:bestjobs.eu contabil Romania',
        'site:hipo.ro contabil angajare',
        '"angajam contabil" Romania firma',
        '"cautam economist" Romania SRL',
    ]

    all_leads = []
    for query in queries:
        print(f"{query[:50]}...")
        results = search_brave(query, count=args.limit)
        companies = extract_companies_from_results(results)
        all_leads.extend(companies)
        print(f" Găsite: {len(companies)} companii")

    # Deduplicate
    unique_leads = deduplicate(all_leads)
    # The two counts need a separator; previously they were printed fused
    # together (e.g. 45 and 19 showed up as "4519").
    print(f"\n📊 Total: {len(all_leads)} → {len(unique_leads)} unice")

    # Enrich with tracking fields
    enriched = enrich_leads(unique_leads)

    # Save
    output_file = save_leads(enriched)
    print(f"\n✅ Salvat: {output_file}")

    # Show the list
    print(f"\n📋 {len(enriched)} companii găsite:")
    for i, lead in enumerate(enriched, 1):
        print(f" {i}. {lead['company']}")

    print("\n💡 Următorii pași:")
    print(f" 1. Deschide {output_file}")
    print(" 2. Completează CUI, email, website pentru cele interesante")
    print(" 3. Marchează status: researched → contacted → replied")
    return enriched
if __name__ == "__main__":
main()