- tools/lead-gen/find_leads.py - searches for companies hiring accountants
- tools/lead-gen/templates/ - email templates for outreach
- Uses Brave Search API to find leads from job sites
- Outputs CSV for manual review and tracking (see the example run below)

First run found 19 companies for potential outreach.
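Example run, sketched from the script's usage string and the save_leads() field list (the output path follows OUTPUT_DIR in the script; no real results are shown):

    python tools/lead-gen/find_leads.py --limit 10
    # -> writes tools/lead-gen/output/leads.csv with the columns:
    #    company,industry,source_url,found_date,cui,email,website,phone,status,notes
    # every row starts with status "new"; cui, email, website stay blank for manual research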
#!/usr/bin/env python3
"""
Lead Generator Minimal - finds companies that need ERP/accounting solutions.

Uses the Brave Search API to find companies hiring accountants/economists.

Output: leads.csv with companies for manual review

Usage:
    python find_leads.py [--limit N]

Requires: BRAVE_API_KEY in the environment or in ~/.clawdbot/clawdbot.json
"""

import os
import re
import csv
import json
import argparse
from datetime import datetime
from pathlib import Path

import requests

OUTPUT_DIR = Path(__file__).parent / "output"
OUTPUT_DIR.mkdir(exist_ok=True)


def get_brave_api_key():
    """Get the Brave API key from the clawdbot config, falling back to the environment."""
    config_path = Path.home() / ".clawdbot" / "clawdbot.json"
    if config_path.exists():
        with open(config_path) as f:
            config = json.load(f)
        # Try tools.web.search.apiKey (clawdbot format)
        api_key = config.get("tools", {}).get("web", {}).get("search", {}).get("apiKey", "")
        if api_key:
            return api_key
        # Fall back to brave.apiKey
        return config.get("brave", {}).get("apiKey", "")
    return os.getenv("BRAVE_API_KEY", "")

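# Sketch of the ~/.clawdbot/clawdbot.json layout the lookup above understands
# (inferred from the key paths in get_brave_api_key(); the real clawdbot config
# may contain more keys):
#
#   {
#     "tools": {"web": {"search": {"apiKey": "<brave-api-key>"}}},
#     "brave": {"apiKey": "<brave-api-key>"}
#   }
#
# Only one location is needed; tools.web.search.apiKey takes precedence.
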

def search_brave(query, count=10):
    """Search using the Brave Search API."""
    api_key = get_brave_api_key()
    if not api_key:
        print("[!] Brave API key not found")
        return []

    url = "https://api.search.brave.com/res/v1/web/search"
    headers = {
        "X-Subscription-Token": api_key,
        "Accept": "application/json"
    }
    params = {
        "q": query,
        "count": count
    }

    try:
        resp = requests.get(url, headers=headers, params=params, timeout=15)
        # Surface HTTP errors (401, 429, ...) instead of silently returning no results
        resp.raise_for_status()
        data = resp.json()
        return data.get("web", {}).get("results", [])
    except Exception as e:
        print(f"[!] Brave search error: {e}")
        return []

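# The only fields read from each Brave web result are 'title', 'description' and 'url'.
# A trimmed sketch of the response shape this script relies on (not the full API schema):
#
#   {"web": {"results": [{"title": "...", "url": "https://...", "description": "..."}]}}
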

def extract_companies_from_results(results):
    """Extract company names from search results."""
    companies = []

    # Patterns for Romanian companies
    patterns = [
        r'([A-Z][A-Z\s\-\.&]+(?:S\.R\.L\.|SRL|S\.A\.|SA|S\.C\.))',   # COMPANY S.R.L.
        r'(SC\s+[A-Z][A-Z\s\-\.&]+(?:S\.R\.L\.|SRL|S\.A\.|SA))',     # SC COMPANY SRL
        r'([A-Z][a-zA-Z\s\-\.&]{2,30}(?:S\.R\.L\.|SRL|S\.A\.|SA))',  # Mixed case
    ]

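    # Hypothetical strings each pattern above is meant to catch (illustration only,
    # not taken from real search results):
    #   pattern 1: "ALFA BETA SRL"        - all-caps name + legal suffix
    #   pattern 2: "SC GAMMA PROD SRL"    - the SC-prefixed form
    #   pattern 3: "Alfa Consulting SRL"  - mixed-case name + legal suffix
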
    for result in results:
        text = f"{result.get('title', '')} {result.get('description', '')}"

        for pattern in patterns:
            matches = re.findall(pattern, text)
            for match in matches:
                company = match.strip()
                # Clean up
                company = re.sub(r'\s+', ' ', company)
                if len(company) > 5 and len(company) < 80:
                    companies.append({
                        "company": company,
                        "source_url": result.get("url", ""),
                        "context": text[:200]
                    })

    return companies


def clean_company_name(name):
    """Strip prefix garbage (job titles, currencies, city names) from a company name."""
    # Remove common prefixes that get caught by the regexes
    prefixes_to_remove = [
        r'^(?:Senior|Junior|Contabil|Economist|Director\s+Economic|Expert|Specialist)\s+',
        r'^(?:RON|EUR|USD)\s+',
        r'^(?:Bucuresti|Cluj|Iasi|Brasov|Constanta)\s+',
        r'^\d+[\s\-]+',
    ]

    result = name.strip()
    for pattern in prefixes_to_remove:
        result = re.sub(pattern, '', result, flags=re.IGNORECASE)

    # Clean trailing garbage
    result = re.sub(r'\s*-\s*$', '', result)
    result = re.sub(r'\s+', ' ', result).strip()

    return result


def deduplicate(leads):
    """Remove duplicate leads by company name."""
    seen = set()
    unique = []
    for lead in leads:
        # Clean the company name
        lead["company"] = clean_company_name(lead["company"])

        # Normalize for comparison
        company_norm = re.sub(r'[^a-z0-9]', '', lead["company"].lower())

        # Skip names that are too short or invalid
        if len(company_norm) < 5:
            continue

        # Skip obvious non-companies (job titles, job boards)
        skip_patterns = [
            r'^emea\s',
            r'^staff\s',
            r'accountant',
            r'^bestjobs',
            r'^ejobs',
            r'^hipo',
        ]
        if any(re.search(p, lead["company"], re.IGNORECASE) for p in skip_patterns):
            continue

        if company_norm not in seen:
            seen.add(company_norm)
            unique.append(lead)
    return unique


def enrich_leads(leads):
    """Add tracking fields for manual follow-up."""
    for lead in leads:
        lead["found_date"] = datetime.now().isoformat()[:10]
        lead["cui"] = ""
        lead["email"] = ""
        lead["website"] = ""
        lead["phone"] = ""
        lead["status"] = "new"  # new, researched, contacted, replied, converted, rejected
        lead["notes"] = ""
        lead["industry"] = ""
    return leads


def save_leads(leads, filename="leads.csv"):
    """Save leads to a CSV for manual review."""
    output_file = OUTPUT_DIR / filename

    fieldnames = ["company", "industry", "source_url", "found_date",
                  "cui", "email", "website", "phone", "status", "notes"]

    # Remove context from the output (used only for extraction)
    for lead in leads:
        lead.pop("context", None)

    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(leads)

    return output_file


def main():
    parser = argparse.ArgumentParser(description="Lead Generator Minimal")
    parser.add_argument("--limit", type=int, default=10, help="Results per search query")
    args = parser.parse_args()

    print("🔍 Searching for leads via Brave Search...")

    # Search queries - companies hiring accountants (kept in Romanian to match the target job sites)
    queries = [
        'site:ejobs.ro contabil angajare 2026',
        'site:ejobs.ro economist angajare',
        'site:bestjobs.eu contabil Romania',
        'site:hipo.ro contabil angajare',
        '"angajam contabil" Romania firma',
        '"cautam economist" Romania SRL',
    ]

    all_leads = []

    for query in queries:
        print(f"  → {query[:50]}...")
        results = search_brave(query, count=args.limit)
        companies = extract_companies_from_results(results)
        all_leads.extend(companies)
        print(f"    Found: {len(companies)} companies")

    # Deduplicate
    unique_leads = deduplicate(all_leads)
    print(f"\n📊 Total: {len(all_leads)} → {len(unique_leads)} unique")

    # Enrich with tracking fields
    enriched = enrich_leads(unique_leads)

    # Save to CSV
    output_file = save_leads(enriched)
    print(f"\n✅ Saved: {output_file}")

    # Print the list
    print(f"\n📋 {len(enriched)} companies found:")
    for i, lead in enumerate(enriched, 1):
        print(f"  {i}. {lead['company']}")

    print("\n💡 Next steps:")
    print(f"  1. Open {output_file}")
    print("  2. Fill in CUI, email, website for the interesting ones")
    print("  3. Update status: researched → contacted → replied")

    return enriched


if __name__ == "__main__":
    main()