#!/usr/bin/env python3
"""One-time script to find duplicate partners by CUI (bare number, ignoring RO prefix)."""

import csv
import os
import sys

# Put the parent of this script's directory at the front of sys.path
# (presumably so that the `api` package imported below resolves when the
# script is run directly).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

# Setup Oracle env same as start.sh
from api.app import database

# Partner lists longer than this are truncated in the markdown table only;
# the CSV always receives the full value.
_MAX_PARTNERS_CELL_CHARS = 100

# Groups rows of nom_parteneri by their normalized fiscal code ("bare CUI"):
# the code is upper-cased, trimmed, and a leading "RO" plus any following
# whitespace is stripped. Rows with NVL(sters, 0) <> 0, a NULL cod_fiscal or
# a trimmed cod_fiscal shorter than 3 characters are excluded. Only groups
# with more than one row are returned, largest groups first.
# NOTE(review): LISTAGG raises ORA-01489 when the concatenated result exceeds
# the VARCHAR2 limit; if that ever happens here, consider
# ON OVERFLOW TRUNCATE (Oracle 12.2+) -- confirm the target DB version.
_DUPLICATES_SQL = """
    SELECT bare_cui,
           COUNT(*) as cnt,
           LISTAGG(id_part||':'||denumire, ', ')
               WITHIN GROUP (ORDER BY id_part) as partners
    FROM (SELECT id_part,
                 denumire,
                 TRIM(REGEXP_REPLACE(UPPER(TRIM(cod_fiscal)), '^RO\\s*', '')) as bare_cui
          FROM nom_parteneri
          WHERE NVL(sters,0)=0
            AND cod_fiscal IS NOT NULL
            AND LENGTH(TRIM(cod_fiscal)) >= 3)
    GROUP BY bare_cui
    HAVING COUNT(*) > 1
    ORDER BY cnt DESC
"""


def _fetch_duplicate_rows():
    """Run the duplicate-CUI query and return all rows as (bare_cui, cnt, partners)."""
    database.init_oracle()
    try:
        conn = database.get_oracle_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(_DUPLICATES_SQL)
                return cur.fetchall()
        finally:
            # Always hand the connection back, even if the query fails.
            database.pool.release(conn)
    finally:
        # Close the pool on every path, including a failed connection
        # acquisition, so the script never exits with it left open.
        database.close_oracle()


def _print_markdown_report(rows):
    """Print the duplicate groups to stdout as a markdown table."""
    print("\n## Duplicate Partners Report\n")
    print(f"Found {len(rows)} CUIs with duplicate partners.\n")
    print("| CUI | Count | Partners |")
    print("|-----|-------|----------|")
    for bare_cui, count, partners in rows:
        # Truncate first, then escape: a literal '|' inside a partner name
        # would otherwise be read as a column separator and break the row.
        cell = partners[:_MAX_PARTNERS_CELL_CHARS].replace("|", "\\|")
        print(f"| {bare_cui} | {count} | {cell} |")


def _write_csv(rows, csv_path):
    """Write the duplicate groups (untruncated) to ``csv_path`` as CSV."""
    # Explicit UTF-8: with the locale default encoding, partner names that
    # contain characters outside that code page raise UnicodeEncodeError.
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['bare_cui', 'count', 'partners'])
        writer.writerows(rows)


def scan_duplicates():
    """Find partners sharing the same bare CUI and report them.

    Queries the Oracle database through ``api.app.database``, prints a
    markdown table of the duplicate groups to stdout, and writes the same
    data to ``duplicate_partners.csv`` next to this script.

    Any exception raised by the database layer or by file I/O propagates to
    the caller; the connection is released and the pool closed before it does.
    """
    rows = _fetch_duplicate_rows()

    # Output markdown + CSV
    _print_markdown_report(rows)

    csv_path = os.path.join(os.path.dirname(__file__), 'duplicate_partners.csv')
    _write_csv(rows, csv_path)
    print(f"\nCSV saved: {csv_path}")


if __name__ == '__main__':
    scan_duplicates()