From 2165383317aaa3fe3c9d7221602a797389e07f6c Mon Sep 17 00:00:00 2001
From: Marius Mutu
Date: Fri, 15 May 2026 14:17:08 +0300
Subject: [PATCH] diag(login): captureaza starea paginii la timeout pe input#user

Cand wait_for_selector('input#user') da timeout, logam URL, titlu,
text vizibil, lista de input-uri si salvam screenshot full_page - cat
browser-ul e inca viu. Necesar ca sa distingem intre WAF block, randare
lenta si selector schimbat, fara sa mai ghicim.

Handler-ul de blocare trackere inghite acum TargetClosedError la
shutdown, ca sa nu mai spameze log-ul cu "Future exception was never
retrieved" si sa ascunda eroarea reala.

Co-Authored-By: Claude Opus 4.7
---
 btgo_scraper.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/btgo_scraper.py b/btgo_scraper.py
index fcb8c3e..0e5082b 100644
--- a/btgo_scraper.py
+++ b/btgo_scraper.py
@@ -425,9 +425,18 @@ class BTGoScraper:
             'linkedin.com', 'licdn.com', 'omniconvert.com',
             'hotjar.com', 'clarity.ms',
         )
+        def _abort_tracker(route):
+            # La inchiderea browser-ului, route-urile in zbor arunca
+            # TargetClosedError. Le inghitim ca sa nu spameze log-ul cu
+            # "Future exception was never retrieved" si sa ascunda eroarea reala.
+            try:
+                route.abort()
+            except Exception:
+                pass
+
         context.route(
             lambda url: any(host in url for host in blocked_hosts),
-            lambda route: route.abort(),
+            _abort_tracker,
         )
 
         self.page = context.new_page()
@@ -558,6 +567,26 @@ class BTGoScraper:
 
         except PlaywrightTimeout as e:
             logging.error(f"Timeout la login: {e}")
+            # Capteaza starea paginii CAT browser-ul e inca viu, ca sa stim
+            # ce a vazut de fapt: Access Denied, pagina goala, alt formular etc.
+            diag_page = getattr(self, 'login_page', None) or self.page
+            try:
+                logging.error(f" [DIAG] URL: {diag_page.url}")
+                logging.error(f" [DIAG] Titlu: {diag_page.title()!r}")
+                body = diag_page.evaluate(
+                    "document.body ? document.body.innerText.slice(0, 400) : '(fara body)'"
+                )
+                logging.error(f" [DIAG] Text vizibil: {body!r}")
+                inputs = diag_page.evaluate(
+                    "Array.from(document.querySelectorAll('input'))"
+                    ".map(i => i.id + '|' + i.type + '|' + i.name)"
+                )
+                logging.error(f" [DIAG] Input-uri in pagina: {inputs}")
+                shot = Path(self.config.OUTPUT_DIR) / f"diag_login_timeout_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
+                diag_page.screenshot(path=str(shot), full_page=True)
+                logging.error(f" [DIAG] Screenshot salvat: {shot}")
+            except Exception as diag_err:
+                logging.error(f" [DIAG] Nu am putut capta starea paginii: {diag_err}")
             raise Exception("Nu am gasit elementele de login. Verifica selectors!")
         except Exception as e:
             logging.error(f"Eroare la login: {e}")