From e49e653e12a3204854ec5b19c87af57226e049f6 Mon Sep 17 00:00:00 2001 From: Marius Mutu Date: Tue, 9 Dec 2025 14:32:49 +0200 Subject: [PATCH] Adauga detectie inteligenta campuri login cu strategii fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rezolva problema cand selectoarele BT se schimba - acum incearca multiple strategii pentru a gasi username, password si submit button. Imbunatateste si gestionarea GDPR cookie banner. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- btgo_scraper.py | 264 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 244 insertions(+), 20 deletions(-) diff --git a/btgo_scraper.py b/btgo_scraper.py index dcc5098..8dd9390 100644 --- a/btgo_scraper.py +++ b/btgo_scraper.py @@ -90,6 +90,218 @@ class BTGoScraper: ) logging.info(f"Progress update: {message}") + def _dismiss_gdpr_cookies(self, page): + """ + Inchide GDPR cookie banner daca este vizibil + + Args: + page: Pagina Playwright pe care sa verifice + """ + try: + # Verifica daca exista gdprcookie-wrapper + gdpr_wrapper = page.locator(".gdprcookie-wrapper") + if gdpr_wrapper.is_visible(timeout=3000): + logging.info(" GDPR cookie banner detectat") + + # Incearca diverse butoane de accept (in ordinea probabilitatii) + accept_selectors = [ + ".gdprcookie-wrapper button:has-text('Accept')", + ".gdprcookie-wrapper button:has-text('Accepta')", + ".gdprcookie-wrapper button:has-text('Sunt de acord')", + ".gdprcookie-wrapper button:has-text('OK')", + ".gdprcookie-wrapper .gdprcookie-buttons button:first-child", + ".gdprcookie-wrapper button", + ] + + for selector in accept_selectors: + try: + accept_btn = page.locator(selector).first + if accept_btn.is_visible(timeout=1000): + accept_btn.click() + logging.info(f" [OK] Cookies acceptate (selector: {selector})") + time.sleep(1) # Asteapta sa dispara banner-ul + return True + except: + continue + + logging.warning(" Nu am gasit buton de accept in GDPR wrapper") + return False + except: + logging.info(" Nu exista GDPR cookie banner (sau deja inchis)") + return False + + def _find_username_field(self, page): + """ + Detecteaza inteligent campul de username folosind multiple strategii. + Ordinea: selectori specifici -> selectori generici -> detectie structurala + + Returns: + Locator daca gasit, None altfel + """ + strategies = [ + # 1. Selectori specifici BT (pot sa se schimbe) + ("placeholder_exact", "ID logare"), + ("placeholder_exact", "ID de logare"), + ("id", "user"), + ("name", "user"), + + # 2. Selectori generici (mai stabili) + ("placeholder_contains", "logare"), + ("placeholder_contains", "user"), + ("placeholder_contains", "utilizator"), + ("name_contains", "user"), + ("name_contains", "login"), + ("id_contains", "user"), + ("id_contains", "login"), + + # 3. Selectori structurali (foarte stabili) + ("css", "form input[type='text']:not([type='hidden'])"), + ("css", "form input:not([type='password']):not([type='hidden']):not([type='submit'])"), + ("css", "input[type='text']"), + ("css", ".form-control[type='text']"), + + # 4. Fallback - primul input vizibil care nu e password/submit + ("first_text_input", None), + ] + + for strategy_type, value in strategies: + try: + field = self._try_field_strategy(page, strategy_type, value) + if field and field.is_visible(timeout=1000): + logging.info(f" [USERNAME] Gasit cu strategia: {strategy_type}='{value}'") + return field + except Exception: + continue + + logging.error(" [USERNAME] Nu am gasit campul cu nicio strategie!") + return None + + def _find_password_field(self, page): + """ + Detecteaza inteligent campul de parola. + Campul password e foarte stabil - type='password' e standard HTML. + + Returns: + Locator daca gasit, None altfel + """ + strategies = [ + # 1. Cel mai stabil - type='password' (standard HTML) + ("css", "input[type='password']"), + + # 2. Selectori specifici BT + ("id", "password"), + ("name", "password"), + ("name", "pass"), + ("placeholder_exact", "Parola"), + ("placeholder_exact", "Password"), + + # 3. Selectori generici + ("placeholder_contains", "parola"), + ("placeholder_contains", "password"), + ("name_contains", "pass"), + ("id_contains", "pass"), + + # 4. Fallback structural + ("css", "form input[type='password']"), + ("css", ".form-control[type='password']"), + ] + + for strategy_type, value in strategies: + try: + field = self._try_field_strategy(page, strategy_type, value) + if field and field.is_visible(timeout=1000): + logging.info(f" [PASSWORD] Gasit cu strategia: {strategy_type}='{value}'") + return field + except Exception: + continue + + logging.error(" [PASSWORD] Nu am gasit campul cu nicio strategie!") + return None + + def _find_submit_button(self, page): + """ + Detecteaza inteligent butonul de submit. + + Returns: + Locator daca gasit, None altfel + """ + strategies = [ + # 1. Selectori specifici BT + ("css", "input[value='Autentifică-te']"), + ("css", "button:has-text('Autentifică-te')"), + ("css", "input[value*='Autentific']"), + + # 2. Selectori generici pentru login buttons + ("css", "input[type='submit']"), + ("css", "button[type='submit']"), + ("css", "form button.btn-primary"), + ("css", "form input.btn-primary"), + + # 3. Text-based (mai putin stabil dar functional) + ("text_contains", "Login"), + ("text_contains", "Conectare"), + ("text_contains", "Autentificare"), + ("text_contains", "Intra"), + ("text_contains", "Submit"), + + # 4. Fallback - orice buton din form + ("css", "form button"), + ("css", "form input[type='button']"), + ("css", ".btn-primary"), + ] + + for strategy_type, value in strategies: + try: + button = self._try_button_strategy(page, strategy_type, value) + if button and button.is_visible(timeout=1000): + logging.info(f" [SUBMIT] Gasit cu strategia: {strategy_type}='{value}'") + return button + except Exception: + continue + + logging.error(" [SUBMIT] Nu am gasit butonul cu nicio strategie!") + return None + + def _try_field_strategy(self, page, strategy_type, value): + """Helper pentru a incerca o strategie de gasire a unui camp""" + if strategy_type == "placeholder_exact": + return page.get_by_placeholder(value, exact=True) + elif strategy_type == "placeholder_contains": + return page.locator(f"input[placeholder*='{value}' i]").first + elif strategy_type == "id": + return page.locator(f"#{value}") + elif strategy_type == "id_contains": + return page.locator(f"input[id*='{value}' i]").first + elif strategy_type == "name": + return page.locator(f"input[name='{value}']") + elif strategy_type == "name_contains": + return page.locator(f"input[name*='{value}' i]").first + elif strategy_type == "css": + return page.locator(value).first + elif strategy_type == "label": + return page.get_by_label(value) + elif strategy_type == "first_text_input": + # Gaseste primul input care nu e password, hidden sau submit + inputs = page.locator("input:visible").all() + for inp in inputs: + try: + inp_type = inp.get_attribute("type", timeout=500) or "text" + if inp_type.lower() not in ["password", "hidden", "submit", "button", "checkbox", "radio"]: + return inp + except: + continue + return None + + def _try_button_strategy(self, page, strategy_type, value): + """Helper pentru a incerca o strategie de gasire a butonului""" + if strategy_type == "css": + return page.locator(value).first + elif strategy_type == "text_contains": + return page.locator(f"button:has-text('{value}'), input[value*='{value}' i]").first + elif strategy_type == "role": + return page.get_by_role("button", name=value) + return None + def run(self): """Entry point principal - orchestreaza tot flow-ul""" try: @@ -169,14 +381,9 @@ class BTGoScraper: logging.info("Pagina incarcata") try: - # Cookie consent - asteapta si accepta - logging.info("Acceptare cookies...") - try: - cookie_button = self.page.get_by_role("button", name="Sunt de acord", exact=True) - cookie_button.click(timeout=5000) - logging.info("✓ Cookies acceptate") - except: - logging.info("Nu a fost necesar acceptul cookies (posibil deja acceptat)") + # Cookie consent - asteapta si accepta (GDPR wrapper) + logging.info("Verificare GDPR cookie banner...") + self._dismiss_gdpr_cookies(self.page) # Click pe butonul LOGIN - deschide popup logging.info("Click pe butonul LOGIN...") @@ -188,23 +395,40 @@ class BTGoScraper: self.login_page = popup_info.value logging.info("✓ Popup login deschis") - # Completare username - logging.info("Completare username...") - username_field = self.login_page.get_by_placeholder("ID de logare") + # Verifica GDPR cookies si pe popup + self._dismiss_gdpr_cookies(self.login_page) + + # Asteapta sa se incarce pagina de login + time.sleep(2) + + # Screenshot debug pentru a vedea starea paginii + debug_path = Path(self.config.OUTPUT_DIR) / f"debug_login_popup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png" + self.login_page.screenshot(path=str(debug_path)) + logging.info(f"Screenshot debug salvat: {debug_path}") + + # Completare username - detectie inteligenta cu fallback + logging.info("Detectie camp username...") + username_field = self._find_username_field(self.login_page) + if not username_field: + raise Exception("Nu am gasit campul de username cu nicio strategie!") username_field.fill(self.config.BTGO_USERNAME) - logging.info("✓ Username completat") + logging.info("[OK] Username completat") - # Completare password - logging.info("Completare password...") - password_field = self.login_page.get_by_placeholder("Parola") + # Completare password - detectie inteligenta cu fallback + logging.info("Detectie camp password...") + password_field = self._find_password_field(self.login_page) + if not password_field: + raise Exception("Nu am gasit campul de parola cu nicio strategie!") password_field.fill(self.config.BTGO_PASSWORD) - logging.info("✓ Password completat") + logging.info("[OK] Password completat") - # Click pe butonul de submit - logging.info("Click pe 'Mergi mai departe'...") - submit_button = self.login_page.get_by_role("button", name="Mergi mai departe") + # Click pe butonul de submit - detectie inteligenta cu fallback + logging.info("Detectie buton submit...") + submit_button = self._find_submit_button(self.login_page) + if not submit_button: + raise Exception("Nu am gasit butonul de submit cu nicio strategie!") submit_button.click() - logging.info("✓ Credentials trimise, astept 2FA...") + logging.info("[OK] Credentials trimise, astept 2FA...") self._update_progress("Astept aprobare 2FA pe telefon...") except PlaywrightTimeout as e: