Adauga detectie inteligenta campuri login cu strategii fallback
Rezolva problema cand selectoarele BT se schimba: acum se incearca multiple strategii pentru a gasi campurile username si password si butonul de submit. Imbunatateste si gestionarea GDPR cookie banner.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
264
btgo_scraper.py
264
btgo_scraper.py
@@ -90,6 +90,218 @@ class BTGoScraper:
|
|||||||
)
|
)
|
||||||
logging.info(f"Progress update: {message}")
|
logging.info(f"Progress update: {message}")
|
||||||
|
|
||||||
|
def _dismiss_gdpr_cookies(self, page):
    """
    Close the GDPR cookie banner if one is shown on the given page.

    This is a best-effort step: Playwright errors are logged and reported
    as ``False`` instead of being raised, so a missing or unusual banner
    never aborts the surrounding login flow.

    Args:
        page: Playwright page (or popup) to inspect.

    Returns:
        True if an accept button was clicked, False in every other case
        (no banner, or banner present but no clickable button found).
    """
    banner = page.locator(".gdprcookie-wrapper").first
    try:
        # Locator.is_visible() ignores its timeout argument and answers
        # immediately, so an explicit wait is needed to give a banner that
        # is rendered late up to 3 seconds to show up.
        banner.wait_for(state="visible", timeout=3000)
    except Exception:
        logging.info(" Nu exista GDPR cookie banner (sau deja inchis)")
        return False

    logging.info(" GDPR cookie banner detectat")

    # Candidate accept buttons, most likely first. The last two entries are
    # structural fallbacks that do not depend on the button caption.
    accept_selectors = (
        ".gdprcookie-wrapper button:has-text('Accept')",
        ".gdprcookie-wrapper button:has-text('Accepta')",
        ".gdprcookie-wrapper button:has-text('Sunt de acord')",
        ".gdprcookie-wrapper button:has-text('OK')",
        ".gdprcookie-wrapper .gdprcookie-buttons button:first-child",
        ".gdprcookie-wrapper button",
    )

    for selector in accept_selectors:
        try:
            accept_btn = page.locator(selector).first
            # The banner is already on screen, so an immediate visibility
            # check is enough here.
            if not accept_btn.is_visible():
                continue
            accept_btn.click()
        except Exception as exc:
            # Keep going with the next selector, but leave a trace of why
            # this one was rejected instead of swallowing it silently.
            logging.debug(f" Selector GDPR esuat ({selector}): {exc}")
            continue

        logging.info(f" [OK] Cookies acceptate (selector: {selector})")
        time.sleep(1)  # Give the banner time to disappear
        return True

    logging.warning(" Nu am gasit buton de accept in GDPR wrapper")
    return False
|
||||||
|
|
||||||
|
def _find_username_field(self, page):
    """
    Locate the username (login ID) input by trying a ranked list of strategies.

    Strategies run in order: BT-specific selectors, then generic attribute
    matches, then structural CSS selectors, and finally the first visible
    non-password input. The first strategy that yields a visible element wins.

    Args:
        page: Playwright page (or popup) to search.

    Returns:
        The matching Locator, or None if no strategy produced a visible field.
    """
    candidates = (
        # 1. BT-specific selectors (may change when the site is updated)
        ("placeholder_exact", "ID logare"),
        ("placeholder_exact", "ID de logare"),
        ("id", "user"),
        ("name", "user"),

        # 2. Generic attribute matches (more stable)
        ("placeholder_contains", "logare"),
        ("placeholder_contains", "user"),
        ("placeholder_contains", "utilizator"),
        ("name_contains", "user"),
        ("name_contains", "login"),
        ("id_contains", "user"),
        ("id_contains", "login"),

        # 3. Structural selectors (very stable)
        ("css", "form input[type='text']:not([type='hidden'])"),
        ("css", "form input:not([type='password']):not([type='hidden']):not([type='submit'])"),
        ("css", "input[type='text']"),
        ("css", ".form-control[type='text']"),

        # 4. Last resort: first visible input that is not password/submit
        ("first_text_input", None),
    )

    for kind, needle in candidates:
        try:
            locator = self._try_field_strategy(page, kind, needle)
            # NOTE(review): Playwright documents is_visible()'s timeout as
            # ignored, so this is presumably an immediate check - confirm
            # whether a real wait was intended.
            if not locator or not locator.is_visible(timeout=1000):
                continue
            logging.info(f" [USERNAME] Gasit cu strategia: {kind}='{needle}'")
            return locator
        except Exception:
            # A failing strategy is expected; move on to the next one.
            continue

    logging.error(" [USERNAME] Nu am gasit campul cu nicio strategie!")
    return None
|
||||||
|
|
||||||
|
def _find_password_field(self, page):
    """
    Locate the password input by trying a ranked list of strategies.

    The standard HTML ``type='password'`` selector is tried first because it
    is the most stable; BT-specific and generic attribute matches follow.

    Args:
        page: Playwright page (or popup) to search.

    Returns:
        The matching Locator, or None if no strategy produced a visible field.
    """
    candidates = (
        # 1. Most stable: type='password' (standard HTML)
        ("css", "input[type='password']"),

        # 2. BT-specific selectors
        ("id", "password"),
        ("name", "password"),
        ("name", "pass"),
        ("placeholder_exact", "Parola"),
        ("placeholder_exact", "Password"),

        # 3. Generic attribute matches
        ("placeholder_contains", "parola"),
        ("placeholder_contains", "password"),
        ("name_contains", "pass"),
        ("id_contains", "pass"),

        # 4. Structural fallback
        ("css", "form input[type='password']"),
        ("css", ".form-control[type='password']"),
    )

    for kind, needle in candidates:
        try:
            locator = self._try_field_strategy(page, kind, needle)
            # NOTE(review): Playwright documents is_visible()'s timeout as
            # ignored, so this is presumably an immediate check - confirm
            # whether a real wait was intended.
            if not locator or not locator.is_visible(timeout=1000):
                continue
            logging.info(f" [PASSWORD] Gasit cu strategia: {kind}='{needle}'")
            return locator
        except Exception:
            # A failing strategy is expected; move on to the next one.
            continue

    logging.error(" [PASSWORD] Nu am gasit campul cu nicio strategie!")
    return None
|
||||||
|
|
||||||
|
def _find_submit_button(self, page):
    """
    Locate the login submit button by trying a ranked list of strategies.

    Strategies run in order: BT-specific captions, generic submit selectors,
    caption text matches, and finally any button inside a form. The first
    strategy that yields a visible element wins.

    Args:
        page: Playwright page (or popup) to search.

    Returns:
        The matching Locator, or None if no strategy produced a visible button.
    """
    candidates = (
        # 1. BT-specific selectors
        ("css", "input[value='Autentifică-te']"),
        ("css", "button:has-text('Autentifică-te')"),
        ("css", "input[value*='Autentific']"),

        # 2. Generic selectors for login buttons
        ("css", "input[type='submit']"),
        ("css", "button[type='submit']"),
        ("css", "form button.btn-primary"),
        ("css", "form input.btn-primary"),

        # 3. Caption-based (less stable, but works)
        ("text_contains", "Login"),
        ("text_contains", "Conectare"),
        ("text_contains", "Autentificare"),
        ("text_contains", "Intra"),
        ("text_contains", "Submit"),

        # 4. Last resort: any button inside a form
        ("css", "form button"),
        ("css", "form input[type='button']"),
        ("css", ".btn-primary"),
    )

    for kind, needle in candidates:
        try:
            locator = self._try_button_strategy(page, kind, needle)
            # NOTE(review): Playwright documents is_visible()'s timeout as
            # ignored, so this is presumably an immediate check - confirm
            # whether a real wait was intended.
            if not locator or not locator.is_visible(timeout=1000):
                continue
            logging.info(f" [SUBMIT] Gasit cu strategia: {kind}='{needle}'")
            return locator
        except Exception:
            # A failing strategy is expected; move on to the next one.
            continue

    logging.error(" [SUBMIT] Nu am gasit butonul cu nicio strategie!")
    return None
|
||||||
|
|
||||||
|
def _try_field_strategy(self, page, strategy_type, value):
    """
    Build a locator for an input field according to a single lookup strategy.

    The returned locator is not checked for visibility here (except for
    ``first_text_input``, which only considers visible inputs); callers are
    expected to verify it before use.

    Args:
        page: Playwright page (or popup) to search.
        strategy_type: One of "placeholder_exact", "placeholder_contains",
            "id", "id_contains", "name", "name_contains", "css", "label"
            or "first_text_input".
        value: Text or selector used by the strategy. It is interpolated
            into CSS selectors as-is, so it must not contain quotes.
            Unused for "first_text_input".

    Returns:
        A Locator for the strategy, or None when the strategy is unknown or
        "first_text_input" finds no suitable input.
    """
    if strategy_type == "placeholder_exact":
        return page.get_by_placeholder(value, exact=True)
    if strategy_type == "label":
        return page.get_by_label(value)
    if strategy_type == "id":
        return page.locator(f"#{value}")
    if strategy_type == "name":
        return page.locator(f"input[name='{value}']")
    if strategy_type == "css":
        return page.locator(value).first

    # The three "*_contains" strategies differ only in the attribute that is
    # matched (case-insensitive substring match).
    contains_attr = {
        "placeholder_contains": "placeholder",
        "id_contains": "id",
        "name_contains": "name",
    }.get(strategy_type)
    if contains_attr is not None:
        return page.locator(f"input[{contains_attr}*='{value}' i]").first

    if strategy_type == "first_text_input":
        # Input types that can never hold a typed username.
        non_text_types = {"password", "hidden", "submit", "button", "checkbox", "radio"}
        for candidate in page.locator("input:visible").all():
            try:
                # An input without a type attribute is treated as a text input.
                input_type = candidate.get_attribute("type", timeout=500) or "text"
            except Exception:
                # Element detached or too slow to answer: skip it, but do not
                # swallow KeyboardInterrupt/SystemExit like a bare except would.
                continue
            if input_type.lower() not in non_text_types:
                return candidate

    return None
|
||||||
|
|
||||||
|
def _try_button_strategy(self, page, strategy_type, value):
    """
    Build a locator for a button according to a single lookup strategy.

    Args:
        page: Playwright page (or popup) to search.
        strategy_type: "css" (raw selector), "text_contains" (button caption
            or input value containing the text) or "role" (button role with
            the given accessible name).
        value: Selector or caption text used by the strategy.

    Returns:
        A Locator for the strategy, or None when the strategy is unknown.
    """
    if strategy_type == "role":
        return page.get_by_role("button", name=value)

    # Both remaining strategies boil down to "first match of a selector".
    if strategy_type == "css":
        selector = value
    elif strategy_type == "text_contains":
        selector = f"button:has-text('{value}'), input[value*='{value}' i]"
    else:
        return None

    return page.locator(selector).first
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""Entry point principal - orchestreaza tot flow-ul"""
|
"""Entry point principal - orchestreaza tot flow-ul"""
|
||||||
try:
|
try:
|
||||||
@@ -169,14 +381,9 @@ class BTGoScraper:
|
|||||||
logging.info("Pagina incarcata")
|
logging.info("Pagina incarcata")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Cookie consent - asteapta si accepta
|
# Cookie consent - asteapta si accepta (GDPR wrapper)
|
||||||
logging.info("Acceptare cookies...")
|
logging.info("Verificare GDPR cookie banner...")
|
||||||
try:
|
self._dismiss_gdpr_cookies(self.page)
|
||||||
cookie_button = self.page.get_by_role("button", name="Sunt de acord", exact=True)
|
|
||||||
cookie_button.click(timeout=5000)
|
|
||||||
logging.info("✓ Cookies acceptate")
|
|
||||||
except:
|
|
||||||
logging.info("Nu a fost necesar acceptul cookies (posibil deja acceptat)")
|
|
||||||
|
|
||||||
# Click pe butonul LOGIN - deschide popup
|
# Click pe butonul LOGIN - deschide popup
|
||||||
logging.info("Click pe butonul LOGIN...")
|
logging.info("Click pe butonul LOGIN...")
|
||||||
@@ -188,23 +395,40 @@ class BTGoScraper:
|
|||||||
self.login_page = popup_info.value
|
self.login_page = popup_info.value
|
||||||
logging.info("✓ Popup login deschis")
|
logging.info("✓ Popup login deschis")
|
||||||
|
|
||||||
# Completare username
|
# Verifica GDPR cookies si pe popup
|
||||||
logging.info("Completare username...")
|
self._dismiss_gdpr_cookies(self.login_page)
|
||||||
username_field = self.login_page.get_by_placeholder("ID de logare")
|
|
||||||
|
# Asteapta sa se incarce pagina de login
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
# Screenshot debug pentru a vedea starea paginii
|
||||||
|
debug_path = Path(self.config.OUTPUT_DIR) / f"debug_login_popup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
|
||||||
|
self.login_page.screenshot(path=str(debug_path))
|
||||||
|
logging.info(f"Screenshot debug salvat: {debug_path}")
|
||||||
|
|
||||||
|
# Completare username - detectie inteligenta cu fallback
|
||||||
|
logging.info("Detectie camp username...")
|
||||||
|
username_field = self._find_username_field(self.login_page)
|
||||||
|
if not username_field:
|
||||||
|
raise Exception("Nu am gasit campul de username cu nicio strategie!")
|
||||||
username_field.fill(self.config.BTGO_USERNAME)
|
username_field.fill(self.config.BTGO_USERNAME)
|
||||||
logging.info("✓ Username completat")
|
logging.info("[OK] Username completat")
|
||||||
|
|
||||||
# Completare password
|
# Completare password - detectie inteligenta cu fallback
|
||||||
logging.info("Completare password...")
|
logging.info("Detectie camp password...")
|
||||||
password_field = self.login_page.get_by_placeholder("Parola")
|
password_field = self._find_password_field(self.login_page)
|
||||||
|
if not password_field:
|
||||||
|
raise Exception("Nu am gasit campul de parola cu nicio strategie!")
|
||||||
password_field.fill(self.config.BTGO_PASSWORD)
|
password_field.fill(self.config.BTGO_PASSWORD)
|
||||||
logging.info("✓ Password completat")
|
logging.info("[OK] Password completat")
|
||||||
|
|
||||||
# Click pe butonul de submit
|
# Click pe butonul de submit - detectie inteligenta cu fallback
|
||||||
logging.info("Click pe 'Mergi mai departe'...")
|
logging.info("Detectie buton submit...")
|
||||||
submit_button = self.login_page.get_by_role("button", name="Mergi mai departe")
|
submit_button = self._find_submit_button(self.login_page)
|
||||||
|
if not submit_button:
|
||||||
|
raise Exception("Nu am gasit butonul de submit cu nicio strategie!")
|
||||||
submit_button.click()
|
submit_button.click()
|
||||||
logging.info("✓ Credentials trimise, astept 2FA...")
|
logging.info("[OK] Credentials trimise, astept 2FA...")
|
||||||
self._update_progress("Astept aprobare 2FA pe telefon...")
|
self._update_progress("Astept aprobare 2FA pe telefon...")
|
||||||
|
|
||||||
except PlaywrightTimeout as e:
|
except PlaywrightTimeout as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user