diff --git a/CLAUDE.md b/CLAUDE.md index ae8a8e3..d85b758 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,13 +9,23 @@ pip install -e ".[windows]" # Windows: live capture pip install -e ".[dev]" # Linux/macOS: dev + tests (WSL: create venv first) atm calibrate # Tk wizard atm debug --delay 5 # one-shot capture + detect -atm validate-calibration samples/calibration_labels.json # offline color gate +atm validate-calibration calibration/calibration_labels.json # offline color gate atm run --start-at 16:30 --stop-at 23:00 # live session atm run --tz America/New_York --oh-start 09:30 --oh-stop 16:00 # NYSE window override atm dryrun samples # corpus gate -pytest -q # 184 tests +pytest -q # 192 tests (184 core + 8 scenarii regresie) +pytest tests/test_scenarios_regression.py -v # FSM pe imagini reale ``` +## Calibration corpus + +`calibration/` — persistent, auto-suficient, safe to keep când `samples/` și `logs/fires/` se golesc. Conține: +- `frames/` — PNG-uri `{ts}_{color}.png` (ground truth în nume) +- `calibration_labels.json` — gate offline pentru `atm validate-calibration` +- `scenarios.json` — secvențe FSM pentru `tests/test_scenarios_regression.py` + +Când adaugi un frame: copiezi din `logs/fires/` → redenumești `{ts}_{color}.png` → adaugi entry în JSON. Validare după orice recalibrare. + ## Telegram commands (live) `/ss` `/status` `/pause` `/resume` `/resume force` `/3` (interval min) `/stop` @@ -31,6 +41,15 @@ pytest -q # 184 tests `[options.alerts] fire_on_phase_skip = true` (default) — ARMED→light_* direct (dark_* missed) still emits a `⚠️ PHASE SKIP` alert using FSM lockout to suppress spam. +## Palette gotcha (2026-04-21 recalibration) + +TradeStation M2D indicators paint the four bright colors at near-pure saturation: +turquoise `(0,253,253)`, yellow `(253,253,0)`, light_green `(0,255,0)`, light_red `(255,0,0)`. 
+If Tk-wizard calibration samples a slightly desaturated pixel, classifier returns `UNKNOWN` +(distance > tolerance=60) → FSM never sees trigger → stuck in PRIMED → scheduler polls +forever. Always run `atm validate-calibration calibration/calibration_labels.json` after +recalibrating. Current active config: `configs/2026-04-21-recalib.toml`. + ## Skill routing When the user's request matches an available skill, ALWAYS invoke it using the Skill diff --git a/README.md b/README.md index 3088eb2..d4614d5 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,17 @@ Fără execuție automată. Faza 2 (auto-execute) e blocată de auditul TOS prop ``` atm/ ├── configs/ # calibrări + current.txt (marcaj care config e activ) +├── calibration/ # corpus auto-suficient pentru validare + regresie +│ ├── calibration_labels.json # etichete per-frame pentru atm validate-calibration +│ ├── scenarios.json # secvențe FSM (arm→prime→trigger etc.) pentru test_scenarios_regression.py +│ ├── frames/ # PNG-uri numite {ts}_{color}.png, izolate de logs/fires și samples +│ └── README.md ├── logs/ │ ├── YYYY-MM-DD.jsonl # audit zilnic, se rotește la miezul nopții local │ ├── dead_letter.jsonl # alerte care au eșuat după retries -│ ├── fires/ # screenshot-uri adnotate, unul per trigger BUY/SELL +│ ├── fires/ # screenshot-uri adnotate, unul per trigger BUY/SELL (tranzitoriu, se poate goli) │ └── calibrate_capture_*.png / debug_*.png # artefacte debug (gitignored) -├── samples/ # frame complet salvat automat la fiecare schimbare de culoare +├── samples/ # frame complet salvat automat la fiecare schimbare de culoare (tranzitoriu) ├── src/atm/ # pachetul Python │ ├── config.py # dataclass + loader TOML │ ├── vision.py # crop ROI, phash, pixel↔preț, Hough, componente conectate @@ -33,7 +38,7 @@ atm/ │ ├── journal.py # înregistrări trade-uri │ ├── report.py # raport săptămânal PnL în R │ └── main.py # CLI unificat -├── tests/ # 184 teste pytest +├── tests/ # 192 teste pytest (184 core + 8 scenarii 
regresie) └── TODOS.md # backlog P1/P2/P3 ``` @@ -77,8 +82,9 @@ pip install -e ".[dev]" ## Dev ```bash -pytest -q # toate testele (184+) +pytest -q # toate testele (192+) pytest tests/test_commands.py # un modul specific +pytest tests/test_scenarios_regression.py -v # scenarii FSM pe imagini reale pytest -q --cov=atm --cov-report=term-missing # cu coverage ``` @@ -97,6 +103,7 @@ Structura testelor: | `test_main.py` | lifecycle, operating hours, canary, dispatcher | | `test_validate.py` | gate offline clasificare culori | | `test_canary.py` | drift + callback pauză | +| `test_scenarios_regression.py` | secvențe FSM pe frame-uri reale (arm→prime→trigger, phase_skip, catchup, post-fire suppression) | --- @@ -173,18 +180,23 @@ Deschizi PNG-ul adnotat: dreptunghi galben = `dot_roi`, cerc roșu = dot detecta Verifici dacă calibrarea actuală clasifică corect un set de frame-uri etichetate manual, **fără să aștepți sesiunea live**. Esențial după orice recalibrare. ```bash -atm validate-calibration samples/calibration_labels.json +atm validate-calibration calibration/calibration_labels.json ``` -Format input (`samples/calibration_labels.json`): +Format input (`calibration/calibration_labels.json`): ```json [ - {"path": "logs/fires/20260417_201500_arm_sell.png", "expected": "yellow", "note": "primul arm"}, - {"path": "logs/fires/20260417_205302_ss.png", "expected": "dark_red"}, - {"path": "logs/fires/20260417_210441_ss.png", "expected": "light_red"} + {"path": "calibration/frames/20260420_171501_yellow.png", "expected": "yellow"}, + {"path": "calibration/frames/20260420_172104_dark_red.png", "expected": "dark_red"}, + {"path": "calibration/frames/20260420_173004_light_red.png", "expected": "light_red"} ] ``` +Frame-urile sunt copiate în `calibration/frames/` cu numele `{timestamp}_{culoare}.png` +— numele reflectă ground truth-ul vizibil pe dot, nu label-ul de eveniment din +`logs/fires/`. 
Directorul e auto-suficient: `samples/` și `logs/fires/` se pot +goli oricând fără să afecteze validarea. + Output: per fiecare frame PASS/FAIL + culoarea detectată + top 3 candidați după distanță RGB + sugestii de pixel pentru misclasificări. Exit code: @@ -192,25 +204,42 @@ Exit code: - `1` — cel puțin un FAIL - `2` — input invalid/lipsă -### Două corpus-uri, două scopuri +### Trei surse de frame-uri, roluri distincte -| Corpus | Unde se salvează | Cum se populează | Folosit de | +| Sursă | Unde se salvează | Cum se populează | Folosit de | |---|---|---|---| +| `calibration/frames/` | PNG-uri curate `{ts}_{color}.png` | **manual** — copii din `logs/fires/` doar cele verificate | `atm validate-calibration` + `test_scenarios_regression.py` | | `samples/` | frame complet la fiecare **schimbare de culoare** detectată | automat de `atm run` | `atm label` + `atm dryrun` | -| `logs/fires/` | screenshot adnotat la fiecare alertă BUY/SELL, `/ss` manual, **interval automat `/3`** | manual sau scheduler | `atm validate-calibration` | +| `logs/fires/` | screenshot adnotat la fiecare alertă BUY/SELL, `/ss` manual, **interval automat `/3`** | manual sau scheduler | sursă pentru `calibration/frames/` | + +`calibration/` e singurul director **persistent**. Celelalte două se pot goli +după ce ai extras ce-ți trebuie — tranzitorii prin natură. + +### Regresie FSM pe frame-uri reale + +`calibration/scenarios.json` definește secvențe ordonate (arm → prime → trigger, +phase_skip, catchup, suprimare dark_* post-fire) care refolosesc aceleași frame-uri. +`tests/test_scenarios_regression.py` rulează fiecare secvență prin pipeline-ul real +`Detector → _handle_tick`, asertând per pas: culoarea detectată, tranziția FSM +(prev→next + reason + trigger), alertele emise prin notifier, și starea +scheduler-ului (running/stopped). + +Extensii fără cod nou: adaugi un scenariu în JSON și pytest-ul îl consumă automat +(parametrizat pe `id`). 
Dacă scenariul cere o combinație de culori noi, copii +frame-ul în `calibration/frames/` cu numele `{timestamp}_{culoare}.png`. **Flow A — calibrare fină cu screenshots automate (`/3`)** Util când vrei să acumulezi repede frame-uri din culori reale, fără să aștepți schimbări de culoare. 1. **În sesiunea live**, trimite `/3` în Telegram → bot-ul face screenshot automat la 3 minute și îl salvează în `logs/fires/*_ss.png`. Oprești cu `/stop`. -2. **După sesiune**, adaugi intrări în `samples/calibration_labels.json` pentru fiecare screenshot relevant, cu culoarea pe care ai văzut-o TU pe chart: +2. **După sesiune**, copiezi fiecare screenshot relevant în `calibration/frames/` (redenumit `{timestamp}_{culoare}.png`) și adaugi câte o intrare în `calibration/calibration_labels.json`, cu culoarea pe care ai văzut-o TU pe chart: ```json {"path": "calibration/frames/20260420_151234_dark_green.png", "expected": "dark_green", "note": "văzut live, ratat de bot"} ``` 3. **Rulează validarea:** ```bash - atm validate-calibration samples/calibration_labels.json + atm validate-calibration calibration/calibration_labels.json ``` 4. **Interpretează rezultatul:** - **Toate PASS** → calibrarea ține, continui live fără modificări. @@ -237,7 +266,7 @@ Scenariu: ai rulat o sesiune live, ai văzut pe chart o culoare pe care bot-ul n 1. **În timpul sesiunii** — două opțiuni pentru a captura dovezi: - `/ss` în Telegram → un screenshot instant în `logs/fires/` - `/3` în Telegram → screenshots automate la 3 min în `logs/fires/` (util dacă nu ești la monitor continuu); oprești cu `/stop` -2. **După sesiune**, adaugi intrările relevante în `samples/calibration_labels.json` cu culoarea corectă și rulezi `atm validate-calibration` (Flow A de mai sus). +2. **După sesiune**, adaugi intrările relevante în `calibration/calibration_labels.json` cu culoarea corectă și rulezi `atm validate-calibration` (Flow A de mai sus). 3. Dacă apar FAIL-uri, aplici fix tactic în TOML sau recalibrezi complet. 
### Exemplu real — incidentul 2026-04-17 @@ -257,9 +286,30 @@ yellow, turquoise, gray, background — lăsate neschimbate (nu am dovezi live c După fix: `atm validate-calibration` → 3/3 PASS, confidence 1.00 pe ambele roșuri. +### Exemplu real — incidentul 2026-04-20/21 (culori saturate) + +User a observat screenshot-uri poll periodice după ce un trigger BUY/SELL părea deja declanșat. Dovadă: `logs/fires/20260420_214908_poll.png` avea pixel verde pur `(0, 255, 0)` (trigger light_green) dar detector-ul îl clasifica `UNKNOWN`. Investigație: 27/114 PNG-uri din corpus ieșeau UNKNOWN pentru că paleta din `2026-04-18-1220.toml` avea centrele celor patru culori luminoase **prea întunecate** — distanța până la pixelul real depășea toleranța de 60. + +Fix aplicat în `2026-04-21-recalib.toml`: + +| Culoare | Centru vechi | Pixel live observat | Centru nou | d(vechi) | +|---|---|---|---|---| +| turquoise | (0, 153, 153) | (0, 253, 253) | **(0, 253, 253)** | 141 | +| yellow | (153, 153, 0) | (253, 253, 0) | **(253, 253, 0)** | 141 | +| light_green | (0, 171, 0) | (0, 255, 0) | **(0, 255, 0)** | 84 | +| light_red | (171, 0, 0) | (255, 0, 0) | **(255, 0, 0)** | 84 | + +dark_green, dark_red, gray, background — neschimbate (nu ieșeau UNKNOWN). + +Consecință invizibilă pentru user: fără trigger acceptat de FSM, starea rămânea blocată în `PRIMED_*` → `ScreenshotScheduler` nu primea `reason=fire/cooled/phase_skip/opposite_rearm` → polling continuu la 3 min ore în șir. + +După fix: corpus 27→0 UNKNOWN pe culorile luminoase (restul 9 sunt pixeli off-ROI crem, nu dot-uri). `atm validate-calibration calibration/calibration_labels.json` → 16/16 PASS. + +**Lesson learned:** la recalibrare cu wizard-ul Tk, dacă folosești o imagine screenshot (nu captură live), pipeline-ul de saturation-snap poate rata pixelul cel mai saturat și să ia un dot ușor desaturat. Regulă: după wizard, verifică imediat cu `atm validate-calibration` pe un corpus cu toate 7 culorile. 
Dacă vreo culoare iese UNKNOWN, corectează manual în TOML cu pixelul real observat. + **Rollback** dacă ceva merge prost: ```bash -echo "2026-04-16-0703.toml" > configs/current.txt +echo "2026-04-18-1220.toml" > configs/current.txt # sau 2026-04-16-0703.toml ``` --- @@ -385,6 +435,7 @@ atm report --week 2026-16 # win rate săptămânal + PnL în R + slippage | Discord OK, Telegram tace (sau invers) | `logs/dead_letter.jsonl` are alertele eșuate + eroarea | Fixezi credențiale în TOML, restart. | | Heartbeat arată `telegram: failed > 0` | Telegram a răspuns `ok:false` | Check `logs/dead_letter.jsonl` pentru `error_str` / `description`. Comun: bot-ul nu-a fost pornit de user în Telegram, sau `chat_id` greșit (channel vs group vs DM). | | Bot-ul "moare" după N ore, heartbeat merge dar comenzile nu răspund | Era bug-ul de hang din 2026-04-17 — drain coadă de comenzi sărit când Canary paused | Fixat în `c5024ce`. Update git pull. | +| Poll-uri periodice continuă deși un trigger BUY/SELL s-a afișat pe chart | Trigger-ul a ieșit UNKNOWN (pixel saturat, paletă întunecată) → FSM blocat în PRIMED → scheduler nu primește `fire/cooled/phase_skip` | Rulează `atm validate-calibration calibration/calibration_labels.json`. Dacă vreo culoare luminoasă iese UNKNOWN, actualizezi centrul RGB în TOML la pixelul real observat. Vezi incidentul 2026-04-20/21. | --- diff --git a/calibration/README.md b/calibration/README.md new file mode 100644 index 0000000..e29423d --- /dev/null +++ b/calibration/README.md @@ -0,0 +1,49 @@ +# calibration/ — frame corpus for validation & regression + +Two artifacts, one frame pool: + +- `calibration_labels.json` — per-frame color labels. Used by + `atm validate-calibration` to check the current palette classifies known-good + dots correctly before a live session. +- `scenarios.json` — ordered frame sequences per FSM scenario (full cycle, + phase skip, catchup, post-fire suppression). 
Consumed by + `tests/test_scenarios_regression.py` which runs each sequence through the + full `Detector → _handle_tick` pipeline and asserts color, FSM reason/state, + emitted alerts, and scheduler on/off. + +Frames live in `calibration/frames/` and are self-contained: purging +`logs/fires/` or `samples/` does not break either artifact. + +## calibration_labels.json schema + +A JSON array of entries. Each entry: + +| Field | Type | Required | Description | +|------------|---------|----------|----------------------------------------------------------------| +| `path` | string | yes | Path to a PNG frame (relative to CWD or absolute). | +| `expected` | string | yes | Expected color name: one of `turquoise`, `yellow`, `dark_green`, `dark_red`, `light_green`, `light_red`, `gray`. | +| `note` | string | no | Freeform annotation; shown in SUGGESTIONS output. | + +## Usage + +```bash +atm validate-calibration calibration/calibration_labels.json +``` + +Exit codes: +- `0` — every sample PASS +- `1` — one or more FAIL +- `2` — label file missing or malformed JSON + +## Adding new samples + +1. Find a screenshot in `logs/fires/` whose dot color you can verify by eye. +2. **Copy it into `calibration/frames/`** as `{ts}_{color}.png` (the file name + carries the ground-truth color) — this directory is self-contained so + `logs/fires/` and `samples/` can be emptied without breaking validation. +3. Append an entry with `path` (pointing to `calibration/frames/...`), + `expected`, and an optional `note`. +4. Re-run validation. If it FAILs, the SUGGESTIONS section will tell you the + RGB distance between the observed pixel and the expected color's center — + use that as input for `atm calibrate`. 
diff --git a/calibration/calibration_labels.json b/calibration/calibration_labels.json new file mode 100644 index 0000000..e95250b --- /dev/null +++ b/calibration/calibration_labels.json @@ -0,0 +1,82 @@ +[ + { + "path": "calibration/frames/20260420_200002_turquoise.png", + "expected": "turquoise", + "note": "BUY arm visible in poll; rgb=(0,253,253)" + }, + { + "path": "calibration/frames/20260421_072757_turquoise.png", + "expected": "turquoise", + "note": "BUY arm via manual /ss; rgb=(0,253,253)" + }, + { + "path": "calibration/frames/20260420_171501_yellow.png", + "expected": "yellow", + "note": "SELL arm event; rgb=(253,253,0)" + }, + { + "path": "calibration/frames/20260420_194505_yellow.png", + "expected": "yellow", + "note": "SELL arm event; rgb=(253,253,0)" + }, + { + "path": "calibration/frames/20260420_194721_yellow.png", + "expected": "yellow", + "note": "SELL arm visible in manual /ss; rgb=(253,253,0)" + }, + { + "path": "calibration/frames/20260418_124645_dark_green.png", + "expected": "dark_green", + "note": "BUY prime catchup; rgb=(0,128,0)" + }, + { + "path": "calibration/frames/20260420_185102_dark_green.png", + "expected": "dark_green", + "note": "BUY prime; rgb=(0,128,0)" + }, + { + "path": "calibration/frames/20260420_213706_dark_green.png", + "expected": "dark_green", + "note": "BUY prime catchup; rgb=(0,128,0)" + }, + { + "path": "calibration/frames/20260420_172104_dark_red.png", + "expected": "dark_red", + "note": "SELL prime; rgb=(128,0,0)" + }, + { + "path": "calibration/frames/20260420_195701_dark_red.png", + "expected": "dark_red", + "note": "SELL prime; rgb=(128,0,0)" + }, + { + "path": "calibration/frames/20260420_210905_dark_red.png", + "expected": "dark_red", + "note": "SELL prime; rgb=(128,0,0)" + }, + { + "path": "calibration/frames/20260420_163303_light_green.png", + "expected": "light_green", + "note": "BUY trigger (FIRE); rgb=(0,255,0)" + }, + { + "path": "calibration/frames/20260420_214908_light_green.png", + "expected": 
"light_green", + "note": "regression 2026-04-20: BUY trigger visible in poll (original complaint); rgb=(0,255,0) was UNKNOWN under pre-2026-04-21 calibration" + }, + { + "path": "calibration/frames/20260420_173004_light_red.png", + "expected": "light_red", + "note": "SELL trigger (FIRE); rgb=(255,0,0)" + }, + { + "path": "calibration/frames/20260420_175005_gray.png", + "expected": "gray", + "note": "idle gray dot via manual /ss; rgb=(128,128,128)" + }, + { + "path": "calibration/frames/20260420_185702_gray.png", + "expected": "gray", + "note": "idle gray dot in poll; rgb=(128,128,128)" + } +] diff --git a/calibration/frames/20260418_124645_dark_green.png b/calibration/frames/20260418_124645_dark_green.png new file mode 100644 index 0000000..3442321 Binary files /dev/null and b/calibration/frames/20260418_124645_dark_green.png differ diff --git a/calibration/frames/20260420_163303_light_green.png b/calibration/frames/20260420_163303_light_green.png new file mode 100644 index 0000000..5d50803 Binary files /dev/null and b/calibration/frames/20260420_163303_light_green.png differ diff --git a/calibration/frames/20260420_171501_yellow.png b/calibration/frames/20260420_171501_yellow.png new file mode 100644 index 0000000..4b7e179 Binary files /dev/null and b/calibration/frames/20260420_171501_yellow.png differ diff --git a/calibration/frames/20260420_172104_dark_red.png b/calibration/frames/20260420_172104_dark_red.png new file mode 100644 index 0000000..796a115 Binary files /dev/null and b/calibration/frames/20260420_172104_dark_red.png differ diff --git a/calibration/frames/20260420_173004_light_red.png b/calibration/frames/20260420_173004_light_red.png new file mode 100644 index 0000000..b26a1cd Binary files /dev/null and b/calibration/frames/20260420_173004_light_red.png differ diff --git a/calibration/frames/20260420_175005_gray.png b/calibration/frames/20260420_175005_gray.png new file mode 100644 index 0000000..27ea597 Binary files /dev/null and 
b/calibration/frames/20260420_175005_gray.png differ diff --git a/calibration/frames/20260420_185102_dark_green.png b/calibration/frames/20260420_185102_dark_green.png new file mode 100644 index 0000000..299e51b Binary files /dev/null and b/calibration/frames/20260420_185102_dark_green.png differ diff --git a/calibration/frames/20260420_185702_gray.png b/calibration/frames/20260420_185702_gray.png new file mode 100644 index 0000000..23d5140 Binary files /dev/null and b/calibration/frames/20260420_185702_gray.png differ diff --git a/calibration/frames/20260420_194505_yellow.png b/calibration/frames/20260420_194505_yellow.png new file mode 100644 index 0000000..de13142 Binary files /dev/null and b/calibration/frames/20260420_194505_yellow.png differ diff --git a/calibration/frames/20260420_194721_yellow.png b/calibration/frames/20260420_194721_yellow.png new file mode 100644 index 0000000..8e23a3b Binary files /dev/null and b/calibration/frames/20260420_194721_yellow.png differ diff --git a/calibration/frames/20260420_195701_dark_red.png b/calibration/frames/20260420_195701_dark_red.png new file mode 100644 index 0000000..ebf5e36 Binary files /dev/null and b/calibration/frames/20260420_195701_dark_red.png differ diff --git a/calibration/frames/20260420_200002_turquoise.png b/calibration/frames/20260420_200002_turquoise.png new file mode 100644 index 0000000..a468e0d Binary files /dev/null and b/calibration/frames/20260420_200002_turquoise.png differ diff --git a/calibration/frames/20260420_210905_dark_red.png b/calibration/frames/20260420_210905_dark_red.png new file mode 100644 index 0000000..29dc5ee Binary files /dev/null and b/calibration/frames/20260420_210905_dark_red.png differ diff --git a/calibration/frames/20260420_213706_dark_green.png b/calibration/frames/20260420_213706_dark_green.png new file mode 100644 index 0000000..8171c47 Binary files /dev/null and b/calibration/frames/20260420_213706_dark_green.png differ diff --git 
a/calibration/frames/20260420_214908_light_green.png b/calibration/frames/20260420_214908_light_green.png new file mode 100644 index 0000000..70c05c3 Binary files /dev/null and b/calibration/frames/20260420_214908_light_green.png differ diff --git a/calibration/frames/20260421_072757_turquoise.png b/calibration/frames/20260421_072757_turquoise.png new file mode 100644 index 0000000..f726bb7 Binary files /dev/null and b/calibration/frames/20260421_072757_turquoise.png differ diff --git a/calibration/scenarios.json b/calibration/scenarios.json new file mode 100644 index 0000000..5882d96 --- /dev/null +++ b/calibration/scenarios.json @@ -0,0 +1,248 @@ +[ + { + "id": "buy_full_cycle", + "description": "IDLE → ARMED_BUY → PRIMED_BUY → IDLE(fire). Turquoise arm, dark_green prime, light_green trigger.", + "steps": [ + { + "frame": "calibration/frames/20260420_200002_turquoise.png", + "expected_color": "turquoise", + "expected_reason": "arm", + "expected_state": "ARMED_BUY", + "expected_trigger": null, + "expected_new_alerts": ["arm"], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_185102_dark_green.png", + "expected_color": "dark_green", + "expected_reason": "prime", + "expected_state": "PRIMED_BUY", + "expected_trigger": null, + "expected_new_alerts": ["prime"], + "expected_scheduler_running": true + }, + { + "frame": "calibration/frames/20260420_163303_light_green.png", + "expected_color": "light_green", + "expected_reason": "fire", + "expected_state": "IDLE", + "expected_trigger": "BUY", + "expected_new_alerts": [], + "expected_scheduler_running": false + } + ] + }, + { + "id": "sell_full_cycle", + "description": "Mirror of buy_full_cycle: yellow arm, dark_red prime, light_red trigger.", + "steps": [ + { + "frame": "calibration/frames/20260420_171501_yellow.png", + "expected_color": "yellow", + "expected_reason": "arm", + "expected_state": "ARMED_SELL", + "expected_trigger": null, + "expected_new_alerts": ["arm"], + 
"expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_172104_dark_red.png", + "expected_color": "dark_red", + "expected_reason": "prime", + "expected_state": "PRIMED_SELL", + "expected_trigger": null, + "expected_new_alerts": ["prime"], + "expected_scheduler_running": true + }, + { + "frame": "calibration/frames/20260420_173004_light_red.png", + "expected_color": "light_red", + "expected_reason": "fire", + "expected_state": "IDLE", + "expected_trigger": "SELL", + "expected_new_alerts": [], + "expected_scheduler_running": false + } + ] + }, + { + "id": "buy_phase_skip", + "description": "ARMED_BUY → light_green direct (dark_green missed). Backstop `fire_on_phase_skip` emits phase_skip_fire alert.", + "steps": [ + { + "frame": "calibration/frames/20260421_072757_turquoise.png", + "expected_color": "turquoise", + "expected_reason": "arm", + "expected_state": "ARMED_BUY", + "expected_trigger": null, + "expected_new_alerts": ["arm"], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_214908_light_green.png", + "expected_color": "light_green", + "expected_reason": "phase_skip", + "expected_state": "IDLE", + "expected_trigger": null, + "expected_new_alerts": ["phase_skip_fire"], + "expected_scheduler_running": false + } + ] + }, + { + "id": "sell_phase_skip", + "description": "Mirror: ARMED_SELL → light_red direct (dark_red missed).", + "steps": [ + { + "frame": "calibration/frames/20260420_194505_yellow.png", + "expected_color": "yellow", + "expected_reason": "arm", + "expected_state": "ARMED_SELL", + "expected_trigger": null, + "expected_new_alerts": ["arm"], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_173004_light_red.png", + "expected_color": "light_red", + "expected_reason": "phase_skip", + "expected_state": "IDLE", + "expected_trigger": null, + "expected_new_alerts": ["phase_skip_fire"], + "expected_scheduler_running": false + } + ] + }, + { + "id": 
"buy_catchup", + "description": "Start with dark_green in IDLE (no arm observed). Catchup synth-feeds turquoise → emits arm+prime alerts. FSM ends in PRIMED_BUY.", + "steps": [ + { + "frame": "calibration/frames/20260418_124645_dark_green.png", + "expected_color": "dark_green", + "expected_reason": "prime", + "expected_state": "PRIMED_BUY", + "expected_trigger": null, + "expected_new_alerts": ["arm", "prime"], + "expected_scheduler_running": true + } + ] + }, + { + "id": "sell_catchup", + "description": "Mirror: start with dark_red in IDLE. Catchup synth-yellow → arm+prime alerts.", + "steps": [ + { + "frame": "calibration/frames/20260420_195701_dark_red.png", + "expected_color": "dark_red", + "expected_reason": "prime", + "expected_state": "PRIMED_SELL", + "expected_trigger": null, + "expected_new_alerts": ["arm", "prime"], + "expected_scheduler_running": true + } + ] + }, + { + "id": "buy_post_fire_suppression", + "description": "After BUY fire, residual dark_green in IDLE must NOT re-prime. 
User rule: new arming (turquoise) required before priming alerts become valid again.", + "steps": [ + { + "frame": "calibration/frames/20260420_200002_turquoise.png", + "expected_color": "turquoise", + "expected_reason": "arm", + "expected_state": "ARMED_BUY", + "expected_trigger": null, + "expected_new_alerts": ["arm"], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_185102_dark_green.png", + "expected_color": "dark_green", + "expected_reason": "prime", + "expected_state": "PRIMED_BUY", + "expected_trigger": null, + "expected_new_alerts": ["prime"], + "expected_scheduler_running": true + }, + { + "frame": "calibration/frames/20260420_163303_light_green.png", + "expected_color": "light_green", + "expected_reason": "fire", + "expected_state": "IDLE", + "expected_trigger": "BUY", + "expected_new_alerts": [], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_213706_dark_green.png", + "expected_color": "dark_green", + "expected_reason": "noise", + "expected_state": "IDLE", + "expected_trigger": null, + "expected_new_alerts": [], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260418_124645_dark_green.png", + "expected_color": "dark_green", + "expected_reason": "noise", + "expected_state": "IDLE", + "expected_trigger": null, + "expected_new_alerts": [], + "expected_scheduler_running": false + } + ] + }, + { + "id": "sell_post_fire_suppression", + "description": "Mirror: after SELL fire, residual dark_red must NOT re-prime until new yellow arming.", + "steps": [ + { + "frame": "calibration/frames/20260420_171501_yellow.png", + "expected_color": "yellow", + "expected_reason": "arm", + "expected_state": "ARMED_SELL", + "expected_trigger": null, + "expected_new_alerts": ["arm"], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_172104_dark_red.png", + "expected_color": "dark_red", + "expected_reason": "prime", + "expected_state": 
"PRIMED_SELL", + "expected_trigger": null, + "expected_new_alerts": ["prime"], + "expected_scheduler_running": true + }, + { + "frame": "calibration/frames/20260420_173004_light_red.png", + "expected_color": "light_red", + "expected_reason": "fire", + "expected_state": "IDLE", + "expected_trigger": "SELL", + "expected_new_alerts": [], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_195701_dark_red.png", + "expected_color": "dark_red", + "expected_reason": "noise", + "expected_state": "IDLE", + "expected_trigger": null, + "expected_new_alerts": [], + "expected_scheduler_running": false + }, + { + "frame": "calibration/frames/20260420_210905_dark_red.png", + "expected_color": "dark_red", + "expected_reason": "noise", + "expected_state": "IDLE", + "expected_trigger": null, + "expected_new_alerts": [], + "expected_scheduler_running": false + } + ] + } +] diff --git a/samples/calibration_labels.README.md b/samples/calibration_labels.README.md deleted file mode 100644 index 216b9d0..0000000 --- a/samples/calibration_labels.README.md +++ /dev/null @@ -1,33 +0,0 @@ -# calibration_labels.json — schema - -Used by `atm validate-calibration` to check that the current color calibration -classifies known-good screenshots correctly before a live session. - -## Schema - -A JSON array of entries. Each entry: - -| Field | Type | Required | Description | -|------------|---------|----------|----------------------------------------------------------------| -| `path` | string | yes | Path to a PNG frame (relative to CWD or absolute). | -| `expected` | string | yes | Expected color name: one of `turquoise`, `yellow`, `dark_green`, `dark_red`, `light_green`, `light_red`, `gray`. | -| `note` | string | no | Freeform annotation; shown in SUGGESTIONS output. 
"""Image-backed regression scenarios.

Each scenario in `calibration/scenarios.json` is a sequence of real PNG frames
fed through the full Detector → _handle_tick pipeline. Asserts per step:
  - detector classifies the exact expected color (accepted=True)
  - FSM transition reason/state + trigger match
  - notifier receives exactly the expected new alert kinds
  - scheduler-running flag (mirroring _handle_fsm_result) matches

Frames live in calibration/frames/ (self-contained, survives logs/fires/ purges).

Every path used here (scenario file, config directory, frame files) is
resolved against the repository root — the parent of this `tests/` directory —
so the outcome does not depend on the directory pytest is launched from.
"""
from __future__ import annotations

import json
from pathlib import Path

import cv2
import pytest

from atm.config import Config
from atm.detector import Detector
from atm.main import _handle_tick
from atm.state_machine import StateMachine

from tests.test_handle_tick import FakeNotifier, FakeAudit


# This file lives in <repo>/tests/, so parents[1] is the repository root.
_REPO_ROOT = Path(__file__).resolve().parents[1]
_SCENARIOS_PATH = _REPO_ROOT / "calibration" / "scenarios.json"
_CONFIGS_DIR = _REPO_ROOT / "configs"

# Reasons that stop the screenshot scheduler (mirrors main.py:_handle_fsm_result).
_SCHEDULER_STOP_REASONS = {"fire", "cooled", "phase_skip", "opposite_rearm"}


def _load_scenarios() -> list[dict]:
    """Return the scenario list parsed from `calibration/scenarios.json`.

    Called at collection time by the `parametrize` decorator below, so a
    missing or malformed file surfaces as a pytest collection error.
    """
    return json.loads(_SCENARIOS_PATH.read_text(encoding="utf-8"))


def _resolve_frame(frame: str) -> Path:
    """Map a scenario `frame` entry to a filesystem path.

    Relative entries (the form used in scenarios.json) are taken relative to
    the repository root. An absolute entry is returned unchanged, because
    joining onto an absolute right-hand operand discards the left-hand side.
    """
    return _REPO_ROOT / frame


def _next_scheduler_running(
    running: bool, reason: str, trigger: object, locked: object
) -> bool:
    """Return the scheduler-running flag after one FSM transition.

    Test-side model of the bookkeeping that, per the original comments in this
    file, main.py:_handle_fsm_result performs (main.py:953-957 and 960-964 at
    the time the test was written):
      - a "prime" transition starts the scheduler;
      - any reason in `_SCHEDULER_STOP_REASONS` stops it;
      - an unlocked trigger stops it as well.
    """
    if reason == "prime":
        running = True
    elif reason in _SCHEDULER_STOP_REASONS:
        running = False
    # Applied after the reason check, same order as the inline original.
    if trigger and not locked:
        running = False
    return running


@pytest.fixture(scope="module")
def cfg() -> Config:
    """Load the currently active calibration config once per test module."""
    return Config.load_current(_CONFIGS_DIR)


@pytest.mark.parametrize(
    "scenario", _load_scenarios(), ids=lambda s: s["id"]
)
def test_scenario(scenario: dict, cfg: Config) -> None:
    """Replay one scenario frame by frame and check every expectation.

    Each step's frame is classified by the detector, the resulting color is
    fed to `_handle_tick`, and the step's `expected_*` fields are compared
    against the detector result, the returned transition, the alerts newly
    recorded by the fake notifier, and the modelled scheduler flag.
    """
    fsm = StateMachine(lockout_s=cfg.lockout_s)
    notif = FakeNotifier()
    audit = FakeAudit()
    # Frames are passed to step() explicitly, so the capture callable is a stub.
    detector = Detector(cfg=cfg, capture=lambda: None)
    scheduler_running = False
    first_accepted = True

    for i, step in enumerate(scenario["steps"]):
        label = f"{scenario['id']}[{i}]"

        frame_path = _resolve_frame(step["frame"])
        assert frame_path.exists(), f"{label}: missing frame {frame_path}"
        # The None check stays: the original guards imread's result the same way.
        frame = cv2.imread(str(frame_path))
        assert frame is not None, f"{label}: cv2.imread failed"

        # The step index doubles as a synthetic, strictly increasing timestamp.
        res = detector.step(ts=float(i), frame=frame)
        assert res.accepted, (
            f"{label}: detector rejected {frame_path.name} "
            f"(match={res.match.name if res.match else None}, "
            f"d={res.match.distance if res.match else None}, rgb={res.rgb})"
        )
        assert res.color == step["expected_color"], (
            f"{label}: color mismatch — expected "
            f"{step['expected_color']}, got {res.color}"
        )

        # Remember how many alerts existed so only this step's are compared.
        alerts_before = len(notif.alerts)
        tr = _handle_tick(
            fsm, res.color, float(i), notif, audit,
            first_accepted=first_accepted, cfg=cfg,
        )
        first_accepted = False

        assert tr is not None, f"{label}: _handle_tick returned None"
        assert tr.reason == step["expected_reason"], (
            f"{label}: reason mismatch — expected "
            f"{step['expected_reason']}, got {tr.reason}"
        )
        assert tr.next.value == step["expected_state"], (
            f"{label}: state mismatch — expected "
            f"{step['expected_state']}, got {tr.next.value}"
        )
        assert tr.trigger == step["expected_trigger"], (
            f"{label}: trigger mismatch — expected "
            f"{step['expected_trigger']}, got {tr.trigger}"
        )

        new_alerts = [a.kind for a in notif.alerts[alerts_before:]]
        assert new_alerts == step["expected_new_alerts"], (
            f"{label}: alert mismatch — expected "
            f"{step['expected_new_alerts']}, got {new_alerts}"
        )

        scheduler_running = _next_scheduler_running(
            scheduler_running, tr.reason, tr.trigger, tr.locked
        )
        assert scheduler_running == step["expected_scheduler_running"], (
            f"{label}: scheduler_running mismatch — expected "
            f"{step['expected_scheduler_running']}, got {scheduler_running}"
        )