feat(T6/T7): supervizare worker (healthcheck+autoheal) + backup online + cheie partajata
T6 — worker supravegheat:
- app/worker/healthcheck.py: probe pe heartbeat-ul din DB (beat invechit -> exit 1).
Prinde worker-ul agatat (proces viu, beat inghetat) pe care restart:always nu-l
vede. Cablat ca healthcheck pe serviciul worker in compose.
- sidecar autoheal: restarteaza efectiv containerul unhealthy (compose simplu doar
marcheaza, nu restarteaza la unhealthy).
T7 — deploy:
- tools/backup.py: backup ONLINE via Connection.backup (WAL nu se copiaza sigur cu
cp); --keep N roteste snapshot-urile.
- .env.example documenteaza env-urile; volum persistent numit deja in compose.
Fix critic (split api/worker in 2 containere): AUTOPASS_CREDS_KEY trebuie PARTAJATA
api<->worker, altfel worker nu decripteaza creds-urile criptate de API -> submission
blocate. Acum impusa in compose (${...:?} -> fail explicit daca lipseste).
.gitignore: exceptie !.env.example.
5 teste noi (tests/test_deploy.py). 100 pass total.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,10 @@
|
||||
# Gateway RAR AUTOPASS — un container API + un container worker, acelasi image,
|
||||
# acelasi volum SQLite persistent (plan.md sect. 4 + 9). restart: always pe ambele.
|
||||
#
|
||||
# CRITIC: AUTOPASS_CREDS_KEY trebuie PARTAJATA intre api si worker — API cripteaza
|
||||
# creds-urile RAR, worker-ul le decripteaza. Chei diferite -> worker nu poate
|
||||
# decripta -> submission-uri blocate "creds indisponibile". Seteaz-o in .env
|
||||
# (vezi .env.example): compose o citeste automat. Lipsa -> compose pica explicit.
|
||||
services:
|
||||
api:
|
||||
build: .
|
||||
@@ -11,6 +16,8 @@ services:
|
||||
environment:
|
||||
AUTOPASS_DB_PATH: /data/autopass.db
|
||||
AUTOPASS_RAR_ENV: test
|
||||
AUTOPASS_CREDS_KEY: ${AUTOPASS_CREDS_KEY:?seteaza AUTOPASS_CREDS_KEY in .env (vezi .env.example)}
|
||||
AUTOPASS_REQUIRE_API_KEY: ${AUTOPASS_REQUIRE_API_KEY:-false}
|
||||
restart: always
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8000/healthz').status==200 else 1)"]
|
||||
@@ -26,11 +33,35 @@ services:
|
||||
environment:
|
||||
AUTOPASS_DB_PATH: /data/autopass.db
|
||||
AUTOPASS_RAR_ENV: test
|
||||
AUTOPASS_CREDS_KEY: ${AUTOPASS_CREDS_KEY:?seteaza AUTOPASS_CREDS_KEY in .env (vezi .env.example)}
|
||||
# Send dezactivat by default; activeaza pentru proba end-to-end.
|
||||
AUTOPASS_WORKER_SEND_ENABLED: "false"
|
||||
restart: always
|
||||
depends_on:
|
||||
- api
|
||||
# T6: probe pe heartbeat-ul din DB — prinde worker-ul AGATAT (proces viu, beat
|
||||
# invechit), pe care restart:always singur nu-l vede. start_period acopera bootul.
|
||||
# ATENTIE: in compose simplu, "unhealthy" doar marcheaza containerul — NU il
|
||||
# restarteaza (restart:always reactioneaza la EXIT). Sidecar-ul `autoheal` de
|
||||
# mai jos vede label-ul si chiar restarteaza worker-ul cand pica probe-ul.
|
||||
labels:
|
||||
autoheal: "true"
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-m", "app.worker.healthcheck"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
# Sidecar that restarts any container marked unhealthy and labelled autoheal=true
|
||||
# (i.e. the hung worker). Alternative: Docker Swarm (native restart on unhealthy).
|
||||
autoheal:
|
||||
# NOTE(review): floating "latest" tag — consider pinning a released version so
# redeploys are reproducible.
image: willfarrell/autoheal:latest
|
||||
restart: always
|
||||
environment:
|
||||
# Name of the container label the sidecar watches; matches the `autoheal` label
# key used in this compose file.
AUTOHEAL_CONTAINER_LABEL: autoheal
|
||||
volumes:
|
||||
# Host Docker socket mounted into the sidecar. NOTE(review): presumably this is
# how it issues restarts; it also gives the container control over the Docker
# daemon — confirm that is acceptable on the target host.
- /var/run/docker.sock:/var/run/docker.sock
|
||||
|
||||
# Top-level named volumes. An empty value declares the volume with default settings.
# NOTE(review): presumably the shared SQLite volume described in the file header —
# the service-level mounts are outside this view, confirm.
volumes:
|
||||
autopass-data:
|
||||
|
||||
Reference in New Issue
Block a user