echo-core/tests/test_ralph_usage.py

"""Tests for tools/ralph_usage.py — rate limit budget tracking.

Covers:
- extract_usage_entry: correct shape, missing fields, corrupt JSON → None
- parse_usage_jsonl: skips corrupt lines, missing file → []
- aggregate_by_day / aggregate_by_project: correct sums, deduplication
- filter_by_days: inclusive vs. exclusive window
- summarize: correct today_cost/today_runs
- append_entry: atomic write, JSONL roundtrip
- CLI append: idempotent on corrupt JSON (no-op + exit 0)
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

import pytest

# Directory one level above the folder that contains this test file.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
TOOLS = PROJECT_ROOT / "tools"
# Put tools/ on sys.path (only once) so the module under test can be imported
# by its bare name below.
if str(TOOLS) not in sys.path:
    sys.path.insert(0, str(TOOLS))

# Imported after the sys.path tweak on purpose, hence the E402 suppression.
import ralph_usage  # noqa: E402


# ── Sample claude -p --output-format json envelopes ────────────────


def _claude_envelope(
    *,
    cost: float = 0.55,
    input_tokens: int = 1234,
    output_tokens: int = 567,
    cache_read: int = 890,
    duration_ms: int = 49000,
    model: str = "claude-opus-4-7-20260101",
) -> dict:
    """Build a sample result envelope used as input by the tests below.

    Every argument is keyword-only and overrides the matching field of the
    envelope; all remaining fields carry fixed placeholder values.

    Returns:
        A fresh dict with the top-level result fields and a nested ``usage``
        dict holding the token counters.
    """
    token_usage = dict(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cache_creation_input_tokens=0,
        cache_read_input_tokens=cache_read,
    )
    return dict(
        type="result",
        subtype="completed",
        session_id="abc123",
        result="Story implementat",
        is_error=False,
        total_cost_usd=cost,
        duration_ms=duration_ms,
        num_turns=5,
        usage=token_usage,
        model=model,
    )


# ── extract_usage_entry ────────────────────────────────────────────


class TestExtractEntry:
    """Tests for ``ralph_usage.extract_usage_entry``."""

    def test_full_envelope_extracts_all_fields(self):
        """A complete envelope is expected to map onto the flat entry dict."""
        env = _claude_envelope()
        entry = ralph_usage.extract_usage_entry(
            env, slug="proj-a", story_id="US-001", iter_n=3,
            ts="2026-04-26T12:00:00+00:00",
        )
        assert entry == {
            "ts": "2026-04-26T12:00:00+00:00",
            "slug": "proj-a",
            "story_id": "US-001",
            "iter": 3,
            "total_cost_usd": 0.55,
            "input_tokens": 1234,
            "output_tokens": 567,
            "cache_read": 890,
            "model": "claude-opus-4-7-20260101",
            "duration_ms": 49000,
        }

    def test_accepts_raw_string(self):
        """A JSON string is accepted in place of an already-parsed dict."""
        env = _claude_envelope()
        entry = ralph_usage.extract_usage_entry(
            json.dumps(env), slug="x", story_id=None, iter_n=None,
            ts="2026-04-26T00:00:00+00:00",
        )
        assert entry is not None
        assert entry["story_id"] is None
        assert entry["iter"] is None
        assert entry["total_cost_usd"] == 0.55

    def test_corrupt_json_returns_none(self):
        """Unparseable, empty, and ``null`` payloads are expected to give None."""
        assert ralph_usage.extract_usage_entry("{not json", slug="x") is None
        assert ralph_usage.extract_usage_entry("", slug="x") is None
        assert ralph_usage.extract_usage_entry("null", slug="x") is None

    def test_missing_usage_field_zeros(self):
        """Without a ``usage`` block, token counters are 0 and model is None."""
        env = {"total_cost_usd": 0.1, "duration_ms": 1000}
        entry = ralph_usage.extract_usage_entry(env, slug="x")
        # Guard first so a None result fails as an assertion, not a TypeError.
        assert entry is not None
        assert entry["input_tokens"] == 0
        assert entry["output_tokens"] == 0
        assert entry["cache_read"] == 0
        assert entry["model"] is None

    def test_missing_cost_defaults_zero(self):
        """A missing ``total_cost_usd`` is expected to default to 0.0."""
        env = {"usage": {"input_tokens": 100}}
        entry = ralph_usage.extract_usage_entry(env, slug="x")
        # Guard first so a None result fails as an assertion, not a TypeError.
        assert entry is not None
        assert entry["total_cost_usd"] == 0.0
        assert entry["input_tokens"] == 100

    def test_non_dict_returns_none(self):
        """Parsed values that are not dicts are expected to give None."""
        assert ralph_usage.extract_usage_entry([], slug="x") is None
        assert ralph_usage.extract_usage_entry(123, slug="x") is None

    def test_alternative_cache_field_name(self):
        """The short ``cache_read`` key inside ``usage`` is also honoured."""
        # Defensive: in case a future schema uses `cache_read`
        env = {"usage": {"cache_read": 42}, "total_cost_usd": 0.1}
        entry = ralph_usage.extract_usage_entry(env, slug="x")
        # Guard first so a None result fails as an assertion, not a TypeError.
        assert entry is not None
        assert entry["cache_read"] == 42


# ── parse_usage_jsonl ──────────────────────────────────────────────


class TestParseJsonl:
    """Tests for ``ralph_usage.parse_usage_jsonl``."""

    def test_file_missing_returns_empty(self, tmp_path):
        """A path that does not exist is expected to parse to an empty list."""
        missing = tmp_path / "ghost.jsonl"
        assert ralph_usage.parse_usage_jsonl(missing) == []

    def test_skips_corrupt_lines(self, tmp_path):
        """Only the two well-formed dict lines are expected to survive."""
        jsonl_path = tmp_path / "u.jsonl"
        raw_lines = [
            '{"slug": "a", "ts": "2026-04-26T00:00:00+00:00", "total_cost_usd": 0.1}\n',
            "{not json}\n",
            '{"slug": "b", "ts": "2026-04-26T01:00:00+00:00", "total_cost_usd": 0.2}\n',
            "\n",
            "[]\n",  # valid JSON, but not a dict
        ]
        jsonl_path.write_text("".join(raw_lines), encoding="utf-8")

        parsed = ralph_usage.parse_usage_jsonl(jsonl_path)

        assert [item["slug"] for item in parsed] == ["a", "b"]

    def test_empty_file_returns_empty(self, tmp_path):
        """A zero-byte file is expected to parse to an empty list."""
        jsonl_path = tmp_path / "u.jsonl"
        jsonl_path.write_text("", encoding="utf-8")
        parsed = ralph_usage.parse_usage_jsonl(jsonl_path)
        assert parsed == []


# ── aggregate_by_day / aggregate_by_project ───────────────────────


class TestAggregate:
    """Tests for ``aggregate_by_day`` and ``aggregate_by_project``.

    Cost totals are compared with ``pytest.approx`` because they are float
    sums; exact equality would depend on summation order and rounding.
    """

    @pytest.fixture
    def entries(self):
        """Two proj-a runs on 2026-04-26 and one proj-b run on 2026-04-25."""
        return [
            {"slug": "proj-a", "ts": "2026-04-26T10:00:00+00:00",
             "total_cost_usd": 0.5, "input_tokens": 100, "output_tokens": 50, "cache_read": 200},
            {"slug": "proj-a", "ts": "2026-04-26T11:00:00+00:00",
             "total_cost_usd": 0.3, "input_tokens": 80, "output_tokens": 30, "cache_read": 100},
            {"slug": "proj-b", "ts": "2026-04-25T22:00:00+00:00",
             "total_cost_usd": 1.2, "input_tokens": 500, "output_tokens": 200, "cache_read": 0},
        ]

    def test_aggregate_by_day(self, entries):
        """Per-day totals and run counts are summed; days come newest first."""
        result = ralph_usage.aggregate_by_day(entries)
        assert result["2026-04-26"]["cost_usd"] == pytest.approx(0.8)
        assert result["2026-04-26"]["runs"] == 2
        assert result["2026-04-26"]["input_tokens"] == 180
        assert result["2026-04-26"]["output_tokens"] == 80
        assert result["2026-04-26"]["cache_read"] == 300
        assert result["2026-04-25"]["cost_usd"] == pytest.approx(1.2)
        assert result["2026-04-25"]["runs"] == 1
        # Iteration order is expected to be descending by date.
        keys = list(result.keys())
        assert keys == ["2026-04-26", "2026-04-25"]

    def test_aggregate_by_project(self, entries):
        """Per-project totals and run counts are summed by slug."""
        result = ralph_usage.aggregate_by_project(entries)
        assert result["proj-a"]["cost_usd"] == pytest.approx(0.8)
        assert result["proj-a"]["runs"] == 2
        assert result["proj-b"]["cost_usd"] == pytest.approx(1.2)
        assert result["proj-b"]["runs"] == 1

    def test_aggregate_handles_missing_slug(self):
        """An entry without ``slug`` is expected under the "unknown" key."""
        entries = [{"ts": "2026-04-26T00:00:00+00:00", "total_cost_usd": 0.1}]
        result = ralph_usage.aggregate_by_project(entries)
        assert "unknown" in result

    def test_aggregate_handles_missing_ts(self):
        """An entry without ``ts`` is expected to be left out of by-day totals."""
        entries = [{"slug": "x", "total_cost_usd": 0.1}]
        # Missing ts → skipped from by_day
        result = ralph_usage.aggregate_by_day(entries)
        assert result == {}

    def test_aggregate_empty_entries(self):
        """Both aggregators are expected to return an empty dict for no input."""
        assert ralph_usage.aggregate_by_day([]) == {}
        assert ralph_usage.aggregate_by_project([]) == {}


# ── filter_by_days ─────────────────────────────────────────────────


class TestFilterByDays:
    """Tests for ``ralph_usage.filter_by_days`` with a pinned ``today``."""

    def test_window_inclusive_today(self):
        """All three dates from 2026-04-20 through today are expected to stay."""
        stamps = (
            "2026-04-26T00:00:00+00:00",
            "2026-04-25T00:00:00+00:00",
            "2026-04-20T00:00:00+00:00",
        )
        entries = [{"ts": stamp, "slug": "a"} for stamp in stamps]
        kept = ralph_usage.filter_by_days(entries, 7, today="2026-04-26")
        # 7-day window counted inclusively from today: 2026-04-20 .. 2026-04-26
        kept_dates = [item["ts"][:10] for item in kept]
        assert kept_dates == ["2026-04-26", "2026-04-25", "2026-04-20"]

    def test_window_exclusive_older(self):
        """An entry dated 7 days before today is expected to be dropped."""
        in_window = {"ts": "2026-04-26T00:00:00+00:00"}
        too_old = {"ts": "2026-04-19T00:00:00+00:00"}  # 7 days before today → excluded
        kept = ralph_usage.filter_by_days(
            [in_window, too_old], 7, today="2026-04-26"
        )
        assert len(kept) == 1
        assert kept[0]["ts"] == "2026-04-26T00:00:00+00:00"

    def test_zero_days_empty(self):
        """A zero-day window is expected to keep nothing, not even today."""
        only_today = [{"ts": "2026-04-26T00:00:00+00:00"}]
        kept = ralph_usage.filter_by_days(only_today, 0, today="2026-04-26")
        assert kept == []

    def test_corrupt_ts_skipped(self):
        """An entry whose ``ts`` is not a timestamp is expected to be dropped."""
        broken = [{"ts": "garbage"}]
        kept = ralph_usage.filter_by_days(broken, 7, today="2026-04-26")
        assert kept == []


# ── summarize ──────────────────────────────────────────────────────


class TestSummarize:
    """Tests for ``ralph_usage.summarize`` with a pinned ``today``."""

    def test_summary_shape_and_today_split(self):
        """Today's figures are split out from the window and overall totals."""
        entries = [
            {"ts": "2026-04-26T10:00:00+00:00", "slug": "a", "total_cost_usd": 0.5,
             "input_tokens": 100, "output_tokens": 50, "cache_read": 0},
            {"ts": "2026-04-26T11:00:00+00:00", "slug": "a", "total_cost_usd": 0.3,
             "input_tokens": 80, "output_tokens": 30, "cache_read": 0},
            {"ts": "2026-04-25T00:00:00+00:00", "slug": "b", "total_cost_usd": 1.0,
             "input_tokens": 0, "output_tokens": 0, "cache_read": 0},
        ]
        s = ralph_usage.summarize(entries, days=7, today="2026-04-26")
        assert s["today"] == "2026-04-26"
        # Float sum (0.5 + 0.3): compare approximately so the check does not
        # hinge on summation order or rounding.
        assert s["today_cost"] == pytest.approx(0.8)
        assert s["today_runs"] == 2
        assert s["window_days"] == 7
        assert s["window_runs"] == 3
        assert "by_project" in s
        assert "by_day" in s
        assert s["total_runs"] == 3
        assert s["by_project"]["a"]["runs"] == 2
        assert s["by_project"]["b"]["runs"] == 1

    def test_summary_empty_entries(self):
        """With no entries, counters are zero and the breakdowns are empty."""
        s = ralph_usage.summarize([], days=7, today="2026-04-26")
        assert s["today_cost"] == 0
        assert s["today_runs"] == 0
        assert s["by_project"] == {}
        assert s["by_day"] == {}
        assert s["total_runs"] == 0


# ── append_entry ───────────────────────────────────────────────────


class TestAppendEntry:
    """Tests for ``ralph_usage.append_entry``."""

    def test_append_creates_file_with_jsonl_format(self, tmp_path):
        """Appending to a new path yields one newline-terminated JSON line."""
        target = tmp_path / "usage.jsonl"
        record = {"slug": "x", "ts": "2026-04-26T00:00:00+00:00", "total_cost_usd": 0.1}

        ralph_usage.append_entry(target, record)

        written = target.read_text(encoding="utf-8")
        assert written.endswith("\n")
        assert json.loads(written.strip()) == record

    def test_append_preserves_existing_entries(self, tmp_path):
        """An existing line is kept and the new entry lands after it."""
        target = tmp_path / "usage.jsonl"
        existing_line = (
            '{"slug": "a", "ts": "2026-04-25T00:00:00+00:00", "total_cost_usd": 0.5}\n'
        )
        target.write_text(existing_line, encoding="utf-8")
        new_record = {
            "slug": "b",
            "ts": "2026-04-26T00:00:00+00:00",
            "total_cost_usd": 0.3,
        }

        ralph_usage.append_entry(target, new_record)

        parsed = ralph_usage.parse_usage_jsonl(target)
        assert len(parsed) == 2
        first, second = parsed[0], parsed[1]
        assert first["slug"] == "a"
        assert second["slug"] == "b"

    def test_append_handles_missing_trailing_newline(self, tmp_path):
        """A file whose last line lacks a newline still parses as two entries."""
        target = tmp_path / "usage.jsonl"
        unterminated = '{"slug": "a", "ts": "2026-04-25T00:00:00+00:00"}'  # no trailing \n
        target.write_text(unterminated, encoding="utf-8")

        ralph_usage.append_entry(target, {"slug": "b", "ts": "2026-04-26T00:00:00+00:00"})

        parsed = ralph_usage.parse_usage_jsonl(target)
        found_slugs = [item["slug"] for item in parsed]
        assert found_slugs == ["a", "b"]


# ── CLI: append subcommand ─────────────────────────────────────────


class TestCliAppend:
    """Tests for the ``append`` subcommand, driven through ``ralph_usage.main``."""

    def test_append_from_log_file(self, tmp_path):
        """A valid envelope in the log file becomes exactly one usage entry."""
        log = tmp_path / "iter.log"
        log.write_text(json.dumps(_claude_envelope(cost=0.42)), encoding="utf-8")
        usage = tmp_path / "usage.jsonl"

        rc = ralph_usage.main([
            "append", str(usage), str(log),
            "--slug", "proj-a",
            "--story-id", "US-001",
            "--iter", "3",
        ])
        assert rc == 0
        entries = ralph_usage.parse_usage_jsonl(usage)
        assert len(entries) == 1
        e = entries[0]
        assert e["slug"] == "proj-a"
        assert e["story_id"] == "US-001"
        assert e["iter"] == 3
        assert e["total_cost_usd"] == 0.42

    def test_append_corrupt_log_no_op(self, tmp_path):
        """A log that is not JSON is expected to exit 0 and write no entry."""
        log = tmp_path / "iter.log"
        log.write_text("not json", encoding="utf-8")
        usage = tmp_path / "usage.jsonl"

        rc = ralph_usage.main([
            "append", str(usage), str(log),
            "--slug", "proj-a",
        ])
        # Idempotent: corrupt JSON → exit 0, no entry written
        assert rc == 0
        assert not usage.exists() or ralph_usage.parse_usage_jsonl(usage) == []

    def test_append_missing_log_no_op(self, tmp_path):
        """A log path that does not exist is expected to exit 0 and write no entry."""
        usage = tmp_path / "usage.jsonl"
        rc = ralph_usage.main([
            "append", str(usage), str(tmp_path / "missing.log"),
            "--slug", "x",
        ])
        assert rc == 0
        # Same no-op check as the corrupt-log case: the exit code alone does
        # not show that nothing was appended.
        assert not usage.exists() or ralph_usage.parse_usage_jsonl(usage) == []


# ── CLI: summarize subcommand ──────────────────────────────────────


class TestCliSummarize:
    """Tests for the ``summarize`` subcommand, driven through ``ralph_usage.main``."""

    def test_summarize_outputs_json(self, tmp_path, capsys):
        """Stdout is expected to be a JSON object with the top-level summary keys."""
        usage = tmp_path / "usage.jsonl"
        record = {"slug": "x", "ts": "2026-04-26T00:00:00+00:00", "total_cost_usd": 0.5}
        usage.write_text(json.dumps(record) + "\n", encoding="utf-8")

        exit_code = ralph_usage.main(["summarize", str(usage), "--days", "7"])

        assert exit_code == 0
        payload = json.loads(capsys.readouterr().out)
        for expected_key in ("today", "by_project", "by_day"):
            assert expected_key in payload