feat(migrations): add one-shot import_openclaw_jobs_2026-04 script
Audit-trail tool that translates OpenClaw's nested jobs.json schema (schedule.expr with optional tz, payload.message, agentId, state) into echo-core's flat schema. UTC -> Europe/Bucharest cron conversion with DST-aware offset; Bucharest-tagged source expressions pass through unchanged. Rewrites `cd ~/clawd` / `/home/moltbot/clawd/` -> echo-core without matching `clawd-archive` or `clawdbot` substrings. Built-in skip list covers night-execute and antfarm/feature-dev/*; YouTube: prefix is auto-skipped. --dry-run, --skip-disabled, --skip, --channel, --source, --target flags. Duplicate job names in target are skipped with a warning; existing entries are preserved.
This commit is contained in:
458
tools/migrations/import_openclaw_jobs_2026-04.py
Executable file
458
tools/migrations/import_openclaw_jobs_2026-04.py
Executable file
@@ -0,0 +1,458 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
One-shot migration: translate OpenClaw cron/jobs.json to echo-core schema.
|
||||||
|
|
||||||
|
Dated: 2026-04
|
||||||
|
Status: ONE-SHOT tool. Kept in git as an audit artifact for the consolidation.
|
||||||
|
Restore path: if this needs to be re-run, the original OpenClaw file is at
|
||||||
|
/home/moltbot/.openclaw/cron/jobs.json
|
||||||
|
/home/moltbot/.openclaw/cron/jobs.json.bak
|
||||||
|
and the pre-migration echo-core jobs.json is recoverable from git history
|
||||||
|
(commit preceding `feat(cron): populate jobs.json with decomposed ...`).
|
||||||
|
|
||||||
|
OpenClaw schema (nested):
|
||||||
|
{
|
||||||
|
"id": "<uuid>",
|
||||||
|
"agentId": "echo",
|
||||||
|
"name": "<name>",
|
||||||
|
"enabled": bool,
|
||||||
|
"schedule": {"kind": "cron", "expr": "0 6 * * *", "tz": "Europe/Bucharest"?},
|
||||||
|
"sessionTarget": "isolated",
|
||||||
|
"payload": {"kind": "agentTurn", "message": "<prompt>", "model": "sonnet"?},
|
||||||
|
"state": {...},
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
Echo-core schema (flat, Claude job):
|
||||||
|
{
|
||||||
|
"name": "<name>",
|
||||||
|
"cron": "<expr, Bucharest local>",
|
||||||
|
"channel": "<channel name>",
|
||||||
|
"model": "sonnet",
|
||||||
|
"prompt": "<prompt, path-rewritten>",
|
||||||
|
"allowed_tools": [],
|
||||||
|
"enabled": bool,
|
||||||
|
"last_run": null, "last_status": null, "next_run": null
|
||||||
|
}
|
||||||
|
|
||||||
|
Echo-core scheduler interprets cron expressions in Europe/Bucharest. OpenClaw
|
||||||
|
used UTC by default (per its runtime) unless schedule.tz is set explicitly.
|
||||||
|
This script converts UTC -> Europe/Bucharest for jobs without an explicit tz.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 tools/migrations/import_openclaw_jobs_2026-04.py [flags]
|
||||||
|
|
||||||
|
Flags:
|
||||||
|
--dry-run Print what would change without writing.
|
||||||
|
--skip-disabled Skip jobs where enabled is false (default: import all).
|
||||||
|
--skip name1,name2,... Comma-separated list of job names to exclude.
|
||||||
|
--channel <name> Default channel for imported jobs (default: echo-work).
|
||||||
|
--source <path> Path to openclaw jobs.json.
|
||||||
|
--target <path> Path to echo-core jobs.json.
|
||||||
|
|
||||||
|
The script is idempotent with respect to existing jobs: if a job with the same
|
||||||
|
name is already present in the target, it is skipped with a warning, and the
|
||||||
|
existing entry is preserved untouched.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Repo root: this file lives two levels down (tools/migrations/<this file>).
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Default source/target paths; both overridable via --source / --target.
DEFAULT_SOURCE = Path("/home/moltbot/.openclaw/cron/jobs.json")
DEFAULT_TARGET = PROJECT_ROOT / "cron" / "jobs.json"
DEFAULT_CHANNEL = "echo-work"

# echo-core's scheduler timezone, and the assumed OpenClaw source timezone.
BUCHAREST = ZoneInfo("Europe/Bucharest")
UTC = ZoneInfo("UTC")

# Jobs to skip by default. Anti-foot-gun list for known-dead/bad openclaw jobs.
# Can be extended at invocation time via --skip.
SKIP_BY_DEFAULT: set[str] = {
    "night-execute",  # SSH to LXC, dead infra
    "antfarm/feature-dev/planner",
    "antfarm/feature-dev/setup",
    "antfarm/feature-dev/developer",
    "antfarm/feature-dev/verifier",
    "antfarm/feature-dev/tester",
    "antfarm/feature-dev/reviewer",
}

# YouTube:* one-off pinned prompts — always auto-skipped regardless of flags.
YOUTUBE_PREFIX = "YouTube:"

# Path rewrites applied to prompt bodies. Each pattern is a compiled regex;
# the replacement is a literal string. Order matters — longer/more-specific
# patterns first so the shorter ones don't eat them prematurely.
#
# We use lookahead/boundary tricks so that `clawd-archive`, `clawdbot`,
# `clawd.old`, etc. are NOT matched. `clawd` must be immediately followed
# by `/` (path boundary) or `$` / whitespace (end-of-token).
#
# NOTE(review): `cd ~/clawd/subdir` is matched by NONE of these patterns —
# the `(?![\w/-])` lookahead rejects a following `/`, and the absolute-path
# pattern only covers /home/moltbot/clawd/. Confirm that is intended.
PATH_REWRITES: list[tuple[re.Pattern[str], str]] = [
    # Absolute path: /home/moltbot/clawd/... -> /home/moltbot/echo-core/...
    (re.compile(r"/home/moltbot/clawd(?=/)"), "/home/moltbot/echo-core"),
    # Shell form: cd ~/clawd -> cd ~/echo-core (allow trailing & or space)
    (re.compile(r"(?<![\w-])cd\s+~/clawd(?![\w/-])"), "cd ~/echo-core"),
    # Shell form: cd /home/moltbot/clawd -> cd /home/moltbot/echo-core
    (re.compile(r"(?<![\w-])cd\s+/home/moltbot/clawd(?![\w/-])"),
     "cd /home/moltbot/echo-core"),
]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_cron_field(field: str) -> list[int] | None:
|
||||||
|
"""Return sorted list of ints the field expands to, or None if uncertain.
|
||||||
|
|
||||||
|
Handles: "*", "N", "N,M", "N-M", "*/S", "A-B/S", "N,M-P/S".
|
||||||
|
Returns None for anything we don't recognise (caller should warn and leave
|
||||||
|
the job for manual review).
|
||||||
|
"""
|
||||||
|
# Don't attempt to resolve `*` here — caller handles it per-field since
|
||||||
|
# the valid range depends on which field it is.
|
||||||
|
result: set[int] = set()
|
||||||
|
parts = field.split(",")
|
||||||
|
for part in parts:
|
||||||
|
if part == "*":
|
||||||
|
return None # caller handles
|
||||||
|
step = 1
|
||||||
|
if "/" in part:
|
||||||
|
base, step_s = part.split("/", 1)
|
||||||
|
try:
|
||||||
|
step = int(step_s)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
base = part
|
||||||
|
if base == "*":
|
||||||
|
return None
|
||||||
|
if "-" in base:
|
||||||
|
try:
|
||||||
|
lo_s, hi_s = base.split("-", 1)
|
||||||
|
lo, hi = int(lo_s), int(hi_s)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
for v in range(lo, hi + 1, step):
|
||||||
|
result.add(v)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
result.add(int(base))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
return sorted(result)
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_hour_field(hour_field: str, day_shift_from_utc: int) -> tuple[str, bool]:
|
||||||
|
"""Convert UTC hour field to Bucharest-local hour field.
|
||||||
|
|
||||||
|
Returns (converted_field, approx_ok). approx_ok is False when we
|
||||||
|
couldn't confidently translate (e.g. odd step that crosses midnight).
|
||||||
|
The caller should warn if False and present the job for manual review.
|
||||||
|
|
||||||
|
Strategy: if the field is "*" -> "*"; if it expands to a concrete list
|
||||||
|
of hours, shift each hour by `day_shift_from_utc` (UTC+2 or UTC+3 depending
|
||||||
|
on DST) modulo 24. If any hour wraps past midnight (which would change the
|
||||||
|
day-of-week / day-of-month field in a way a simple script can't handle),
|
||||||
|
flag approx_ok=False.
|
||||||
|
"""
|
||||||
|
if hour_field == "*":
|
||||||
|
return "*", True
|
||||||
|
|
||||||
|
hours = _parse_cron_field(hour_field)
|
||||||
|
if hours is None:
|
||||||
|
return hour_field, False
|
||||||
|
|
||||||
|
# Shift and check for day-wrap
|
||||||
|
day_wrap = False
|
||||||
|
shifted = []
|
||||||
|
for h in hours:
|
||||||
|
new_h = h + day_shift_from_utc
|
||||||
|
if new_h >= 24:
|
||||||
|
new_h -= 24
|
||||||
|
day_wrap = True
|
||||||
|
elif new_h < 0:
|
||||||
|
new_h += 24
|
||||||
|
day_wrap = True
|
||||||
|
shifted.append(new_h)
|
||||||
|
|
||||||
|
shifted = sorted(set(shifted))
|
||||||
|
if not shifted:
|
||||||
|
return hour_field, False
|
||||||
|
|
||||||
|
# Try to re-compress into a step form if the input looked like A-B/S.
|
||||||
|
# For simplicity we emit a comma-separated list. APScheduler accepts that.
|
||||||
|
return ",".join(str(h) for h in shifted), not day_wrap
|
||||||
|
|
||||||
|
|
||||||
|
def convert_cron_utc_to_bucharest(
|
||||||
|
expr: str,
|
||||||
|
src_tz: str | None,
|
||||||
|
reference_dt: datetime | None = None,
|
||||||
|
) -> tuple[str, list[str]]:
|
||||||
|
"""Translate a cron expression from src_tz to Europe/Bucharest.
|
||||||
|
|
||||||
|
If `src_tz == 'Europe/Bucharest'` the expression is returned unchanged.
|
||||||
|
Otherwise we assume UTC source (OpenClaw's default runtime) and shift the
|
||||||
|
hour field by the current UTC->Bucharest offset.
|
||||||
|
|
||||||
|
Returns (new_expr, warnings). warnings is a list of human-readable notes;
|
||||||
|
if non-empty, caller should flag for manual review.
|
||||||
|
|
||||||
|
DST caveat: the offset is evaluated at `reference_dt` (default: now).
|
||||||
|
Jobs that span DST transitions may need manual tuning. We emit a warning
|
||||||
|
rather than trying to be clever.
|
||||||
|
"""
|
||||||
|
warnings: list[str] = []
|
||||||
|
if src_tz == "Europe/Bucharest":
|
||||||
|
return expr, warnings
|
||||||
|
|
||||||
|
fields = expr.split()
|
||||||
|
if len(fields) != 5:
|
||||||
|
warnings.append(f"cron expr does not have 5 fields: {expr!r}")
|
||||||
|
return expr, warnings
|
||||||
|
|
||||||
|
minute, hour, dom, month, dow = fields
|
||||||
|
|
||||||
|
ref = reference_dt or datetime.now(UTC)
|
||||||
|
# offset for "what is UTC hour X in Bucharest?"
|
||||||
|
offset_seconds = int(
|
||||||
|
ref.replace(tzinfo=UTC).astimezone(BUCHAREST).utcoffset().total_seconds()
|
||||||
|
)
|
||||||
|
# should be +7200 (winter) or +10800 (summer)
|
||||||
|
shift_hours = offset_seconds // 3600
|
||||||
|
|
||||||
|
new_hour, ok = _convert_hour_field(hour, shift_hours)
|
||||||
|
if not ok:
|
||||||
|
warnings.append(
|
||||||
|
f"hour field {hour!r} crosses day boundary or is complex — "
|
||||||
|
"verify day-of-week/day-of-month manually"
|
||||||
|
)
|
||||||
|
|
||||||
|
return f"{minute} {new_hour} {dom} {month} {dow}", warnings
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_prompt_paths(text: str) -> tuple[str, list[tuple[str, str]]]:
    """Apply the PATH_REWRITES table to a prompt body.

    Returns (new_text, substitutions): the rewritten prompt plus one
    (old_snippet, new_snippet) tuple per rewrite performed, in table order.
    """
    performed: list[tuple[str, str]] = []
    rewritten = text
    for regex, target in PATH_REWRITES:
        # Record every hit first, then substitute. The replacement callable
        # returns the literal target so no regex escape expansion occurs.
        hits = [m.group(0) for m in regex.finditer(rewritten)]
        if not hits:
            continue
        performed.extend((hit, target) for hit in hits)
        rewritten = regex.sub(lambda _m, _t=target: _t, rewritten)
    return rewritten, performed
|
||||||
|
|
||||||
|
|
||||||
|
def translate_job(
|
||||||
|
oc_job: dict,
|
||||||
|
default_channel: str,
|
||||||
|
reference_dt: datetime | None = None,
|
||||||
|
) -> tuple[dict | None, list[str]]:
|
||||||
|
"""Translate one openclaw job dict to an echo-core job dict.
|
||||||
|
|
||||||
|
Returns (echo_job, warnings). echo_job is None if the job cannot be
|
||||||
|
translated (e.g. non-cron schedule).
|
||||||
|
"""
|
||||||
|
warnings: list[str] = []
|
||||||
|
name = oc_job.get("name") or oc_job.get("id") or "<unnamed>"
|
||||||
|
|
||||||
|
sched = oc_job.get("schedule") or {}
|
||||||
|
if sched.get("kind") != "cron":
|
||||||
|
warnings.append(
|
||||||
|
f"job {name!r}: schedule.kind={sched.get('kind')!r} "
|
||||||
|
"is not 'cron' — skipping (manual review)"
|
||||||
|
)
|
||||||
|
return None, warnings
|
||||||
|
|
||||||
|
expr = sched.get("expr")
|
||||||
|
if not isinstance(expr, str) or not expr.strip():
|
||||||
|
warnings.append(f"job {name!r}: missing/empty schedule.expr — skipping")
|
||||||
|
return None, warnings
|
||||||
|
|
||||||
|
src_tz = sched.get("tz")
|
||||||
|
new_expr, tz_warnings = convert_cron_utc_to_bucharest(
|
||||||
|
expr, src_tz, reference_dt=reference_dt
|
||||||
|
)
|
||||||
|
for w in tz_warnings:
|
||||||
|
warnings.append(f"job {name!r}: {w}")
|
||||||
|
|
||||||
|
payload = oc_job.get("payload") or {}
|
||||||
|
prompt = payload.get("message") or ""
|
||||||
|
new_prompt, subs = rewrite_prompt_paths(prompt)
|
||||||
|
for old, new in subs:
|
||||||
|
warnings.append(f"job {name!r}: rewrote {old!r} -> {new!r}")
|
||||||
|
|
||||||
|
model = payload.get("model") or "sonnet"
|
||||||
|
|
||||||
|
# openclaw doesn't track allowedTools in the same way; start with [].
|
||||||
|
allowed = oc_job.get("allowedTools") or payload.get("allowedTools") or []
|
||||||
|
if not isinstance(allowed, list):
|
||||||
|
allowed = []
|
||||||
|
|
||||||
|
echo_job = {
|
||||||
|
"name": name,
|
||||||
|
"cron": new_expr,
|
||||||
|
"channel": default_channel,
|
||||||
|
"model": model,
|
||||||
|
"prompt": new_prompt,
|
||||||
|
"allowed_tools": list(allowed),
|
||||||
|
"enabled": bool(oc_job.get("enabled", False)),
|
||||||
|
"last_run": None,
|
||||||
|
"last_status": None,
|
||||||
|
"next_run": None,
|
||||||
|
}
|
||||||
|
return echo_job, warnings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def load_json(path: Path) -> object:
    """Read *path* as UTF-8 text and return the decoded JSON object."""
    return json.loads(path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def _is_skipped(name: str, skip_set: set[str], include_default_skip: bool) -> bool:
    """Decide whether the job called *name* is excluded from the import.

    YouTube:-prefixed jobs are always excluded; SKIP_BY_DEFAULT applies
    only when include_default_skip is True; skip_set (from --skip) always
    applies.
    """
    if name.startswith(YOUTUBE_PREFIX):
        return True
    return name in skip_set or (include_default_skip and name in SKIP_BY_DEFAULT)
|
||||||
|
|
||||||
|
|
||||||
|
def run(argv: list[str] | None = None) -> int:
    """CLI entry point.

    Loads the OpenClaw source file, translates each job, appends the
    accepted jobs to the echo-core target (atomically, via a temp file),
    and prints a per-job decision summary. Returns a shell exit code:
    0 on success (including dry runs), 2 on usage/input errors.
    """
    # __doc__ starts with a newline, so splitlines()[0] would be "" — strip
    # first to get the real one-line summary.
    doc_lines = (__doc__ or "").strip().splitlines()
    p = argparse.ArgumentParser(description=doc_lines[0] if doc_lines else None)
    p.add_argument("--dry-run", action="store_true")
    p.add_argument("--skip-disabled", action="store_true")
    p.add_argument("--skip", default="",
                   help="Comma-separated list of additional names to skip.")
    p.add_argument("--no-default-skip", action="store_true",
                   help="Disable the built-in SKIP_BY_DEFAULT list.")
    p.add_argument("--channel", default=DEFAULT_CHANNEL,
                   help=f"Default channel for imported jobs (default: {DEFAULT_CHANNEL}).")
    p.add_argument("--source", default=str(DEFAULT_SOURCE))
    p.add_argument("--target", default=str(DEFAULT_TARGET))
    args = p.parse_args(argv)

    source = Path(args.source)
    target = Path(args.target)
    extra_skip = {s.strip() for s in args.skip.split(",") if s.strip()}
    include_default_skip = not args.no_default_skip

    if not source.exists():
        print(f"ERROR: source not found: {source}", file=sys.stderr)
        return 2

    oc_data = load_json(source)
    if not isinstance(oc_data, dict) or "jobs" not in oc_data:
        print(f"ERROR: source {source} is not a dict with 'jobs' key",
              file=sys.stderr)
        return 2
    oc_jobs = oc_data["jobs"]
    if not isinstance(oc_jobs, list):
        # Guard: a dict here would otherwise iterate over keys below.
        print(f"ERROR: source {source} 'jobs' is not a list", file=sys.stderr)
        return 2

    # Load target (may not exist yet); it must be a flat JSON list of jobs.
    if target.exists():
        target_jobs = load_json(target)
        if not isinstance(target_jobs, list):
            print(f"ERROR: target {target} is not a JSON list", file=sys.stderr)
            return 2
    else:
        target_jobs = []

    # Names already taken. This set grows as jobs are accepted below, so
    # duplicate names *within the source batch* are also caught (first
    # occurrence wins) — previously they were all appended.
    existing_names = {j.get("name") for j in target_jobs}

    ref = datetime.now(UTC)
    to_add: list[dict] = []
    summary_lines: list[str] = []

    for oc_job in oc_jobs:
        name = oc_job.get("name") or oc_job.get("id") or "<unnamed>"

        if _is_skipped(name, extra_skip, include_default_skip):
            summary_lines.append(f" SKIP {name:40s} (skip list)")
            continue

        if args.skip_disabled and not oc_job.get("enabled", False):
            summary_lines.append(f" SKIP {name:40s} (disabled, --skip-disabled)")
            continue

        echo_job, warnings = translate_job(oc_job, args.channel, reference_dt=ref)

        if echo_job is None:
            for w in warnings:
                summary_lines.append(f" WARN {w}")
            summary_lines.append(f" SKIP {name:40s} (untranslatable)")
            continue

        if echo_job["name"] in existing_names:
            summary_lines.append(
                f" DUPE {name:40s} (already in target — existing entry preserved)"
            )
            continue

        for w in warnings:
            summary_lines.append(f" WARN {w}")

        summary_lines.append(
            f" ADD {name:40s} cron={echo_job['cron']!r:18s} "
            f"enabled={echo_job['enabled']} model={echo_job['model']}"
        )
        to_add.append(echo_job)
        existing_names.add(echo_job["name"])  # catches source-side dupes

    # Print summary
    print(f"Source: {source}")
    print(f"Target: {target}")
    print(f"Dry-run: {args.dry_run}")
    print(f"Default channel for imports: {args.channel}")
    print(f"Existing target jobs: {len(target_jobs)}")
    print(f"Source jobs: {len(oc_jobs)}")
    print()
    print("Per-job decisions:")
    for line in summary_lines:
        print(line)
    print()
    print(f"Would add {len(to_add)} new job(s) to target.")

    if args.dry_run:
        print("[DRY-RUN] no changes written.")
        return 0

    if not to_add:
        print("Nothing to write.")
        return 0

    target_jobs.extend(to_add)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Atomic write: dump to a sibling temp file, then rename over the target
    # so a crash mid-write can't leave a truncated jobs.json.
    tmp = target.with_suffix(target.suffix + ".tmp")
    with tmp.open("w", encoding="utf-8") as f:
        json.dump(target_jobs, f, indent=2, ensure_ascii=False)
        f.write("\n")
    tmp.replace(target)
    print(f"Wrote {len(target_jobs)} jobs to {target}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: propagate run()'s exit code to the shell.
if __name__ == "__main__":
    sys.exit(run())
|
||||||
Reference in New Issue
Block a user