hermes-agent/time_sync_watcher.py
#!/usr/bin/env python3
"""
time_sync_watcher.py — GreySec
Watches TIME-LOG.md for new entries and syncs them to Supabase.
Runs as a lightweight background process on pop-os.
Tracks last-read line count in state file to process only new rows.
"""
import hashlib
import json
import os
import re
import time
import urllib.error
import urllib.request

# ── Config ────────────────────────────────────────────────────────────────────
TIME_LOG = os.path.expanduser("~/Documents/empire/1-areas/kate/time/TIME-LOG.md")
STATE_FILE = os.path.expanduser("~/.hermes/state/timelog_sync.json")
API_BASE = "http://localhost:3000"
API_KEY = os.environ.get("GREYSEC_API_KEY", "greysec-dev-key-2026")
RECHECK_SECONDS = 30  # Poll interval
# ── State ──────────────────────────────────────────────────────────────────────
def load_state():
    if os.path.exists(STATE_FILE):
        try:
            with open(STATE_FILE) as f:
                return json.load(f)
        except (json.JSONDecodeError, IOError):
            pass
    return {"last_line_count": 0, "last_mtime": 0, "processed_hashes": []}


def save_state(state):
    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
    with open(STATE_FILE, "w") as f:
        json.dump(state, f, indent=2)

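# The resulting state file looks like this (values illustrative):
#   {"last_line_count": 142, "last_mtime": 1737000000.0, "processed_hashes": ["9a0f..."]}
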
# ── Markdown table parser ───────────────────────────────────────────────────────
def parse_row(line):
    """
    Parse a single markdown table row into an entry dict.
    Returns None for non-data lines: section markers (##), the header row,
    separator lines, and empty lines.
    """
    line = line.strip()
    if not line.startswith("|"):
        return None
    if "Project" in line and "Date" in line:  # header row
        return None
    if set(line) <= {" ", "|", "-", ":", "+"}:  # separator like |---|---| or |:---|:---|
        return None
    if re.match(r"\|\s*-+\s*\|", line):  # markdown table separator
        return None
    parts = [p.strip() for p in line.split("|")]
    parts = parts[1:]  # parts[0] is the empty string before the first |
    if len(parts) < 5:
        return None
    project = parts[0]
    date_str = parts[1]
    ai_hrs = parts[2].replace("h", "").strip()
    human_hrs = parts[3].replace("h", "").strip()
    desc = parts[4]
    try:
        ai_hrs_f = float(ai_hrs) if ai_hrs else 0.0
    except ValueError:
        ai_hrs_f = 0.0
    try:
        human_hrs_f = float(human_hrs) if human_hrs else 0.0
    except ValueError:
        human_hrs_f = 0.0
    # Hash of the row content for deduplication
    row_hash = hashlib.md5(
        f"{project}|{date_str}|{ai_hrs_f}|{human_hrs_f}|{desc}".encode()
    ).hexdigest()
    return {
        "project": project,
        "date": date_str,
        "ai_hours": ai_hrs_f,
        "human_hours": human_hrs_f,
        "description": desc,
        "row_hash": row_hash,
    }


def parse_time_log(path):
    """
    Returns a list of entry dicts from the markdown table.
    Header: | Project | Date | AI Time | Human Time | Description |
    """
    entries = []
    if not os.path.exists(path):
        return entries
    with open(path) as f:
        for line in f:
            entry = parse_row(line)
            if entry:
                entries.append(entry)
    return entries

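# Quick sanity check of the parser (the sample row is made up):
#   >>> parse_row("| hermes | 2026-01-15 | 2.5h | 0.5h | Watcher plumbing |")["ai_hours"]
#   2.5
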
# ── Supabase sync ──────────────────────────────────────────────────────────────
def get_projects_map():
    """Returns a dict of slug -> id for looking up project IDs."""
    req = urllib.request.Request(
        f"{API_BASE}/rest/v1/projects?select=id,slug",
        headers={"Authorization": f"Bearer {API_KEY}"}
    )
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
            return {p["slug"]: p["id"] for p in data}
    except Exception as e:
        print(f"[time_sync] WARNING: could not fetch projects map: {e}")
        return {}

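# get_projects_map() assumes the endpoint returns a JSON array of project
# records, e.g. (ids/slugs illustrative): [{"id": 1, "slug": "hermes"}]
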
def post_time_entry(entry, project_id):
    """POST a single time entry to the local API (Supabase-style /rest/v1 endpoint)."""
    payload = {
        "project_id": project_id,
        "date": entry["date"],
        "ai_hours": str(entry["ai_hours"]),
        "human_hours": str(entry["human_hours"]),
        "description": entry["description"],
        "source": "obsidian",
    }
    req = urllib.request.Request(
        f"{API_BASE}/rest/v1/time_entries",
        data=json.dumps(payload).encode(),
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode()
        print(f"[time_sync] HTTP {e.code} posting time entry: {body}")
        return None
    except Exception as e:
        print(f"[time_sync] Error posting time entry: {e}")
        return None

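# For manual testing, the same request can be sent with curl (payload values
# illustrative; the key is the dev default above):
#   curl -X POST http://localhost:3000/rest/v1/time_entries \
#     -H "Authorization: Bearer greysec-dev-key-2026" \
#     -H "Content-Type: application/json" \
#     -d '{"project_id": 1, "date": "2026-01-15", "ai_hours": "2.5",
#          "human_hours": "0.5", "description": "Watcher plumbing", "source": "obsidian"}'
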
# ── Main loop ──────────────────────────────────────────────────────────────────
def main():
    print(f"[time_sync] Starting TIME-LOG watcher on {TIME_LOG}")
    print(f"[time_sync] API: {API_BASE} | Poll interval: {RECHECK_SECONDS}s")
    state = load_state()
    last_line_count = state.get("last_line_count", 0)
    last_mtime = state.get("last_mtime", 0)
    processed_hashes = set(state.get("processed_hashes", []))
    # Initial line count (don't reprocess existing entries on first start)
    if os.path.exists(TIME_LOG) and last_line_count == 0:
        with open(TIME_LOG) as f:
            last_line_count = len(f.readlines())
        state["last_line_count"] = last_line_count
        save_state(state)
        print(f"[time_sync] Initialized — starting from line {last_line_count}")
    projects_map = {}
    cycle_count = 0
    while True:
        try:
            if not os.path.exists(TIME_LOG):
                time.sleep(RECHECK_SECONDS)
                continue
            mtime = os.path.getmtime(TIME_LOG)
            # Nothing to do if the file hasn't changed
            if mtime == last_mtime:
                time.sleep(RECHECK_SECONDS)
                continue
            # Refresh the project map periodically (every 10 change events)
            cycle_count += 1
            if cycle_count % 10 == 1 or not projects_map:
                projects_map = get_projects_map()
                if projects_map:
                    print(f"[time_sync] Project map loaded: {len(projects_map)} projects")
                else:
                    print("[time_sync] WARNING: could not reach API, will retry")
            with open(TIME_LOG) as f:
                all_lines = f.readlines()
            current_line_count = len(all_lines)
            if current_line_count <= last_line_count:
                # File was truncated or rebuilt — reinitialize
                print(f"[time_sync] File reset detected (was {last_line_count}, "
                      f"now {current_line_count}). Reinitializing.")
                last_line_count = current_line_count
                last_mtime = mtime
                processed_hashes.clear()
                state["last_line_count"] = last_line_count
                state["last_mtime"] = last_mtime
                state["processed_hashes"] = []
                save_state(state)
                time.sleep(RECHECK_SECONDS)
                continue
            # Process only the lines added since the last sync
            new_lines = all_lines[last_line_count:]
            print(f"[time_sync] Processing {len(new_lines)} new lines")
            had_failure = False
            for line in new_lines:
                entry = parse_row(line)
                if entry is None:
                    continue
                if entry["row_hash"] in processed_hashes:
                    continue
                project = entry["project"]
                if project not in projects_map:
                    print(f"[time_sync] SKIP — unknown project slug: '{project}' (add it to DB first)")
                    processed_hashes.add(entry["row_hash"])
                    continue
                result = post_time_entry(entry, projects_map[project])
                if result:
                    print(f"[time_sync] SYNCED: {entry['date']} | {project} | "
                          f"AI:{entry['ai_hours']}h Human:{entry['human_hours']}h")
                    processed_hashes.add(entry["row_hash"])
                else:
                    # Don't record the hash and don't advance the cursors below,
                    # so the row really is retried on the next poll
                    print(f"[time_sync] FAILED — will retry: {entry['date']} | {project}")
                    had_failure = True
            if not had_failure:
                last_line_count = current_line_count
                last_mtime = mtime
                state["last_line_count"] = last_line_count
                state["last_mtime"] = last_mtime
            # Cap stored hashes to bound the state file size (set order is
            # arbitrary, so the cap is approximate rather than most-recent-500)
            state["processed_hashes"] = list(processed_hashes)[-500:]
            save_state(state)
        except Exception as e:
            print(f"[time_sync] ERROR in main loop: {e}")
        time.sleep(RECHECK_SECONDS)
if __name__ == "__main__":
main()
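
# ── Deployment note ────────────────────────────────────────────────────────────
# One way to keep this running in the background on pop-os is a systemd user
# unit. A minimal sketch; the unit name and paths are illustrative assumptions:
#
#   # ~/.config/systemd/user/time-sync-watcher.service
#   [Unit]
#   Description=GreySec TIME-LOG watcher
#
#   [Service]
#   ExecStart=/usr/bin/python3 %h/hermes-agent/time_sync_watcher.py
#   Environment=GREYSEC_API_KEY=greysec-dev-key-2026
#   Restart=on-failure
#
#   [Install]
#   WantedBy=default.target
#
# Enable with: systemctl --user enable --now time-sync-watcher.service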