hermes-agent/time_sync_watcher.py
#!/usr/bin/env python3
"""
time_sync_watcher.py — GreySec
Watches TIME-LOG.md for new entries and syncs them to Supabase.
Runs as a lightweight background process on pop-os.
Tracks last-read line count in state file to process only new rows.
"""
import hashlib
import json
import os
import re
import time
import urllib.error
import urllib.request

# ── Config ────────────────────────────────────────────────────────────────────
TIME_LOG = os.path.expanduser("~/Documents/empire/1-areas/kate/time/TIME-LOG.md")
STATE_FILE = os.path.expanduser("~/.hermes/state/timelog_sync.json")
API_BASE = "http://localhost:3000"
API_KEY = os.environ.get("GREYSEC_API_KEY", "greysec-dev-key-2026")
RECHECK_SECONDS = 30  # Poll interval
# ── State ──────────────────────────────────────────────────────────────────────
def load_state():
    if os.path.exists(STATE_FILE):
        try:
            with open(STATE_FILE) as f:
                return json.load(f)
        except (json.JSONDecodeError, IOError):
            pass
    return {"last_line_count": 0, "last_mtime": 0, "processed_hashes": []}


def save_state(state):
    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
    with open(STATE_FILE, "w") as f:
        json.dump(state, f, indent=2)

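# The resulting state file looks like this (values illustrative):
#   {"last_line_count": 142, "last_mtime": 1737000000.0, "processed_hashes": ["9a0f..."]}
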
# ── Markdown table parser ───────────────────────────────────────────────────────
def parse_row(line):
    """
    Parse a single markdown table row into an entry dict.
    Returns None for non-data lines: section markers (##), the header row,
    separator lines, and empty lines.
    """
    line = line.strip()
    if not line.startswith("|"):
        return None
    if "Project" in line and "Date" in line:  # header row
        return None
    if set(line) <= {" ", "|", "-", ":", "+"}:  # separator like |---|---| or |:---|:---|
        return None
    if re.match(r"\|\s*-+\s*\|", line):  # markdown table separator
        return None
    parts = [p.strip() for p in line.split("|")]
    parts = parts[1:]  # parts[0] is the empty string before the first |
    if len(parts) < 5:
        return None
    project = parts[0]
    date_str = parts[1]
    ai_hrs = parts[2].replace("h", "").strip()
    human_hrs = parts[3].replace("h", "").strip()
    desc = parts[4]
    try:
        ai_hrs_f = float(ai_hrs) if ai_hrs else 0.0
    except ValueError:
        ai_hrs_f = 0.0
    try:
        human_hrs_f = float(human_hrs) if human_hrs else 0.0
    except ValueError:
        human_hrs_f = 0.0
    # Hash of the row content for deduplication
    row_hash = hashlib.md5(
        f"{project}|{date_str}|{ai_hrs_f}|{human_hrs_f}|{desc}".encode()
    ).hexdigest()
    return {
        "project": project,
        "date": date_str,
        "ai_hours": ai_hrs_f,
        "human_hours": human_hrs_f,
        "description": desc,
        "row_hash": row_hash,
    }


def parse_time_log(path):
    """
    Returns a list of entry dicts from the markdown table.
    Header: | Project | Date | AI Time | Human Time | Description |
    """
    entries = []
    if not os.path.exists(path):
        return entries
    with open(path) as f:
        for line in f:
            entry = parse_row(line)
            if entry:
                entries.append(entry)
    return entries

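# Quick sanity check of the parser (the sample row is made up):
#   >>> parse_row("| hermes | 2026-01-15 | 2.5h | 0.5h | Watcher plumbing |")["ai_hours"]
#   2.5
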
# ── Supabase sync ──────────────────────────────────────────────────────────────
def get_projects_map():
    """Returns a dict of slug -> id for looking up project IDs."""
    req = urllib.request.Request(
        f"{API_BASE}/rest/v1/projects?select=id,slug",
        headers={"Authorization": f"Bearer {API_KEY}"}
    )
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
            return {p["slug"]: p["id"] for p in data}
    except Exception as e:
        print(f"[time_sync] WARNING: could not fetch projects map: {e}")
        return {}

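# get_projects_map() assumes the endpoint returns a JSON array of project
# records, e.g. (ids/slugs illustrative): [{"id": 1, "slug": "hermes"}]
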
def post_time_entry(entry, project_id):
    """POST a single time entry to the local API (Supabase-style /rest/v1 endpoint)."""
    payload = {
        "project_id": project_id,
        "date": entry["date"],
        "ai_hours": str(entry["ai_hours"]),
        "human_hours": str(entry["human_hours"]),
        "description": entry["description"],
        "source": "obsidian",
    }
    req = urllib.request.Request(
        f"{API_BASE}/rest/v1/time_entries",
        data=json.dumps(payload).encode(),
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode()
        print(f"[time_sync] HTTP {e.code} posting time entry: {body}")
        return None
    except Exception as e:
        print(f"[time_sync] Error posting time entry: {e}")
        return None

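# For manual testing, the same request can be sent with curl (payload values
# illustrative; the key is the dev default above):
#   curl -X POST http://localhost:3000/rest/v1/time_entries \
#     -H "Authorization: Bearer greysec-dev-key-2026" \
#     -H "Content-Type: application/json" \
#     -d '{"project_id": 1, "date": "2026-01-15", "ai_hours": "2.5",
#          "human_hours": "0.5", "description": "Watcher plumbing", "source": "obsidian"}'
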
# ── Main loop ──────────────────────────────────────────────────────────────────
def main():
    print(f"[time_sync] Starting TIME-LOG watcher on {TIME_LOG}")
    print(f"[time_sync] API: {API_BASE} | Poll interval: {RECHECK_SECONDS}s")
    state = load_state()
    last_line_count = state.get("last_line_count", 0)
    last_mtime = state.get("last_mtime", 0)
    processed_hashes = set(state.get("processed_hashes", []))
    # Initial line count (don't reprocess existing entries on first start)
    if os.path.exists(TIME_LOG) and last_line_count == 0:
        with open(TIME_LOG) as f:
            last_line_count = len(f.readlines())
        state["last_line_count"] = last_line_count
        save_state(state)
        print(f"[time_sync] Initialized — starting from line {last_line_count}")
    projects_map = {}
    cycle_count = 0
    while True:
        try:
            if not os.path.exists(TIME_LOG):
                time.sleep(RECHECK_SECONDS)
                continue
            mtime = os.path.getmtime(TIME_LOG)
            # Nothing to do if the file hasn't changed
            if mtime == last_mtime:
                time.sleep(RECHECK_SECONDS)
                continue
            # Refresh the project map periodically (every 10 change events)
            cycle_count += 1
            if cycle_count % 10 == 1 or not projects_map:
                projects_map = get_projects_map()
                if projects_map:
                    print(f"[time_sync] Project map loaded: {len(projects_map)} projects")
                else:
                    print("[time_sync] WARNING: could not reach API, will retry")
            with open(TIME_LOG) as f:
                all_lines = f.readlines()
            current_line_count = len(all_lines)
            if current_line_count <= last_line_count:
                # File was truncated or rebuilt — reinitialize
                print(f"[time_sync] File reset detected (was {last_line_count}, "
                      f"now {current_line_count}). Reinitializing.")
                last_line_count = current_line_count
                last_mtime = mtime
                processed_hashes.clear()
                state["last_line_count"] = last_line_count
                state["last_mtime"] = last_mtime
                state["processed_hashes"] = []
                save_state(state)
                time.sleep(RECHECK_SECONDS)
                continue
            # Process only the lines added since the last sync
            new_lines = all_lines[last_line_count:]
            print(f"[time_sync] Processing {len(new_lines)} new lines")
            had_failure = False
            for line in new_lines:
                entry = parse_row(line)
                if entry is None:
                    continue
                if entry["row_hash"] in processed_hashes:
                    continue
                project = entry["project"]
                if project not in projects_map:
                    print(f"[time_sync] SKIP — unknown project slug: '{project}' (add it to DB first)")
                    processed_hashes.add(entry["row_hash"])
                    continue
                result = post_time_entry(entry, projects_map[project])
                if result:
                    print(f"[time_sync] SYNCED: {entry['date']} | {project} | "
                          f"AI:{entry['ai_hours']}h Human:{entry['human_hours']}h")
                    processed_hashes.add(entry["row_hash"])
                else:
                    # Don't record the hash and don't advance the cursors below,
                    # so the row really is retried on the next poll
                    print(f"[time_sync] FAILED — will retry: {entry['date']} | {project}")
                    had_failure = True
            if not had_failure:
                last_line_count = current_line_count
                last_mtime = mtime
                state["last_line_count"] = last_line_count
                state["last_mtime"] = last_mtime
            # Cap stored hashes to bound the state file size (set order is
            # arbitrary, so the cap is approximate rather than most-recent-500)
            state["processed_hashes"] = list(processed_hashes)[-500:]
            save_state(state)
        except Exception as e:
            print(f"[time_sync] ERROR in main loop: {e}")
        time.sleep(RECHECK_SECONDS)
if __name__ == "__main__":
main()
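
# ── Deployment note ────────────────────────────────────────────────────────────
# One way to keep this running in the background on pop-os is a systemd user
# unit. A minimal sketch; the unit name and paths are illustrative assumptions:
#
#   # ~/.config/systemd/user/time-sync-watcher.service
#   [Unit]
#   Description=GreySec TIME-LOG watcher
#
#   [Service]
#   ExecStart=/usr/bin/python3 %h/hermes-agent/time_sync_watcher.py
#   Environment=GREYSEC_API_KEY=greysec-dev-key-2026
#   Restart=on-failure
#
#   [Install]
#   WantedBy=default.target
#
# Enable with: systemctl --user enable --now time-sync-watcher.service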