#!/usr/bin/env python3
"""
session_prequery.py — Check gbrain for relevant past sessions before new tasks.

Usage:
    python3 session_prequery.py "how do I configure input-remapper on Pop!_OS"
    python3 session_prequery.py --recent "what did we work on yesterday"
    python3 session_prequery.py --query-type sessions "上次我们做了什么"   # non-English query: "what did we do last time"
    python3 session_prequery.py --query-type all "flask api postgres numeric"

Wires into the agent's startup: load relevant session history AND skill pages
from gbrain before reasoning from scratch.

The --query-type flag controls what gets searched:
    sessions — session/ pages only (what we did)
    all      — everything in the brain (skills, decisions, workflows, facts, sessions)

The agent loads this automatically via a skill or startup hook to ensure
local-first reasoning — brain knowledge before web search or external tooling.
"""

import argparse
import os
import subprocess
import sys
from pathlib import Path

GBRAIN_BUN = Path.home() / ".bun/bin/bun"
GBRAIN_CLI = Path.home() / ".bun/install/global/node_modules/gbrain/src/cli.ts"
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://localhost:11434")
MAX_QUERY_LEN = 32  # Vector search degrades past ~35 chars; truncate to 32 to be safe

# Characters allowed in a "slug-like" token (project names, page identifiers).
_SLUG_CHARS = frozenset("abcdefghijklmnopqrstuvwxyz0123456789/-")

# Common English filler words that carry no search signal.
_STOPWORDS = frozenset({
    "the", "a", "an", "is", "are", "was", "were", "be", "been",
    "have", "has", "had", "do", "does", "did", "will", "would",
    "could", "should", "can", "to", "of", "in", "for", "on", "at",
    "by", "it", "this", "that", "what", "how", "why", "i", "we",
    "you", "my", "our", "and", "or", "but", "with", "from", "about",
    "as", "so", "if", "its", "who", "which",
})


def _fit_terms(terms: list[str]) -> str:
    """Join the first two terms, falling back to one truncated term if too long.

    The result never exceeds MAX_QUERY_LEN characters. Returns "" for no terms.
    """
    if not terms:
        return ""
    pair = " ".join(terms[:2])
    if len(pair) <= MAX_QUERY_LEN:
        return pair
    return terms[0][:MAX_QUERY_LEN]


def smart_query(question: str) -> str:
    """Extract a short search query from a possibly long user message.

    The returned string is always at most MAX_QUERY_LEN characters. Strategy:
      1. If the stripped question already fits, return it unchanged.
      2. Otherwise pick up to two slug-like tokens (lowercased; at least four
         characters; only letters, digits, "/" and "-"; not a stopword).
      3. Otherwise pick up to two non-stopword words, original casing kept.
      4. Otherwise return the first MAX_QUERY_LEN characters of the question.

    Args:
        question: Raw user message.

    Returns:
        The shortened query, or "" when the question is empty/whitespace.
    """
    text = question.strip()
    if len(text) <= MAX_QUERY_LEN:
        return text

    tokens = text.split()

    # Slug-like identifiers (e.g. oisi, keystone, prowler) are the strongest
    # signal. Stopwords are excluded so "what"/"should"/"about" cannot crowd
    # out the real terms.
    slugs = [
        lowered
        for lowered in (tok.lower() for tok in tokens)
        if len(lowered) >= 4
        and lowered not in _STOPWORDS
        and all(ch in _SLUG_CHARS for ch in lowered)
    ]
    if slugs:
        return _fit_terms(slugs)

    # No slug-like tokens: use the first meaningful words as written.
    words = [tok for tok in tokens if tok.lower() not in _STOPWORDS]
    if words:
        return _fit_terms(words)

    # Last resort: the question consists solely of stopwords.
    return text[:MAX_QUERY_LEN].strip()


def _run_gbrain(cli_args: list[str], timeout: int) -> str:
    """Run the gbrain CLI with *cli_args* and return its stdout.

    This is deliberately best-effort: a missing binary, a timeout, a non-zero
    exit status or undecodable output all yield "" instead of raising.
    """
    env = os.environ.copy()
    env["OLLAMA_HOST"] = OLLAMA_HOST
    cmd = [str(GBRAIN_BUN), str(GBRAIN_CLI), *cli_args]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout, env=env
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: binary missing / not executable.
        # SubprocessError: includes TimeoutExpired.
        # ValueError: invalid argument (e.g. NUL byte) or undecodable output.
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout


def gbrain_query(question: str, top_k: int = 5) -> str:
    """Query gbrain for relevant pages. Returns raw CLI output.

    The question is shortened with smart_query() first. Returns "" when the
    shortened query is empty or the CLI call fails.
    """
    query = smart_query(question)
    if not query:
        # Nothing to search for; skip spawning the CLI.
        return ""
    return _run_gbrain(["query", query, "--no-expand", "-n", str(top_k)], timeout=30)


def get_recent_sessions(limit: int = 5) -> str:
    """Return the raw output of `gbrain list -n <limit>`, or "" on failure."""
    return _run_gbrain(["list", "-n", str(limit)], timeout=15)


def get_page_content(slug: str) -> dict:
    """Fetch one page via `gbrain get <slug>`.

    Returns:
        {"slug": slug, "content": <stdout>}; content is "" when the call fails.
    """
    return {"slug": slug, "content": _run_gbrain(["get", slug], timeout=15)}


def parse_page_slugs(results: str, query_type: str = "all") -> list[str]:
    """Parse page slugs from gbrain query output.

    Expected line format:
        "[1.0000] skills/greysec-phishing-orchestration -- # GreySec Phishing..."

    Args:
        results: Raw stdout of the gbrain query command.
        query_type: "sessions" keeps only session/ slugs; "all" keeps every
            slug containing a "/" (skills/, decisions/, workflows/, session/,
            etc.). Any other value yields an empty list.

    Returns:
        Slugs in first-seen order with duplicates removed.
    """
    if not results or query_type not in ("sessions", "all"):
        return []

    slugs = []
    for raw_line in results.splitlines():
        line = raw_line.strip()
        if "] " not in line:
            continue

        if "] session/" in line:
            # Session hits are accepted for both query types.
            tail = line.split("] session/", 1)[1].split(" --", 1)[0].strip()
            if tail:
                slugs.append("session/" + tail)
        elif query_type == "all":
            candidate = line.split("] ", 1)[1].split(" --", 1)[0].strip()
            # Only namespaced entries ("kind/name") count as page slugs.
            if "/" in candidate:
                slugs.append(candidate)

    # dict preserves insertion order, giving an order-stable dedup.
    return list(dict.fromkeys(slugs))


def format_context(pages: list, question: str, query_type: str = "all") -> str:
    """Format found pages as a markdown context block for the agent.

    Args:
        pages: Dicts with "slug" and "content" keys (missing keys read as "").
        question: The original user question, echoed in the header.
        query_type: "sessions" labels entries "Past Session"; anything else
            labels them "Brain Page".

    Returns:
        The markdown block, or "" when *pages* is empty. Each page's content
        is cut to 800 characters, with "..." appended when truncated.
    """
    if not pages:
        return ""

    label = "Past Session" if query_type == "sessions" else "Brain Page"
    lines = [
        "## Relevant Past Sessions & Brain Pages (from gbrain)",
        f"_Query: {question}_",
        "",
    ]
    for i, page in enumerate(pages, 1):
        slug = page.get("slug", "")
        content = page.get("content", "")
        truncated = content[:800] + ("..." if len(content) > 800 else "")
        lines.append(f"### {i}. [{label}] {slug}")
        lines.append(truncated)
        lines.append("")
    return "\n".join(lines)


def _strip_query_line(context: str) -> str:
    """Remove the header's `_Query:` line from a format_context() block.

    Only the first such line is dropped, so page content that happens to
    contain a line starting with "_Query:" is left intact.
    """
    lines = context.split("\n")
    for i, line in enumerate(lines):
        if line.startswith("_Query:"):
            del lines[i]
            break
    return "\n".join(lines).strip()


def main():
    """CLI entry point: search gbrain and print a context block to stdout.

    Diagnostics go to stderr (suppressed by --quiet). Exits with status 1
    when neither a question nor --recent is given.
    """
    parser = argparse.ArgumentParser(
        description="Query gbrain for relevant session + knowledge context"
    )
    parser.add_argument("question", nargs="?", help="Question to search for")
    parser.add_argument("--recent", action="store_true",
                        help="Get recent sessions instead of searching")
    parser.add_argument("--limit", type=int, default=5,
                        help="Number of pages to retrieve (default: 5)")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress debug output")
    parser.add_argument("--query-type", dest="query_type", default="all",
                        choices=["sessions", "all"],
                        help="sessions = past sessions only; all = everything in brain (default: all)")
    parser.add_argument("--format", dest="fmt", default="full",
                        choices=["full", "context"],
                        help="Output format: full (with headers) or context (bare block)")
    args = parser.parse_args()

    if args.recent:
        recent_list = get_recent_sessions(args.limit)
        print(f"## Recent Sessions\n{recent_list}")
        return

    # The question is a positional argument; treat whitespace-only as missing.
    if not (args.question or "").strip():
        print("No question provided. Pass a question argument or use --recent",
              file=sys.stderr)
        sys.exit(1)

    if not args.quiet:
        print(f"[session_prequery] query_type={args.query_type} Searching for: {args.question}",
              file=sys.stderr)

    # Query gbrain for relevant content
    results = gbrain_query(args.question, top_k=args.limit)
    if results and not args.quiet:
        print(f"[session_prequery] Raw results:\n{results[:500]}", file=sys.stderr)

    # Parse slugs from query output
    page_slugs = parse_page_slugs(results, query_type=args.query_type)

    # Fetch content for top matches, skipping pages that came back empty
    pages = []
    for slug in page_slugs[:args.limit]:
        page = get_page_content(slug)
        if page["content"]:
            pages.append(page)

    context = format_context(pages, args.question, query_type=args.query_type)
    if not context:
        if not args.quiet:
            print("[session_prequery] No relevant pages found in gbrain.", file=sys.stderr)
        return

    if args.fmt == "context":
        # Drop the echoed query line for clean injection into a prompt.
        print(_strip_query_line(context))
    else:
        print(context)


if __name__ == "__main__":
    main()