#!/usr/bin/env python3
"""
session_prequery.py — Check gbrain for relevant past sessions before new tasks.

Usage:
    python3 session_prequery.py "how do I configure input-remapper on Pop!_OS"
    python3 session_prequery.py --recent "what did we work on yesterday"
    python3 session_prequery.py --query-type sessions "上次我们做了什么"
    python3 session_prequery.py --query-type all "flask api postgres numeric"

Wires into the agent's startup: load relevant session history AND skill pages
from gbrain before reasoning from scratch.

The --query-type flag controls what gets searched:
    sessions — session/ pages only (what we did)
    all      — everything in the brain (skills, decisions, workflows, facts, sessions)

The agent loads this automatically via a skill or startup hook to ensure
local-first reasoning — brain knowledge before web search or external tooling.
"""

import argparse
import os
import subprocess
import sys
from pathlib import Path

GBRAIN_BUN = Path.home() / ".bun/bin/bun"
|
|
GBRAIN_CLI = Path.home() / ".bun/install/global/node_modules/gbrain/src/cli.ts"
|
|
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://localhost:11434")
|
|
MAX_QUERY_LEN = 32 # Vector search degrades past ~35 chars; truncate to 32 to be safe
|
|
|
|
|
|
def smart_query(question: str) -> str:
|
|
"""Extract the best search query from a long user message.
|
|
|
|
Vector search degrades past ~35 chars. Strategy:
|
|
1. Check if the whole question is short enough already
|
|
2. Extract project slugs / known terms (alphanumeric + hyphen, up to 2)
|
|
3. Fall back to first N meaningful words
|
|
"""
|
|
if len(question.strip()) <= MAX_QUERY_LEN:
|
|
return question.strip()
|
|
|
|
# Try to extract known project slugs or identifiers (e.g. OISI, keystone, prowler)
|
|
slug_chars = "abcdefghijklmnopqrstuvwxyz0123456789/-"
|
|
slugs = [w.lower() for w in question.split()
|
|
if len(w) >= 4 and all(c in slug_chars for c in w.lower())]
|
|
if slugs[:2]:
|
|
return " ".join(slugs[:2])
|
|
|
|
# Grab first two meaningful words (skip stopwords)
|
|
stopwords = {"the", "a", "an", "is", "are", "was", "were", "be", "been",
|
|
"have", "has", "had", "do", "does", "did", "will", "would",
|
|
"could", "should", "can", "to", "of", "in", "for", "on",
|
|
"at", "by", "it", "this", "that", "what", "how", "why",
|
|
"i", "we", "you", "my", "our", "and", "or", "but", "with",
|
|
"from", "about", "as", "so", "if", "its", "who", "which"}
|
|
words = [w for w in question.split() if w.lower() not in stopwords]
|
|
if len(words) >= 2:
|
|
combined = words[0] + " " + words[1]
|
|
if len(combined) <= MAX_QUERY_LEN:
|
|
return combined
|
|
elif words:
|
|
return words[0][:MAX_QUERY_LEN]
|
|
|
|
# Last resort: first 32 chars
|
|
return question[:MAX_QUERY_LEN].strip()
|
|
|
|
|
|
def gbrain_query(question: str, top_k: int = 5) -> str:
|
|
"""Query gbrain for relevant pages. Returns raw CLI output."""
|
|
env = os.environ.copy()
|
|
env["OLLAMA_HOST"] = OLLAMA_HOST
|
|
|
|
query = smart_query(question)
|
|
|
|
cmd = [
|
|
str(GBRAIN_BUN), str(GBRAIN_CLI), "query",
|
|
query, "--no-expand", "-n", str(top_k)
|
|
]
|
|
try:
|
|
result = subprocess.run(
|
|
cmd, capture_output=True, text=True, timeout=30, env=env
|
|
)
|
|
if result.returncode != 0:
|
|
return ""
|
|
return result.stdout
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
def get_recent_sessions(limit: int = 5) -> str:
|
|
"""Get the most recent session slugs from gbrain."""
|
|
env = os.environ.copy()
|
|
env["OLLAMA_HOST"] = OLLAMA_HOST
|
|
|
|
cmd = [str(GBRAIN_BUN), str(GBRAIN_CLI), "list", "-n", str(limit)]
|
|
try:
|
|
result = subprocess.run(
|
|
cmd, capture_output=True, text=True, timeout=15, env=env
|
|
)
|
|
if result.returncode == 0:
|
|
return result.stdout
|
|
except Exception:
|
|
pass
|
|
return ""
|
|
|
|
|
|
def get_page_content(slug: str) -> dict:
|
|
"""Get a specific page's full content."""
|
|
env = os.environ.copy()
|
|
env["OLLAMA_HOST"] = OLLAMA_HOST
|
|
|
|
cmd = [str(GBRAIN_BUN), str(GBRAIN_CLI), "get", slug]
|
|
try:
|
|
result = subprocess.run(
|
|
cmd, capture_output=True, text=True, timeout=15, env=env
|
|
)
|
|
if result.returncode == 0:
|
|
return {"slug": slug, "content": result.stdout}
|
|
except Exception:
|
|
pass
|
|
return {"slug": slug, "content": ""}
|
|
|
|
|
|
def parse_page_slugs(results: str, query_type: str = "all") -> list[str]:
|
|
"""Parse page slugs from gbrain query output.
|
|
|
|
query_type "sessions" — only session/ slugs.
|
|
query_type "all" — all slugs (skills/, decisions/, workflows/, session/, etc.).
|
|
"""
|
|
if not results:
|
|
return []
|
|
slugs = []
|
|
for line in results.split("\n"):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
|
|
slug = None
|
|
if "] session/" in line:
|
|
slug = line.split("] session/")[1].split(" --")[0]
|
|
slug = "session/" + slug
|
|
elif "] " in line and query_type == "all":
|
|
# Format: "[1.0000] skills/greysec-phishing-orchestration -- # GreySec Phishing..."
|
|
parts = line.split("] ", 1)
|
|
if len(parts) == 2:
|
|
slug_candidate = parts[1].split(" --")[0].strip()
|
|
if "/" in slug_candidate:
|
|
slug = slug_candidate
|
|
|
|
if slug and ((query_type == "sessions" and slug.startswith("session/")) or
|
|
(query_type == "all" and slug)):
|
|
slugs.append(slug)
|
|
|
|
# deduplicate preserving order
|
|
seen = set()
|
|
deduped = []
|
|
for s in slugs:
|
|
if s not in seen:
|
|
seen.add(s)
|
|
deduped.append(s)
|
|
return deduped
|
|
|
|
|
|
def format_context(pages: list, question: str, query_type: str = "all") -> str:
|
|
"""Format found pages as context for the agent."""
|
|
if not pages:
|
|
return ""
|
|
|
|
label = "Past Session" if query_type == "sessions" else "Brain Page"
|
|
|
|
lines = [
|
|
f"## Relevant Past Sessions & Brain Pages (from gbrain)",
|
|
f"_Query: {question}_",
|
|
""
|
|
]
|
|
for i, page in enumerate(pages, 1):
|
|
slug = page.get("slug", "")
|
|
content = page.get("content", "")
|
|
truncated = content[:800] + ("..." if len(content) > 800 else "")
|
|
lines.append(f"### {i}. [{label}] {slug}")
|
|
lines.append(truncated)
|
|
lines.append("")
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Query gbrain for relevant session + knowledge context"
|
|
)
|
|
parser.add_argument("question", nargs="?", help="Question to search for")
|
|
parser.add_argument("--recent", action="store_true",
|
|
help="Get recent sessions instead of searching")
|
|
parser.add_argument("--limit", type=int, default=5,
|
|
help="Number of pages to retrieve (default: 5)")
|
|
parser.add_argument("--quiet", action="store_true",
|
|
help="Suppress debug output")
|
|
parser.add_argument("--query-type", dest="query_type", default="all",
|
|
choices=["sessions", "all"],
|
|
help="sessions = past sessions only; all = everything in brain (default: all)")
|
|
parser.add_argument("--format", dest="fmt", default="full",
|
|
choices=["full", "context"],
|
|
help="Output format: full (with headers) or context (bare block)")
|
|
args = parser.parse_args()
|
|
|
|
if args.recent:
|
|
recent_list = get_recent_sessions(args.limit)
|
|
print(f"## Recent Sessions\n{recent_list}")
|
|
return
|
|
|
|
if not args.question:
|
|
print("No question provided. Use --question or --recent", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Query gbrain for relevant content
|
|
results = gbrain_query(args.question, top_k=args.limit)
|
|
|
|
if not args.quiet:
|
|
print(f"[session_prequery] query_type={args.query_type} Searching for: {args.question}", file=sys.stderr)
|
|
if results:
|
|
print(f"[session_prequery] Raw results:\n{results[:500]}", file=sys.stderr)
|
|
|
|
# Parse slugs from query output
|
|
page_slugs = parse_page_slugs(results, query_type=args.query_type)
|
|
|
|
# Fetch content for top matches
|
|
pages = []
|
|
for slug in page_slugs[:args.limit]:
|
|
content = get_page_content(slug)
|
|
if content["content"]:
|
|
pages.append(content)
|
|
|
|
context = format_context(pages, args.question, query_type=args.query_type)
|
|
if context:
|
|
if args.fmt == "context":
|
|
# Strip header lines for clean injection
|
|
lines = context.split("\n")
|
|
idx = next((i for i, l in enumerate(lines) if l.startswith("## Relevant")), -1)
|
|
if idx >= 0:
|
|
lines = lines[idx:]
|
|
lines = [l for l in lines if not l.startswith("_Query:")]
|
|
print("\n".join(lines).strip())
|
|
else:
|
|
print(context)
|
|
else:
|
|
if not args.quiet:
|
|
print("[session_prequery] No relevant pages found in gbrain.", file=sys.stderr)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|