83a933ea1a
- brain.py: prefers direct Anthropic API (ANTHROPIC_API_KEY) over Hermes
for all LLM calls — ~22x cheaper (122 tokens vs 5600+ Hermes overhead).
Falls back to Hermes then Ollama if key unavailable.
extract_task_fields(): non-streaming call returns clean {title, project}
from any natural language phrasing — no more regex whack-a-mole.
- token_log.py: appends every LLM call to token-usage.jsonl with intent,
in/out token counts, and USD cost. get_summary() aggregates all-time,
today, and per-intent breakdowns.
- main.py: task handler uses extract_task_fields() with regex fallback;
streaming handler captures usage from final chunk; GET /usage endpoint
returns live cost summary.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
80 lines
2.4 KiB
Python
80 lines
2.4 KiB
Python
import json
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Root of the agent-os tree; the AGENT_OS_DIR environment variable overrides
# the default location.
AGENT_OS_DIR = Path(os.environ.get("AGENT_OS_DIR", "/opt/agent-os"))
# JSONL file that log_usage() appends to and get_summary() reads back.
TOKEN_LOG = AGENT_OS_DIR.joinpath("logs", "jon-snow", "token-usage.jsonl")

# claude-sonnet-4-6 pricing (USD per token)
INPUT_COST_PER_TOKEN = 3.00 / 1_000_000
OUTPUT_COST_PER_TOKEN = 15.00 / 1_000_000
|
|
|
|
|
|
def log_usage(intent: str, prompt_tokens: int, completion_tokens: int) -> None:
    """Append one usage record for an LLM call to the token-usage log.

    The record is a single JSON line holding the current UTC timestamp, the
    intent label, the input/output token counts and the USD cost derived
    from the per-token pricing constants (rounded to 6 decimal places).

    Args:
        intent: Label stored under the ``intent`` key of the record.
        prompt_tokens: Input token count, billed at INPUT_COST_PER_TOKEN.
        completion_tokens: Output token count, billed at OUTPUT_COST_PER_TOKEN.
    """
    # Make sure the log directory exists before the first append.
    log_dir = TOKEN_LOG.parent
    log_dir.mkdir(parents=True, exist_ok=True)

    input_cost = prompt_tokens * INPUT_COST_PER_TOKEN
    output_cost = completion_tokens * OUTPUT_COST_PER_TOKEN

    record = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "intent": intent,
        "in": prompt_tokens,
        "out": completion_tokens,
        "cost_usd": round(input_cost + output_cost, 6),
    }

    # One JSON object per line (JSONL), always appended.
    with TOKEN_LOG.open("a") as handle:
        handle.write(f"{json.dumps(record)}\n")
|
|
|
|
|
|
def _is_usage_entry(obj) -> bool:
    """Return True if *obj* is a dict carrying every field get_summary() reads.

    A log line can be valid JSON and still be unusable (a bare number, a dict
    with a missing key, a string where a token count belongs). Aggregating
    such a record would raise KeyError/TypeError, so it is filtered out here.
    """
    if not isinstance(obj, dict):
        return False
    try:
        # The intent is used as a dict key in the per-intent breakdown,
        # so it must be hashable (a JSON list/object would not be).
        hash(obj["intent"])
        numeric = all(
            isinstance(obj[key], (int, float)) for key in ("in", "out", "cost_usd")
        )
        return numeric and isinstance(obj["ts"], str)
    except (KeyError, TypeError):
        return False


def _totals(entries: list) -> dict:
    """Sum call count, token counts and cost (6-decimal USD) over *entries*."""
    return {
        "calls": len(entries),
        "tokens_in": sum(e["in"] for e in entries),
        "tokens_out": sum(e["out"] for e in entries),
        "cost_usd": round(sum(e["cost_usd"] for e in entries), 6),
    }


def get_summary() -> dict:
    """Aggregate the token-usage log into all-time, today and per-intent totals.

    Blank lines, lines that are not valid JSON, and JSON values that do not
    have the shape of a usage entry are skipped, so one bad line cannot break
    the whole summary.

    Returns:
        A dict with the keys:

        * ``all_time`` / ``today``: ``calls``, ``tokens_in``, ``tokens_out``
          and ``cost_usd`` totals; "today" is the current UTC date, matched
          against the date prefix of each entry's ``ts``.
        * ``by_intent``: per-intent ``calls``, ``in``, ``out``, ``cost_usd``.
        * ``recent``: the last 20 usable entries, in file order.

        The zeroed summary from ``_empty()`` is returned when the log file
        does not exist or contains no usable entries.
    """
    # EAFP: reading directly avoids the gap between an exists() check and the read.
    try:
        raw = TOKEN_LOG.read_text()
    except FileNotFoundError:
        return _empty()

    entries = []
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            record = json.loads(line)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError: corrupt or partially
            # written lines are skipped rather than failing the summary.
            continue
        if _is_usage_entry(record):
            entries.append(record)

    if not entries:
        return _empty()

    today = datetime.now(timezone.utc).date().isoformat()
    today_entries = [e for e in entries if e["ts"].startswith(today)]

    by_intent: dict = {}
    for e in entries:
        bucket = by_intent.setdefault(
            e["intent"], {"calls": 0, "in": 0, "out": 0, "cost_usd": 0.0}
        )
        bucket["calls"] += 1
        bucket["in"] += e["in"]
        bucket["out"] += e["out"]
        bucket["cost_usd"] = round(bucket["cost_usd"] + e["cost_usd"], 6)

    return {
        "all_time": _totals(entries),
        "today": _totals(today_entries),
        "by_intent": by_intent,
        "recent": entries[-20:],
    }
|
|
|
|
|
|
def _empty() -> dict:
|
|
return {
|
|
"all_time": {"calls": 0, "tokens_in": 0, "tokens_out": 0, "cost_usd": 0.0},
|
|
"today": {"calls": 0, "tokens_in": 0, "tokens_out": 0, "cost_usd": 0.0},
|
|
"by_intent": {},
|
|
"recent": [],
|
|
}
|