mirror of
https://github.com/infinition/Bjorn.git
synced 2026-03-10 06:31:59 +00:00
- Implemented methods for fetching AI stats, training history, and recent experiences. - Added functionality to set operation mode (MANUAL, AUTO, AI) with appropriate handling. - Included helper methods for querying the database and sending JSON responses. - Integrated model metadata extraction for visualization purposes.
537 lines
18 KiB
Python
537 lines
18 KiB
Python
"""
|
|
Debug / Profiling utilities for the Bjorn Debug page.
|
|
Exposes process-level and per-thread metrics via /proc (no external deps).
|
|
Designed for Pi Zero 2: lightweight reads, no subprocess spawning.
|
|
OPTIMIZED: minimal allocations, cached tracemalloc, /proc/self/smaps for C memory.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import threading
|
|
import time
|
|
import tracemalloc
|
|
|
|
from logger import Logger
|
|
|
|
logger = Logger(name="debug_utils")

# Kernel clock ticks per second — used to convert /proc stat utime/stime
# tick counts into CPU percentages; falls back to the common default of 100.
_SC_CLK_TCK = os.sysconf("SC_CLK_TCK") if hasattr(os, "sysconf") else 100
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# /proc helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _read_proc_status():
|
|
result = {}
|
|
try:
|
|
with open("/proc/self/status", "r", encoding="utf-8") as f:
|
|
for line in f:
|
|
if line.startswith("VmRSS:"):
|
|
result["vm_rss_kb"] = int(line.split()[1])
|
|
elif line.startswith("VmSize:"):
|
|
result["vm_size_kb"] = int(line.split()[1])
|
|
elif line.startswith("VmPeak:"):
|
|
result["vm_peak_kb"] = int(line.split()[1])
|
|
elif line.startswith("VmSwap:"):
|
|
result["vm_swap_kb"] = int(line.split()[1])
|
|
elif line.startswith("FDSize:"):
|
|
result["fd_slots"] = int(line.split()[1])
|
|
elif line.startswith("Threads:"):
|
|
result["kernel_threads"] = int(line.split()[1])
|
|
elif line.startswith("RssAnon:"):
|
|
result["rss_anon_kb"] = int(line.split()[1])
|
|
elif line.startswith("RssFile:"):
|
|
result["rss_file_kb"] = int(line.split()[1])
|
|
elif line.startswith("RssShmem:"):
|
|
result["rss_shmem_kb"] = int(line.split()[1])
|
|
except Exception:
|
|
pass
|
|
return result
|
|
|
|
|
|
def _fd_count():
|
|
try:
|
|
return len(os.listdir("/proc/self/fd"))
|
|
except Exception:
|
|
return -1
|
|
|
|
|
|
def _read_open_files():
|
|
"""Read open FDs — reuses a single dict to minimize allocations."""
|
|
fd_dir = "/proc/self/fd"
|
|
fd_map = {}
|
|
try:
|
|
fds = os.listdir(fd_dir)
|
|
except Exception:
|
|
return []
|
|
|
|
for fd in fds:
|
|
try:
|
|
target = os.readlink(fd_dir + "/" + fd)
|
|
except Exception:
|
|
target = "???"
|
|
|
|
if target.startswith("/"):
|
|
ftype = "device" if "/dev/" in target else "proc" if target.startswith("/proc/") else "temp" if (target.startswith("/tmp/") or target.startswith("/run/")) else "file"
|
|
elif target.startswith("socket:"):
|
|
ftype = "socket"
|
|
elif target.startswith("pipe:"):
|
|
ftype = "pipe"
|
|
elif target.startswith("anon_inode:"):
|
|
ftype = "anon"
|
|
else:
|
|
ftype = "other"
|
|
|
|
entry = fd_map.get(target)
|
|
if entry is None:
|
|
entry = {"target": target, "type": ftype, "count": 0, "fds": []}
|
|
fd_map[target] = entry
|
|
entry["count"] += 1
|
|
if len(entry["fds"]) < 5:
|
|
entry["fds"].append(int(fd))
|
|
|
|
result = sorted(fd_map.values(), key=lambda x: (-x["count"], x["target"]))
|
|
return result
|
|
|
|
|
|
def _read_thread_stats():
|
|
threads = []
|
|
task_dir = "/proc/self/task"
|
|
try:
|
|
tids = os.listdir(task_dir)
|
|
except Exception:
|
|
return threads
|
|
|
|
for tid in tids:
|
|
try:
|
|
with open(task_dir + "/" + tid + "/stat", "r", encoding="utf-8") as f:
|
|
raw = f.read()
|
|
i1 = raw.find("(")
|
|
i2 = raw.rfind(")")
|
|
if i1 < 0 or i2 < 0:
|
|
continue
|
|
name = raw[i1 + 1:i2]
|
|
fields = raw[i2 + 2:].split()
|
|
state = fields[0] if fields else "?"
|
|
utime = int(fields[11]) if len(fields) > 11 else 0
|
|
stime = int(fields[12]) if len(fields) > 12 else 0
|
|
threads.append({
|
|
"tid": int(tid),
|
|
"name": name,
|
|
"state": state,
|
|
"cpu_ticks": utime + stime,
|
|
})
|
|
except Exception:
|
|
continue
|
|
return threads
|
|
|
|
|
|
def _get_python_threads_rich():
|
|
"""Enumerate Python threads with target + current frame. Minimal allocations."""
|
|
frames = sys._current_frames()
|
|
result = []
|
|
|
|
for t in threading.enumerate():
|
|
ident = t.ident
|
|
nid = getattr(t, "native_id", None)
|
|
|
|
# Target function info
|
|
target = getattr(t, "_target", None)
|
|
if target is not None:
|
|
tf = getattr(target, "__qualname__", getattr(target, "__name__", "?"))
|
|
tm = getattr(target, "__module__", "")
|
|
# Source file — use __code__ directly (avoids importing inspect)
|
|
tfile = ""
|
|
code = getattr(target, "__code__", None)
|
|
if code:
|
|
tfile = getattr(code, "co_filename", "")
|
|
else:
|
|
tf = "(main)" if t.name == "MainThread" else "(no target)"
|
|
tm = ""
|
|
tfile = ""
|
|
|
|
# Current stack — top 5 frames, build compact strings directly
|
|
stack = []
|
|
frame = frames.get(ident)
|
|
depth = 0
|
|
while frame is not None and depth < 5:
|
|
co = frame.f_code
|
|
fn = co.co_filename
|
|
# Shorten: last 2 path components
|
|
sep = fn.rfind("/")
|
|
if sep > 0:
|
|
sep2 = fn.rfind("/", 0, sep)
|
|
short = fn[sep2 + 1:] if sep2 >= 0 else fn
|
|
else:
|
|
short = fn
|
|
stack.append({
|
|
"file": short,
|
|
"line": frame.f_lineno,
|
|
"func": co.co_name,
|
|
})
|
|
frame = frame.f_back
|
|
depth += 1
|
|
# Release frame reference immediately
|
|
del frame
|
|
|
|
result.append({
|
|
"name": t.name,
|
|
"daemon": t.daemon,
|
|
"alive": t.is_alive(),
|
|
"ident": ident,
|
|
"native_id": nid,
|
|
"target_func": tf,
|
|
"target_module": tm,
|
|
"target_file": tfile,
|
|
"stack_top": stack,
|
|
})
|
|
|
|
# Release all frame references
|
|
del frames
|
|
return result
|
|
|
|
|
|
def _system_cpu_mem():
|
|
result = {"cpu_count": 1, "mem_total_kb": 0, "mem_available_kb": 0}
|
|
try:
|
|
with open("/proc/meminfo", "r", encoding="utf-8") as f:
|
|
for line in f:
|
|
if line.startswith("MemTotal:"):
|
|
result["mem_total_kb"] = int(line.split()[1])
|
|
elif line.startswith("MemAvailable:"):
|
|
result["mem_available_kb"] = int(line.split()[1])
|
|
except Exception:
|
|
pass
|
|
try:
|
|
result["cpu_count"] = len(os.sched_getaffinity(0))
|
|
except Exception:
|
|
try:
|
|
result["cpu_count"] = os.cpu_count() or 1
|
|
except Exception:
|
|
pass
|
|
return result
|
|
|
|
|
|
def _read_smaps_rollup():
|
|
"""
|
|
Read /proc/self/smaps_rollup for a breakdown of what consumes RSS.
|
|
This shows: Shared_Clean, Shared_Dirty, Private_Clean, Private_Dirty,
|
|
which helps identify C extension memory vs Python heap vs mmap.
|
|
"""
|
|
result = {}
|
|
try:
|
|
with open("/proc/self/smaps_rollup", "r", encoding="utf-8") as f:
|
|
for line in f:
|
|
parts = line.split()
|
|
if len(parts) >= 2:
|
|
key = parts[0].rstrip(":")
|
|
if key in ("Rss", "Pss", "Shared_Clean", "Shared_Dirty",
|
|
"Private_Clean", "Private_Dirty", "Referenced",
|
|
"Anonymous", "Swap", "Locked"):
|
|
result[key.lower() + "_kb"] = int(parts[1])
|
|
except Exception:
|
|
pass
|
|
return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Cached tracemalloc — take snapshot at most every 5s to reduce overhead
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Lock guarding the cached tracemalloc snapshot state below.
_tm_cache_lock = threading.Lock()
_tm_cache = None  # cached tuple: (current_bytes, peak_bytes, top_by_file, top_by_line)
_tm_cache_time = 0.0  # time.monotonic() of the last cache refresh
_TM_CACHE_TTL = 5.0  # seconds a cached snapshot remains valid
|
|
|
|
|
|
def _get_tracemalloc_cached():
    """Return (current_bytes, peak_bytes, top_by_file, top_by_line) from tracemalloc.

    Results are cached for _TM_CACHE_TTL seconds because take_snapshot() is
    expensive on a Pi Zero. Returns zeros/empty lists when tracing is off.
    NOTE(review): the snapshot is deliberately taken outside the lock, so two
    threads that miss the cache together may both take one; the cache is then
    simply overwritten — duplicated work, but no corruption.
    """
    global _tm_cache, _tm_cache_time

    if not tracemalloc.is_tracing():
        return 0, 0, [], []

    now = time.monotonic()
    with _tm_cache_lock:
        if _tm_cache is not None and (now - _tm_cache_time) < _TM_CACHE_TTL:
            return _tm_cache

    # Take snapshot outside the lock (it's slow)
    current, peak = tracemalloc.get_traced_memory()
    snap = tracemalloc.take_snapshot()

    # Single statistics call — use lineno (more useful), derive file-level client-side
    stats_line = snap.statistics("lineno")[:30]
    top_by_line = []
    file_agg = {}  # full path -> aggregated {"file", "full_path", "size_kb", "count"}
    for s in stats_line:
        frame = s.traceback[0] if s.traceback else None
        if frame is None:
            continue
        fn = frame.filename
        # Shorten to the last two path components for compact display.
        sep = fn.rfind("/")
        if sep > 0:
            sep2 = fn.rfind("/", 0, sep)
            short = fn[sep2 + 1:] if sep2 >= 0 else fn
        else:
            short = fn
        top_by_line.append({
            "file": short,
            "full_path": fn,
            "line": frame.lineno,
            "size_kb": round(s.size / 1024, 1),
            "count": s.count,
        })
        # Aggregate by file
        if fn not in file_agg:
            file_agg[fn] = {"file": short, "full_path": fn, "size_kb": 0, "count": 0}
        file_agg[fn]["size_kb"] += round(s.size / 1024, 1)
        file_agg[fn]["count"] += s.count

    # Also get file-level stats for files that don't appear in line-level top
    stats_file = snap.statistics("filename")[:20]
    for s in stats_file:
        fn = str(s.traceback) if hasattr(s.traceback, '__str__') else ""
        # traceback for filename stats is just the filename
        raw_fn = s.traceback[0].filename if s.traceback else fn
        if raw_fn not in file_agg:
            # Same last-two-components shortening as above.
            sep = raw_fn.rfind("/")
            if sep > 0:
                sep2 = raw_fn.rfind("/", 0, sep)
                short = raw_fn[sep2 + 1:] if sep2 >= 0 else raw_fn
            else:
                short = raw_fn
            file_agg[raw_fn] = {"file": short, "full_path": raw_fn, "size_kb": 0, "count": 0}
        entry = file_agg[raw_fn]
        # Use the larger of aggregated or direct stats
        direct_kb = round(s.size / 1024, 1)
        if direct_kb > entry["size_kb"]:
            entry["size_kb"] = direct_kb
        if s.count > entry["count"]:
            entry["count"] = s.count

    top_by_file = sorted(file_agg.values(), key=lambda x: -x["size_kb"])[:20]

    # Release snapshot immediately
    del snap

    result = (current, peak, top_by_file, top_by_line)
    with _tm_cache_lock:
        _tm_cache = result
        _tm_cache_time = now

    return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Snapshot + history ring buffer
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_MAX_HISTORY = 120  # ring-buffer capacity: number of summary points kept
_history_lock = threading.Lock()  # guards _history
_history = []  # oldest-first list of compact per-snapshot summaries
_prev_thread_ticks = {}  # tid -> cumulative cpu ticks at the previous snapshot
_prev_proc_ticks = 0  # process-wide tick total at the previous snapshot
_prev_wall = 0.0  # wall-clock time of the previous snapshot (0.0 = none yet)
|
|
|
|
|
|
def _take_snapshot():
    """Collect one full profiling snapshot and append a summary to _history.

    Computes per-thread and whole-process CPU% from /proc tick deltas since
    the previous call (first call uses a 1s budget, so deltas read as 0),
    attaches Python-thread details matched via native_id, plus memory,
    FD, smaps and tracemalloc data. Mutates the module-level _prev_* state.
    NOTE(review): not guarded by a lock itself — appears to assume a single
    caller (the web handler); confirm before calling concurrently.
    """
    global _prev_thread_ticks, _prev_proc_ticks, _prev_wall

    now = time.time()
    # Tick budget = wall seconds since last snapshot * ticks/sec; a thread
    # consuming the whole budget reads as 100% CPU.
    wall_delta = now - _prev_wall if _prev_wall > 0 else 1.0
    tick_budget = wall_delta * _SC_CLK_TCK

    # Process-level
    status = _read_proc_status()
    fd_open = _fd_count()
    sys_info = _system_cpu_mem()
    smaps = _read_smaps_rollup()

    # Thread CPU from /proc
    raw_threads = _read_thread_stats()
    thread_details = []
    new_ticks_map = {}
    total_proc_ticks = 0

    for t in raw_threads:
        tid = t["tid"]
        # Unknown tids default to their own current ticks -> delta 0 (new thread).
        prev = _prev_thread_ticks.get(tid, t["cpu_ticks"])
        delta = max(0, t["cpu_ticks"] - prev)
        cpu_pct = (delta / tick_budget * 100.0) if tick_budget > 0 else 0.0
        new_ticks_map[tid] = t["cpu_ticks"]
        total_proc_ticks += t["cpu_ticks"]
        thread_details.append({
            "tid": tid,
            "name": t["name"],
            "state": t["state"],
            "cpu_pct": round(cpu_pct, 2),
            "cpu_ticks_total": t["cpu_ticks"],
        })

    thread_details.sort(key=lambda x: x["cpu_pct"], reverse=True)

    proc_delta = total_proc_ticks - _prev_proc_ticks if _prev_proc_ticks else 0
    proc_cpu_pct = (proc_delta / tick_budget * 100.0) if tick_budget > 0 else 0.0

    # Roll the comparison baseline forward for the next snapshot.
    _prev_thread_ticks = new_ticks_map
    _prev_proc_ticks = total_proc_ticks
    _prev_wall = now

    # Python threads
    py_threads = _get_python_threads_rich()

    # Match kernel TIDs to Python threads
    native_to_py = {}
    for pt in py_threads:
        nid = pt.get("native_id")
        if nid is not None:
            native_to_py[nid] = pt

    # Enrich kernel-thread rows with the matching Python thread's info.
    for td in thread_details:
        pt = native_to_py.get(td["tid"])
        if pt:
            td["py_name"] = pt["name"]
            td["py_target"] = pt.get("target_func", "")
            td["py_module"] = pt.get("target_module", "")
            td["py_file"] = pt.get("target_file", "")
            if pt.get("stack_top"):
                top = pt["stack_top"][0]
                td["py_current"] = f"{top['file']}:{top['line']} {top['func']}()"

    # tracemalloc (cached, refreshes every 5s)
    tm_current, tm_peak, tm_by_file, tm_by_line = _get_tracemalloc_cached()

    # Open files
    open_files = _read_open_files()

    # Memory breakdown
    rss_kb = status.get("vm_rss_kb", 0)
    tm_current_kb = round(tm_current / 1024, 1)
    # C/native memory = RSS - Python traced (approximation)
    rss_anon_kb = status.get("rss_anon_kb", 0)
    rss_file_kb = status.get("rss_file_kb", 0)

    snapshot = {
        "ts": round(now, 3),
        "proc_cpu_pct": round(proc_cpu_pct, 2),
        "rss_kb": rss_kb,
        "vm_size_kb": status.get("vm_size_kb", 0),
        "vm_peak_kb": status.get("vm_peak_kb", 0),
        "vm_swap_kb": status.get("vm_swap_kb", 0),
        "fd_open": fd_open,
        "fd_slots": status.get("fd_slots", 0),
        "kernel_threads": status.get("kernel_threads", 0),
        "py_thread_count": len(py_threads),
        "sys_cpu_count": sys_info["cpu_count"],
        "sys_mem_total_kb": sys_info["mem_total_kb"],
        "sys_mem_available_kb": sys_info["mem_available_kb"],
        # Memory breakdown
        "rss_anon_kb": rss_anon_kb,
        "rss_file_kb": rss_file_kb,
        "rss_shmem_kb": status.get("rss_shmem_kb", 0),
        "private_dirty_kb": smaps.get("private_dirty_kb", 0),
        "private_clean_kb": smaps.get("private_clean_kb", 0),
        "shared_dirty_kb": smaps.get("shared_dirty_kb", 0),
        "shared_clean_kb": smaps.get("shared_clean_kb", 0),
        # Data
        "threads": thread_details,
        "py_threads": py_threads,
        "tracemalloc_active": tracemalloc.is_tracing(),
        "tracemalloc_current_kb": tm_current_kb,
        "tracemalloc_peak_kb": round(tm_peak / 1024, 1),
        "tracemalloc_by_file": tm_by_file,
        "tracemalloc_by_line": tm_by_line,
        "open_files": open_files,
    }

    # Append a compact summary to the bounded history ring buffer.
    with _history_lock:
        _history.append({
            "ts": snapshot["ts"],
            "proc_cpu_pct": snapshot["proc_cpu_pct"],
            "rss_kb": rss_kb,
            "fd_open": fd_open,
            "py_thread_count": snapshot["py_thread_count"],
            "kernel_threads": snapshot["kernel_threads"],
            "vm_swap_kb": snapshot["vm_swap_kb"],
            "private_dirty_kb": snapshot["private_dirty_kb"],
        })
        if len(_history) > _MAX_HISTORY:
            del _history[: len(_history) - _MAX_HISTORY]

    return snapshot
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# DebugUtils class
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class DebugUtils:
    """Web handlers for the Debug page: snapshots, history, tracemalloc and GC."""

    def __init__(self, shared_data):
        # Stored for interface parity with the other web handler classes.
        self.shared_data = shared_data

    def get_snapshot(self, handler):
        """Send a full profiling snapshot as a JSON response."""
        try:
            payload = _take_snapshot()
            self._send_json(handler, payload)
        except Exception as exc:
            logger.error(f"debug snapshot error: {exc}")
            self._send_json(handler, {"error": str(exc)}, status=500)

    def get_history(self, handler):
        """Send the ring-buffered snapshot history as a JSON response."""
        try:
            with _history_lock:
                entries = list(_history)
            self._send_json(handler, {"history": entries})
        except Exception as exc:
            logger.error(f"debug history error: {exc}")
            self._send_json(handler, {"error": str(exc)}, status=500)

    def toggle_tracemalloc(self, data):
        """Start, stop or query tracemalloc.

        data["action"] selects the operation ("start" / "stop"; anything
        else reports status). Returns a status dict rather than raising.
        """
        global _tm_cache, _tm_cache_time
        action = data.get("action", "status")
        try:
            if action == "start":
                if not tracemalloc.is_tracing():
                    tracemalloc.start(int(data.get("nframes", 10)))
                return {"status": "ok", "tracing": True}
            if action == "stop":
                if tracemalloc.is_tracing():
                    tracemalloc.stop()
                # Invalidate the snapshot cache so stale data is not served.
                with _tm_cache_lock:
                    _tm_cache = None
                    _tm_cache_time = 0.0
                return {"status": "ok", "tracing": False}
            return {"status": "ok", "tracing": tracemalloc.is_tracing()}
        except Exception as exc:
            return {"status": "error", "message": str(exc)}

    def get_gc_stats(self, handler):
        """Send current garbage-collector counters and thresholds as JSON."""
        import gc
        try:
            gen_counts = gc.get_count()
            gen_limits = gc.get_threshold()
            self._send_json(handler, {
                "gc_enabled": gc.isenabled(),
                "counts": {"gen0": gen_counts[0], "gen1": gen_counts[1], "gen2": gen_counts[2]},
                "thresholds": {"gen0": gen_limits[0], "gen1": gen_limits[1], "gen2": gen_limits[2]},
            })
        except Exception as exc:
            self._send_json(handler, {"error": str(exc)}, status=500)

    def force_gc(self, data):
        """Run a full collection; report how many objects were collected."""
        import gc
        try:
            return {"status": "ok", "collected": gc.collect()}
        except Exception as exc:
            return {"status": "error", "message": str(exc)}

    @staticmethod
    def _send_json(handler, data, status=200):
        """Write an HTTP response with a JSON-encoded body on the handler."""
        body = json.dumps(data, default=str).encode("utf-8")
        handler.send_response(status)
        handler.send_header("Content-Type", "application/json")
        handler.end_headers()
        handler.wfile.write(body)