mirror of
https://github.com/infinition/Bjorn.git
synced 2026-03-09 06:01:59 +00:00
- Implemented methods for fetching AI stats, training history, and recent experiences. - Added functionality to set operation mode (MANUAL, AUTO, AI) with appropriate handling. - Included helper methods for querying the database and sending JSON responses. - Integrated model metadata extraction for visualization purposes.
787 lines
34 KiB
Python
787 lines
34 KiB
Python
# orchestrator.py
|
|
# Action queue consumer for Bjorn - executes actions from the scheduler queue
|
|
|
|
import importlib
|
|
import time
|
|
import logging
|
|
import threading
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from init_shared import shared_data
|
|
from logger import Logger
|
|
from action_scheduler import ActionScheduler
|
|
from ai_engine import get_or_create_ai_engine, invalidate_ai_engine
|
|
from feature_logger import FeatureLogger
|
|
from data_consolidator import DataConsolidator
|
|
|
|
logger = Logger(name="orchestrator.py", level=logging.DEBUG)
|
|
|
|
|
|
class Orchestrator:
|
|
"""Orchestrator that consumes the action queue generated by the scheduler"""
|
|
|
|
def __init__(self):
    """Wire up shared state, the action registry, scheduler handles and AI helpers.

    The FeatureLogger is created for both AUTO and AI modes so training data
    is collected from all automated executions; the AI engine and the
    DataConsolidator are created only when the current mode is "AI". Every
    optional component degrades gracefully: a failed init logs a throttled
    message and the orchestrator continues heuristic-only.
    """
    self.shared_data = shared_data
    self.actions = {}  # Dictionary of action instances, keyed by action class name
    self.network_scanner = None  # set by load_actions() for the global scan action
    self.scheduler = None
    self.scheduler_thread = None
    self._loop_error_backoff = 1.0  # seconds; doubled on main-loop errors (capped in run())

    # ┌─────────────────────────────────────────────────────────┐
    # │ AI / Feature-logging Components │
    # └─────────────────────────────────────────────────────────┘
    # feature_logger runs in AUTO and AI mode to collect training data
    # from ALL automated executions.
    # ai_engine + data_consolidator run only in AI mode.
    self.ai_engine = None
    self.data_consolidator = None
    self.ai_enabled = bool(self.shared_data.operation_mode == "AI")
    self._ai_server_failure_streak = 0  # consecutive failed AI-server contacts

    # FeatureLogger: active as long as the orchestrator runs (AUTO or AI)
    self.feature_logger = None
    if self.shared_data.operation_mode in ("AUTO", "AI"):
        try:
            self.feature_logger = FeatureLogger(self.shared_data)
            logger.info("FeatureLogger initialized (data collection active)")
        except Exception as e:
            # Best-effort: orchestration continues without training-data capture.
            logger.info_throttled(
                f"FeatureLogger unavailable; execution data will not be logged: {e}",
                key="orch_feature_logger_init_failed",
                interval_s=300.0,
            )
            self.feature_logger = None

    if self.ai_enabled:
        try:
            self.ai_engine = get_or_create_ai_engine(self.shared_data)
            self.data_consolidator = DataConsolidator(self.shared_data)
            logger.info("AI engine + DataConsolidator initialized (AI mode)")
        except Exception as e:
            # Degrade to heuristic-only behaviour instead of failing startup.
            logger.info_throttled(
                f"AI mode active but AI components unavailable; continuing heuristic-only: {e}",
                key="orch_ai_init_failed",
                interval_s=300.0,
            )
            self.ai_engine = None
            self.data_consolidator = None
            self.ai_enabled = False

    # Load all available actions
    self.load_actions()
    logger.info(f"Actions loaded: {list(self.actions.keys())}")
|
|
|
|
def _is_enabled_value(self, value: Any) -> bool:
|
|
"""Robust parser for b_enabled values coming from DB."""
|
|
if value is None:
|
|
return True
|
|
if isinstance(value, bool):
|
|
return value
|
|
if isinstance(value, (int, float)):
|
|
return int(value) == 1
|
|
s = str(value).strip().lower()
|
|
if s in {"1", "true", "yes", "on"}:
|
|
return True
|
|
if s in {"0", "false", "no", "off"}:
|
|
return False
|
|
try:
|
|
return int(float(s)) == 1
|
|
except Exception:
|
|
return True
|
|
|
|
def _is_action_eligible_for_ai_learning(self, action_name: str) -> bool:
|
|
"""Exclude control-plane actions from AI training/reward."""
|
|
return str(action_name) not in {"NetworkScanner"}
|
|
|
|
def _update_ai_server_health(self, contact_events: List[bool]) -> None:
|
|
"""
|
|
Update consecutive AI server failure counter and fallback to AUTO when needed.
|
|
`contact_events` contains one bool per attempted contact in this cycle.
|
|
"""
|
|
if not contact_events:
|
|
return
|
|
|
|
contacted_ok = any(contact_events)
|
|
if contacted_ok:
|
|
if self._ai_server_failure_streak > 0:
|
|
logger.info("AI server contact recovered; reset failure streak")
|
|
self._ai_server_failure_streak = 0
|
|
return
|
|
|
|
self._ai_server_failure_streak += 1
|
|
max_failures = max(
|
|
1,
|
|
int(getattr(self.shared_data, "ai_server_max_failures_before_auto", 3)),
|
|
)
|
|
model_loaded = bool(getattr(self.ai_engine, "model_loaded", False))
|
|
|
|
if self.shared_data.operation_mode == "AI" and (not model_loaded):
|
|
remaining_cycles = max(0, max_failures - self._ai_server_failure_streak)
|
|
if remaining_cycles > 0:
|
|
logger.info_throttled(
|
|
f"AI server unreachable ({self._ai_server_failure_streak}/{max_failures}) and no local model loaded; "
|
|
f"AUTO fallback in {remaining_cycles} cycle(s) if server remains offline",
|
|
key="orch_ai_unreachable_no_model_pre_fallback",
|
|
interval_s=60.0,
|
|
)
|
|
|
|
if (
|
|
self.shared_data.operation_mode == "AI"
|
|
and self._ai_server_failure_streak >= max_failures
|
|
and (not model_loaded)
|
|
):
|
|
logger.warning(
|
|
f"AI server unreachable for {self._ai_server_failure_streak} consecutive cycles and no local AI model is loaded; "
|
|
"switching operation mode to AUTO (heuristics-only)"
|
|
)
|
|
self.shared_data.operation_mode = "AUTO"
|
|
self._disable_ai_components()
|
|
|
|
def load_actions(self):
    """Load and instantiate all enabled actions declared in the database.

    Populates ``self.actions`` (keyed by class name) and ``self.network_scanner``
    for the global scanning action. Rows whose ``b_enabled`` flag parses as
    disabled are skipped. Import/instantiation errors are logged per action so
    one broken module does not prevent the others from loading.
    """
    actions_config = self.shared_data.get_actions_config()

    for action in actions_config:
        module_name = action["b_module"]
        b_class = action["b_class"]

        # 🔴 Skip disabled actions. Use the robust parser rather than a bare
        # int() cast: DB values like "true"/"yes"/"" would raise ValueError
        # here (outside any try) and abort the entire loading loop.
        if not self._is_enabled_value(action.get("b_enabled", 1)):
            logger.info(f"Skipping disabled action: {b_class}")
            continue

        try:
            # Import the module dynamically
            module = importlib.import_module(f'actions.{module_name}')

            # Global actions (NetworkScanner)
            if module_name == "scanning":
                scanner_class = getattr(module, b_class)
                self.network_scanner = scanner_class(self.shared_data)
                self.actions[b_class] = self.network_scanner
            else:
                # Normal actions
                action_class = getattr(module, b_class)
                action_instance = action_class(self.shared_data)
                action_instance.action_name = b_class
                action_instance.port = action.get("b_port")
                action_instance.b_parent_action = action.get("b_parent")
                self.actions[b_class] = action_instance

        except Exception as e:
            # One broken action must not stop the rest from loading.
            logger.error(f"Failed to load action {b_class}: {e}")
|
|
|
|
# ----------------------------------------------------------------- AI mode
|
|
|
|
def _ensure_feature_logger(self) -> None:
|
|
"""Init FeatureLogger if not yet running (called when entering AUTO or AI mode)."""
|
|
if self.feature_logger is not None:
|
|
return
|
|
try:
|
|
self.feature_logger = FeatureLogger(self.shared_data)
|
|
logger.info("FeatureLogger enabled")
|
|
except Exception as e:
|
|
logger.info_throttled(
|
|
f"FeatureLogger unavailable: {e}",
|
|
key="orch_feature_logger_enable_failed",
|
|
interval_s=300.0,
|
|
)
|
|
|
|
def _enable_ai_components(self) -> None:
|
|
"""Lazy-init AI-specific helpers when switching to AI mode at runtime."""
|
|
self._ensure_feature_logger()
|
|
|
|
if self.ai_engine and self.data_consolidator:
|
|
self.ai_enabled = True
|
|
return
|
|
|
|
try:
|
|
self.ai_engine = get_or_create_ai_engine(self.shared_data)
|
|
self.data_consolidator = DataConsolidator(self.shared_data)
|
|
self.ai_enabled = True
|
|
if self.ai_engine and not bool(getattr(self.ai_engine, "model_loaded", False)):
|
|
logger.warning(
|
|
"AI mode active but no local model loaded yet; "
|
|
"will fallback to AUTO if server stays unreachable"
|
|
)
|
|
logger.info("AI engine + DataConsolidator enabled")
|
|
except Exception as e:
|
|
self.ai_engine = None
|
|
self.data_consolidator = None
|
|
self.ai_enabled = False
|
|
logger.info_throttled(
|
|
f"AI components not available; staying heuristic-only: {e}",
|
|
key="orch_ai_enable_failed",
|
|
interval_s=300.0,
|
|
)
|
|
|
|
def _disable_ai_components(self) -> None:
|
|
"""Drop AI-specific helpers when leaving AI mode.
|
|
FeatureLogger is kept alive so AUTO mode still collects data."""
|
|
self.ai_enabled = False
|
|
self.ai_engine = None
|
|
self.data_consolidator = None
|
|
# Release cached AI engine singleton so model weights can be freed in AUTO mode.
|
|
try:
|
|
invalidate_ai_engine(self.shared_data)
|
|
except Exception:
|
|
pass
|
|
|
|
def _sync_ai_components(self) -> None:
|
|
"""Keep runtime AI helpers aligned with shared_data.operation_mode."""
|
|
mode = self.shared_data.operation_mode
|
|
if mode == "AI":
|
|
if not self.ai_enabled:
|
|
self._enable_ai_components()
|
|
else:
|
|
if self.ai_enabled:
|
|
self._disable_ai_components()
|
|
# Ensure feature_logger is alive in AUTO mode too
|
|
if mode == "AUTO":
|
|
self._ensure_feature_logger()
|
|
|
|
|
|
def start_scheduler(self):
    """Start the scheduler in background"""
    # Idempotent: a live scheduler thread means there is nothing to do.
    already_running = self.scheduler_thread is not None and self.scheduler_thread.is_alive()
    if already_running:
        logger.info("ActionScheduler thread already running")
        return

    logger.info("Starting ActionScheduler in background...")
    self.scheduler = ActionScheduler(self.shared_data)
    worker = threading.Thread(
        target=self.scheduler.run,
        daemon=True,
        name="ActionScheduler"
    )
    self.scheduler_thread = worker
    worker.start()
    logger.info("ActionScheduler started")
|
|
|
|
def get_next_action(self) -> Optional[Dict[str, Any]]:
    """Get next action from queue"""
    queued = self.shared_data.db.get_next_queued_action()
    if queued:
        # Announce what is about to run so operators can follow the queue.
        logger.info(
            f"Next action: {queued['action_name']} for {queued['mac_address']} "
            f"(priority={queued.get('priority_effective')})"
        )
    return queued
|
|
|
|
def _build_host_state(self, mac_address: str) -> Dict:
|
|
"""
|
|
Build RL state dict from host data in database.
|
|
|
|
Args:
|
|
mac_address: Target MAC address
|
|
|
|
Returns:
|
|
Dict with keys: mac, ports, hostname
|
|
"""
|
|
try:
|
|
# Get host from database
|
|
host = self.shared_data.db.get_host_by_mac(mac_address)
|
|
|
|
if not host:
|
|
logger.warning(f"Host not found for MAC: {mac_address}")
|
|
return {'mac': mac_address, 'ports': [], 'hostname': ''}
|
|
|
|
# Parse ports
|
|
ports_str = host.get('ports', '')
|
|
ports = []
|
|
if ports_str:
|
|
for p in ports_str.split(';'):
|
|
p = p.strip()
|
|
if p.isdigit():
|
|
ports.append(int(p))
|
|
|
|
# Get first hostname
|
|
hostnames_str = host.get('hostnames', '')
|
|
hostname = hostnames_str.split(';')[0] if hostnames_str else ''
|
|
|
|
return {
|
|
'mac': mac_address,
|
|
'ports': ports,
|
|
'hostname': hostname
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error building host state: {e}")
|
|
return {'mac': mac_address, 'ports': [], 'hostname': ''}
|
|
|
|
def _calculate_reward(
|
|
self,
|
|
action_name: str,
|
|
success: bool,
|
|
duration: float,
|
|
mac: str,
|
|
state_before: Dict,
|
|
state_after: Dict
|
|
) -> float:
|
|
"""
|
|
Calculate reward for RL update.
|
|
|
|
Reward structure:
|
|
- Base: +50 for success, -5 for failure
|
|
- Credentials found: +100
|
|
- New services: +20 per service
|
|
- Time bonus: +20 if <30s, -10 if >120s
|
|
- New ports discovered: +15 per port
|
|
|
|
Args:
|
|
action_name: Name of action executed
|
|
success: Did action succeed?
|
|
duration: Execution time in seconds
|
|
mac: Target MAC address
|
|
state_before: State dict before action
|
|
state_after: State dict after action
|
|
|
|
Returns:
|
|
Reward value (float)
|
|
"""
|
|
if not self._is_action_eligible_for_ai_learning(action_name):
|
|
return 0.0
|
|
|
|
# Base reward
|
|
reward = 50.0 if success else -5.0
|
|
|
|
if not success:
|
|
# Penalize time waste on failure
|
|
reward -= (duration * 0.1)
|
|
return reward
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Check for credentials found (high value!)
|
|
# ─────────────────────────────────────────────────────────
|
|
try:
|
|
recent_creds = self.shared_data.db.query("""
|
|
SELECT COUNT(*) as cnt FROM creds
|
|
WHERE mac_address=?
|
|
AND first_seen > datetime('now', '-1 minute')
|
|
""", (mac,))
|
|
|
|
if recent_creds and recent_creds[0]['cnt'] > 0:
|
|
creds_count = recent_creds[0]['cnt']
|
|
reward += 100 * creds_count # 100 per credential!
|
|
logger.info(f"RL: +{100*creds_count} reward for {creds_count} credentials")
|
|
except Exception as e:
|
|
logger.error(f"Error checking credentials: {e}")
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Check for new services discovered
|
|
# ─────────────────────────────────────────────────────────
|
|
try:
|
|
# Compare ports before/after
|
|
ports_before = set(state_before.get('ports', []))
|
|
ports_after = set(state_after.get('ports', []))
|
|
new_ports = ports_after - ports_before
|
|
|
|
if new_ports:
|
|
reward += 15 * len(new_ports)
|
|
logger.info(f"RL: +{15*len(new_ports)} reward for {len(new_ports)} new ports")
|
|
except Exception as e:
|
|
logger.error(f"Error checking new ports: {e}")
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Time efficiency bonus/penalty
|
|
# ─────────────────────────────────────────────────────────
|
|
if duration < 30:
|
|
reward += 20 # Fast execution bonus
|
|
elif duration > 120:
|
|
reward -= 10 # Slow execution penalty
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Action-specific bonuses
|
|
# ─────────────────────────────────────────────────────────
|
|
if action_name == "SSHBruteforce" and success:
|
|
# Extra bonus for SSH success (difficult action)
|
|
reward += 30
|
|
|
|
logger.debug(f"RL Reward calculated: {reward:.1f} for {action_name}")
|
|
return reward
|
|
|
|
def execute_queued_action(self, queued_action: Dict[str, Any]) -> bool:
    """Execute a single queued action with RL integration.

    Flow: validate the queue row (skip disabled / not-loaded actions),
    capture host state, run the action (global scan or targeted execute),
    update the queue row status, then log features + reward for training.

    Args:
        queued_action: Queue row with keys id, action_name, mac_address,
            ip, port and an optional JSON string under 'metadata'.

    Returns:
        True when the action reported 'success'; False for skipped,
        cancelled, failed or crashed executions.
    """
    queue_id = queued_action['id']
    action_name = queued_action['action_name']
    mac = queued_action['mac_address']
    ip = queued_action['ip']
    port = queued_action['port']

    # Parse metadata once — used throughout this function.
    # Defensive parse: a NULL metadata column would make json.loads raise
    # TypeError, and malformed JSON raises JSONDecodeError — both BEFORE any
    # queue-status update, so the row would stay 'queued' and the consumer
    # loop would retry it forever. Treat bad metadata as empty instead.
    try:
        metadata = json.loads(queued_action.get('metadata') or '{}')
    except (TypeError, ValueError):
        metadata = {}
    if not isinstance(metadata, dict):
        metadata = {}

    source = str(metadata.get('decision_method', 'unknown'))
    source_label = f"[{source.upper()}]" if source != 'unknown' else ""

    decision_origin = str(metadata.get('decision_origin', 'unknown'))
    ai_confidence = metadata.get('ai_confidence')
    ai_threshold = metadata.get('ai_threshold', getattr(self.shared_data, "ai_confirm_threshold", 0.3))
    ai_reason = str(metadata.get('ai_reason', 'n/a'))
    ai_method = metadata.get('ai_method')
    if not ai_method:
        # Fall back to the method recorded in the debug payload, if any.
        ai_method = (metadata.get('ai_debug') or {}).get('method')
    ai_method = str(ai_method or 'n/a')
    ai_model_loaded = bool(metadata.get('ai_model_loaded', bool(getattr(self.ai_engine, "model_loaded", False)) if self.ai_engine else False))
    decision_scope = str(metadata.get('decision_scope', 'unknown'))

    # Structured decision trace for log analysis / dashboards.
    exec_payload = {
        "action": action_name,
        "target": ip,
        "port": port,
        "decision_method": source,
        "decision_origin": decision_origin,
        "decision_scope": decision_scope,
        "ai_method": ai_method,
        "ai_confidence": ai_confidence if isinstance(ai_confidence, (int, float)) else None,
        "ai_threshold": ai_threshold if isinstance(ai_threshold, (int, float)) else None,
        "ai_model_loaded": ai_model_loaded,
        "ai_reason": ai_reason,
    }

    logger.info(f"Executing {source_label}: {action_name} for {ip}:{port}")
    logger.info(f"[DECISION_EXEC] {json.dumps(exec_payload)}")

    # Guard rail: stale queue rows can exist for disabled or not-loaded actions.
    try:
        action_row = self.shared_data.db.get_action_by_class(action_name)
        if action_row and not self._is_enabled_value(action_row.get("b_enabled", 1)):
            self.shared_data.db.update_queue_status(
                queue_id,
                'cancelled',
                f"Action {action_name} disabled (b_enabled=0)",
            )
            logger.info(f"Skipping queued disabled action: {action_name}")
            return False
    except Exception as e:
        logger.debug(f"Could not verify b_enabled for {action_name}: {e}")

    if action_name not in self.actions:
        self.shared_data.db.update_queue_status(
            queue_id,
            'cancelled',
            f"Action {action_name} not loaded",
        )
        logger.warning(f"Skipping queued action not loaded: {action_name}")
        return False

    # ┌─────────────────────────────────────────────────────────┐
    # │ STEP 1: Capture state BEFORE action (all modes) │
    # └─────────────────────────────────────────────────────────┘
    state_before = None
    if self.feature_logger:
        try:
            state_before = self._build_host_state(mac)
            logger.debug(f"State before captured for {mac}")
        except Exception as e:
            logger.info_throttled(
                f"State capture skipped: {e}",
                key="orch_state_before_failed",
                interval_s=120.0,
            )

    # Update status to running
    self.shared_data.db.update_queue_status(queue_id, 'running')

    # ┌─────────────────────────────────────────────────────────┐
    # │ EXECUTE ACTION (existing code) │
    # └─────────────────────────────────────────────────────────┘
    start_time = time.time()
    success = False

    try:
        action = self.actions[action_name]

        # Prepare row data for compatibility
        row = {
            "MAC Address": mac,
            "IPs": ip,
            "Ports": str(port) if port else "",
            "Alive": 1
        }

        # Prepare status details
        if ip and ip != "0.0.0.0":
            port_str = str(port).strip() if port is not None else ""
            has_port = bool(port_str) and port_str.lower() != "none"
            target_display = f"{ip}:{port_str}" if has_port else ip
            status_msg = f"{action_name} on {ip}"
            details = f"Target: {target_display}"
            self.shared_data.action_target_ip = target_display
        else:
            status_msg = f"{action_name} (Global)"
            details = "Scanning network..."
            self.shared_data.action_target_ip = ""

        # Update shared status for display
        self.shared_data.bjorn_orch_status = action_name
        self.shared_data.bjorn_status_text2 = self.shared_data.action_target_ip or ip

        self.shared_data.update_status(status_msg, details)

        # --- AI Dashboard Metadata (AI mode only) ---
        if (
            self.ai_enabled
            and self.shared_data.operation_mode == "AI"
            and self._is_action_eligible_for_ai_learning(action_name)
        ):
            decision_method = metadata.get('decision_method', 'heuristic')
            self.shared_data.active_action = action_name
            self.shared_data.last_decision_method = decision_method
            self.shared_data.last_ai_decision = metadata.get('ai_debug', {})
            ai_exec_payload = {
                "action": action_name,
                "method": decision_method,
                "origin": decision_origin,
                "target": ip,
                "ai_method": ai_method,
                "ai_confidence": ai_confidence if isinstance(ai_confidence, (int, float)) else None,
                "ai_threshold": ai_threshold if isinstance(ai_threshold, (int, float)) else None,
                "ai_model_loaded": ai_model_loaded,
                "reason": ai_reason,
            }
            logger.info(f"[AI_EXEC] {json.dumps(ai_exec_payload)}")

        # Check if global action (metadata already parsed above)
        if metadata.get('is_global') and hasattr(action, 'scan'):
            # Execute global scan
            action.scan()
            result = 'success'
        else:
            # Execute targeted action
            result = action.execute(
                ip,
                str(port) if port else "",
                row,
                action_name
            )

        # Determine success
        success = (result == 'success')

        # Update queue status based on result
        if success:
            self.shared_data.db.update_queue_status(queue_id, 'success')
            logger.success(f"Action {action_name} completed successfully for {ip}")
        else:
            self.shared_data.db.update_queue_status(queue_id, 'failed')
            logger.warning(f"Action {action_name} failed for {ip}")

    except Exception as e:
        logger.error(f"Error executing action {action_name}: {e}")
        self.shared_data.db.update_queue_status(queue_id, 'failed', str(e))
        success = False

    finally:
        # Always clear the AI dashboard state and reset the display,
        # whatever the execution outcome was.
        if (
            self.ai_enabled
            and self.shared_data.operation_mode == "AI"
            and self._is_action_eligible_for_ai_learning(action_name)
        ):
            ai_done_payload = {
                "action": action_name,
                "success": bool(success),
                "method": source,
                "origin": decision_origin,
            }
            logger.info(f"[AI_DONE] {json.dumps(ai_done_payload)}")
            self.shared_data.active_action = None

        # Clear status text
        self.shared_data.bjorn_status_text2 = ""
        self.shared_data.action_target_ip = ""
        # Reset Status to Thinking/Idle
        self.shared_data.update_status("Thinking", "Processing results...")

    duration = time.time() - start_time

    # ┌─────────────────────────────────────────────────────────┐
    # │ STEP 2: Log execution features (AUTO + AI modes) │
    # └─────────────────────────────────────────────────────────┘
    if self.feature_logger and state_before and self._is_action_eligible_for_ai_learning(action_name):
        try:
            reward = self._calculate_reward(
                action_name=action_name,
                success=success,
                duration=duration,
                mac=mac,
                state_before=state_before,
                state_after=self._build_host_state(mac),
            )

            self.feature_logger.log_action_execution(
                mac_address=mac,
                ip_address=ip,
                action_name=action_name,
                success=success,
                duration=duration,
                reward=reward,
                raw_event={
                    'port': port,
                    'action': action_name,
                    'queue_id': queue_id,
                    # metadata already parsed — no second json.loads
                    'metadata': metadata,
                    # Tag decision source so the training pipeline can weight
                    # human choices (MANUAL would be logged if orchestrator
                    # ever ran in that mode) vs automated ones.
                    'decision_source': self.shared_data.operation_mode,
                    'human_override': False,
                },
            )

            logger.debug(f"Features logged for {action_name} (mode={self.shared_data.operation_mode})")

        except Exception as e:
            logger.info_throttled(
                f"Feature logging skipped: {e}",
                key="orch_feature_log_failed",
                interval_s=120.0,
            )
    elif self.feature_logger and state_before:
        logger.debug(f"Feature logging disabled for {action_name} (excluded from AI learning)")

    return success
|
|
|
|
def run(self):
    """Main loop: start scheduler and consume queue.

    Runs until ``shared_data.orchestrator_should_exit`` becomes truthy,
    then stops the scheduler and joins its thread (10s grace) on the way out.
    """

    # Start the scheduler
    self.start_scheduler()

    # Wait for scheduler initialization
    time.sleep(2)

    # Main execution loop
    idle_time = 0  # seconds spent idle since the last executed action
    consecutive_idle_logs = 0
    self._last_background_task = 0  # epoch seconds of last background-task pass

    while not self.shared_data.orchestrator_should_exit:
        try:
            # Allow live mode switching from the UI without restarting the process.
            self._sync_ai_components()

            # Get next action from queue
            next_action = self.get_next_action()

            if next_action:
                # Reset idle counters
                idle_time = 0
                consecutive_idle_logs = 0
                self._loop_error_backoff = 1.0

                # Execute the action
                self.execute_queued_action(next_action)
            else:
                # IDLE mode
                idle_time += 1

                self.shared_data.bjorn_orch_status = "IDLE"
                self.shared_data.bjorn_status_text2 = ""
                self.shared_data.action_target_ip = ""

                # Log periodically (less spam)
                if idle_time % 30 == 0:  # Every 30 seconds
                    consecutive_idle_logs += 1
                    if consecutive_idle_logs <= 3:  # Limit consecutive logs
                        logger.debug(f"Queue empty, idling... ({idle_time}s)")

                # Event-driven wait (max 5s to check for exit signals)
                self.shared_data.queue_event.wait(timeout=5)
                self.shared_data.queue_event.clear()

            # Periodically process background tasks (even if busy)
            current_time = time.time()
            sync_interval = int(getattr(self.shared_data, "ai_sync_interval", 60))
            if current_time - self._last_background_task > sync_interval:
                self._process_background_tasks()
                self._last_background_task = current_time

        except Exception as e:
            # Exponential backoff on repeated loop errors so a persistent
            # failure does not spin the CPU.
            logger.error(f"Error in orchestrator loop: {e}")
            time.sleep(self._loop_error_backoff)
            self._loop_error_backoff = min(self._loop_error_backoff * 2.0, 10.0)

    # Cleanup on exit (OUTSIDE while loop)
    if self.scheduler:
        self.scheduler.stop()
    # Wake any thread blocked on the queue event so it can observe shutdown.
    self.shared_data.queue_event.set()
    if self.scheduler_thread and self.scheduler_thread.is_alive():
        self.scheduler_thread.join(timeout=10.0)
        if self.scheduler_thread.is_alive():
            logger.warning("ActionScheduler thread did not exit cleanly")

    logger.info("Orchestrator stopped")
|
|
|
|
def _process_background_tasks(self):
    """Run periodic tasks like consolidation, upload retries, and model updates (AI mode only)."""
    # No-op unless the orchestrator is currently running with AI enabled.
    if not (self.ai_enabled and self.shared_data.operation_mode == "AI"):
        return

    # One bool per attempted AI-server contact this cycle; fed into
    # _update_ai_server_health() at the end to drive the AUTO fallback.
    ai_server_contact_events: List[bool] = []

    try:
        # Consolidate features
        batch_size = int(getattr(self.shared_data, "ai_batch_size", 100))
        max_batches = max(1, int(getattr(self.shared_data, "ai_consolidation_max_batches", 2)))
        stats = self.data_consolidator.consolidate_features(
            batch_size=batch_size,
            max_batches=max_batches,
        )

        if stats.get("records_processed", 0) > 0:
            logger.info(f"AI Consolidation: {stats['records_processed']} records processed")
            logger.debug(f"DEBUG STATS: {stats}")

            # Auto-export after consolidation
            max_export_records = max(100, int(getattr(self.shared_data, "ai_export_max_records", 1000)))
            filepath, count = self.data_consolidator.export_for_training(
                format="csv",
                compress=True,
                max_records=max_export_records,
            )
            if filepath:
                logger.info(f"AI export ready: {count} records -> {filepath}")
                self.data_consolidator.upload_to_server(filepath)
                # Record whether the upload actually reached the server.
                if getattr(self.data_consolidator, "last_server_attempted", False):
                    ai_server_contact_events.append(
                        bool(getattr(self.data_consolidator, "last_server_contact_ok", False))
                    )

        # Always retry any pending uploads when the server comes back.
        self.data_consolidator.flush_pending_uploads(max_files=2)
        if getattr(self.data_consolidator, "last_server_attempted", False):
            ai_server_contact_events.append(
                bool(getattr(self.data_consolidator, "last_server_contact_ok", False))
            )

    except Exception as e:
        # Background work is best-effort; failures are throttled, not fatal.
        logger.info_throttled(
            f"AI background tasks skipped: {e}",
            key="orch_ai_background_failed",
            interval_s=120.0,
        )

    # Check for model updates (tolerant when server is offline)
    try:
        if self.ai_engine and self.ai_engine.check_for_updates():
            logger.info("AI model updated from server")
        if self.ai_engine and getattr(self.ai_engine, "last_server_attempted", False):
            ai_server_contact_events.append(
                bool(getattr(self.ai_engine, "last_server_contact_ok", False))
            )
        elif self.ai_engine and not bool(getattr(self.ai_engine, "model_loaded", False)):
            # No model loaded and no successful server contact path this cycle.
            ai_server_contact_events.append(False)
    except Exception as e:
        logger.debug(f"AI model update check skipped: {e}")

    self._update_ai_server_health(ai_server_contact_events)
|
|
|
|
|
|
if __name__ == "__main__":
    # Standalone entry point: build the orchestrator and run its main loop.
    Orchestrator().run()
|