# orchestrator.py
# Action queue consumer for Bjorn - executes actions from the scheduler queue
import importlib
import time
import logging
import threading
import json
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from init_shared import shared_data
from logger import Logger
from action_scheduler import ActionScheduler
from ai_engine import get_or_create_ai_engine, invalidate_ai_engine
from feature_logger import FeatureLogger
from data_consolidator import DataConsolidator

logger = Logger(name="orchestrator.py", level=logging.DEBUG)


class Orchestrator:
    """Orchestrator that consumes the action queue generated by the scheduler."""

    def __init__(self):
        self.shared_data = shared_data
        self.actions = {}  # Dictionary of action instances, keyed by b_class name
        self.network_scanner = None
        self.scheduler = None
        self.scheduler_thread = None
        self._loop_error_backoff = 1.0  # exponential backoff for main-loop errors

        # ┌─────────────────────────────────────────────────────────┐
        # │ AI / Feature-logging Components                         │
        # └─────────────────────────────────────────────────────────┘
        # feature_logger runs in AUTO and AI mode to collect training data
        # from ALL automated executions.
        # ai_engine + data_consolidator run only in AI mode.
        self.ai_engine = None
        self.data_consolidator = None
        self.ai_enabled = bool(self.shared_data.operation_mode == "AI")
        self._ai_server_failure_streak = 0

        # FeatureLogger: active as long as the orchestrator runs (AUTO or AI)
        self.feature_logger = None
        if self.shared_data.operation_mode in ("AUTO", "AI"):
            try:
                self.feature_logger = FeatureLogger(self.shared_data)
                logger.info("FeatureLogger initialized (data collection active)")
            except Exception as e:
                logger.info_throttled(
                    f"FeatureLogger unavailable; execution data will not be logged: {e}",
                    key="orch_feature_logger_init_failed",
                    interval_s=300.0,
                )
                self.feature_logger = None

        if self.ai_enabled:
            try:
                self.ai_engine = get_or_create_ai_engine(self.shared_data)
                self.data_consolidator = DataConsolidator(self.shared_data)
                logger.info("AI engine + DataConsolidator initialized (AI mode)")
            except Exception as e:
                # AI components are optional: fall back to heuristic-only operation.
                logger.info_throttled(
                    f"AI mode active but AI components unavailable; continuing heuristic-only: {e}",
                    key="orch_ai_init_failed",
                    interval_s=300.0,
                )
                self.ai_engine = None
                self.data_consolidator = None
                self.ai_enabled = False

        # Load all available actions
        self.load_actions()
        logger.info(f"Actions loaded: {list(self.actions.keys())}")

    def _is_enabled_value(self, value: Any) -> bool:
        """Robust parser for b_enabled values coming from DB.

        Accepts None (treated as enabled), bools, numerics, and common
        truthy/falsy strings; unparseable values default to enabled.
        """
        if value is None:
            return True
        if isinstance(value, bool):
            return value
        if isinstance(value, (int, float)):
            return int(value) == 1
        s = str(value).strip().lower()
        if s in {"1", "true", "yes", "on"}:
            return True
        if s in {"0", "false", "no", "off"}:
            return False
        try:
            return int(float(s)) == 1
        except Exception:
            return True

    def _is_action_eligible_for_ai_learning(self, action_name: str) -> bool:
        """Exclude control-plane actions from AI training/reward."""
        return str(action_name) not in {"NetworkScanner"}

    def _update_ai_server_health(self, contact_events: List[bool]) -> None:
        """
        Update consecutive AI server failure counter and fallback to AUTO when needed.
        `contact_events` contains one bool per attempted contact in this cycle.
        """
        if not contact_events:
            return
        contacted_ok = any(contact_events)
        if contacted_ok:
            if self._ai_server_failure_streak > 0:
                logger.info("AI server contact recovered; reset failure streak")
            self._ai_server_failure_streak = 0
            return

        self._ai_server_failure_streak += 1
        max_failures = max(
            1,
            int(getattr(self.shared_data, "ai_server_max_failures_before_auto", 3)),
        )
        model_loaded = bool(getattr(self.ai_engine, "model_loaded", False))

        # Warn before actually falling back so operators can react.
        if self.shared_data.operation_mode == "AI" and (not model_loaded):
            remaining_cycles = max(0, max_failures - self._ai_server_failure_streak)
            if remaining_cycles > 0:
                logger.info_throttled(
                    f"AI server unreachable ({self._ai_server_failure_streak}/{max_failures}) and no local model loaded; "
                    f"AUTO fallback in {remaining_cycles} cycle(s) if server remains offline",
                    key="orch_ai_unreachable_no_model_pre_fallback",
                    interval_s=60.0,
                )

        if (
            self.shared_data.operation_mode == "AI"
            and self._ai_server_failure_streak >= max_failures
            and (not model_loaded)
        ):
            logger.warning(
                f"AI server unreachable for {self._ai_server_failure_streak} consecutive cycles and no local AI model is loaded; "
                "switching operation mode to AUTO (heuristics-only)"
            )
            self.shared_data.operation_mode = "AUTO"
            self._disable_ai_components()

    def load_actions(self):
        """Load all actions from database."""
        actions_config = self.shared_data.get_actions_config()
        for action in actions_config:
            module_name = action["b_module"]
            b_class = action["b_class"]

            # 🔴 Skip disabled actions.
            # FIX: b_enabled may come back from the DB as None or a string
            # ("true"/"0"/...); a bare int() cast raised TypeError/ValueError
            # and aborted action loading. Use the robust parser instead
            # (same one execute_queued_action already uses).
            if not self._is_enabled_value(action.get("b_enabled", 1)):
                logger.info(f"Skipping disabled action: {b_class}")
                continue

            try:
                # Import the module dynamically
                module = importlib.import_module(f'actions.{module_name}')

                # Global actions (NetworkScanner)
                if module_name == "scanning":
                    scanner_class = getattr(module, b_class)
                    self.network_scanner = scanner_class(self.shared_data)
                    self.actions[b_class] = self.network_scanner
                else:
                    # Normal actions
                    action_class = getattr(module, b_class)
                    action_instance = action_class(self.shared_data)
                    action_instance.action_name = b_class
                    action_instance.port = action.get("b_port")
                    action_instance.b_parent_action = action.get("b_parent")
                    self.actions[b_class] = action_instance
            except Exception as e:
                logger.error(f"Failed to load action {b_class}: {e}")

    # ----------------------------------------------------------------- AI mode
    def _ensure_feature_logger(self) -> None:
        """Init FeatureLogger if not yet running (called when entering AUTO or AI mode)."""
        if self.feature_logger is not None:
            return
        try:
            self.feature_logger = FeatureLogger(self.shared_data)
            logger.info("FeatureLogger enabled")
        except Exception as e:
            logger.info_throttled(
                f"FeatureLogger unavailable: {e}",
                key="orch_feature_logger_enable_failed",
                interval_s=300.0,
            )

    def _enable_ai_components(self) -> None:
        """Lazy-init AI-specific helpers when switching to AI mode at runtime."""
        self._ensure_feature_logger()
        if self.ai_engine and self.data_consolidator:
            self.ai_enabled = True
            return
        try:
            self.ai_engine = get_or_create_ai_engine(self.shared_data)
            self.data_consolidator = DataConsolidator(self.shared_data)
            self.ai_enabled = True
            if self.ai_engine and not bool(getattr(self.ai_engine, "model_loaded", False)):
                logger.warning(
                    "AI mode active but no local model loaded yet; "
                    "will fallback to AUTO if server stays unreachable"
                )
            logger.info("AI engine + DataConsolidator enabled")
        except Exception as e:
            self.ai_engine = None
            self.data_consolidator = None
            self.ai_enabled = False
            logger.info_throttled(
                f"AI components not available; staying heuristic-only: {e}",
                key="orch_ai_enable_failed",
                interval_s=300.0,
            )

    def _disable_ai_components(self) -> None:
        """Drop AI-specific helpers when leaving AI mode.

        FeatureLogger is kept alive so AUTO mode still collects data."""
        self.ai_enabled = False
        self.ai_engine = None
        self.data_consolidator = None
        # Release cached AI engine singleton so model weights can be freed in AUTO mode.
        try:
            invalidate_ai_engine(self.shared_data)
        except Exception:
            pass

    def _sync_ai_components(self) -> None:
        """Keep runtime AI helpers aligned with shared_data.operation_mode."""
        mode = self.shared_data.operation_mode
        if mode == "AI":
            if not self.ai_enabled:
                self._enable_ai_components()
        else:
            if self.ai_enabled:
                self._disable_ai_components()
            # Ensure feature_logger is alive in AUTO mode too
            if mode == "AUTO":
                self._ensure_feature_logger()

    def start_scheduler(self):
        """Start the scheduler in background."""
        if self.scheduler_thread and self.scheduler_thread.is_alive():
            logger.info("ActionScheduler thread already running")
            return
        logger.info("Starting ActionScheduler in background...")
        self.scheduler = ActionScheduler(self.shared_data)
        self.scheduler_thread = threading.Thread(
            target=self.scheduler.run,
            daemon=True,
            name="ActionScheduler"
        )
        self.scheduler_thread.start()
        logger.info("ActionScheduler started")

    def get_next_action(self) -> Optional[Dict[str, Any]]:
        """Get next action from queue (None when the queue is empty)."""
        action = self.shared_data.db.get_next_queued_action()
        if action:
            logger.info(
                f"Next action: {action['action_name']} for {action['mac_address']} "
                f"(priority={action.get('priority_effective')})"
            )
        return action

    def _build_host_state(self, mac_address: str) -> Dict:
        """
        Build RL state dict from host data in database.

        Args:
            mac_address: Target MAC address

        Returns:
            Dict with keys: mac, ports, hostname
        """
        try:
            # Get host from database
            host = self.shared_data.db.get_host_by_mac(mac_address)
            if not host:
                logger.warning(f"Host not found for MAC: {mac_address}")
                return {'mac': mac_address, 'ports': [], 'hostname': ''}

            # Parse ports (stored as a ';'-separated string)
            ports_str = host.get('ports', '')
            ports = []
            if ports_str:
                for p in ports_str.split(';'):
                    p = p.strip()
                    if p.isdigit():
                        ports.append(int(p))

            # Get first hostname
            hostnames_str = host.get('hostnames', '')
            hostname = hostnames_str.split(';')[0] if hostnames_str else ''

            return {
                'mac': mac_address,
                'ports': ports,
                'hostname': hostname
            }
        except Exception as e:
            logger.error(f"Error building host state: {e}")
            return {'mac': mac_address, 'ports': [], 'hostname': ''}

    def _calculate_reward(
        self,
        action_name: str,
        success: bool,
        duration: float,
        mac: str,
        state_before: Dict,
        state_after: Dict
    ) -> float:
        """
        Calculate reward for RL update.

        Reward structure (matches the implementation below):
        - Base: +50 for success, -5 for failure (failure also pays -0.1/s)
        - Credentials found: +100 per credential
        - New ports discovered: +15 per port
        - Time bonus: +20 if <30s, -10 if >120s
        - SSHBruteforce success: +30 extra

        Args:
            action_name: Name of action executed
            success: Did action succeed?
            duration: Execution time in seconds
            mac: Target MAC address
            state_before: State dict before action
            state_after: State dict after action

        Returns:
            Reward value (float)
        """
        if not self._is_action_eligible_for_ai_learning(action_name):
            return 0.0

        # Base reward
        reward = 50.0 if success else -5.0
        if not success:
            # Penalize time waste on failure
            reward -= (duration * 0.1)
            return reward

        # ─────────────────────────────────────────────────────────
        # Check for credentials found (high value!)
        # ─────────────────────────────────────────────────────────
        try:
            recent_creds = self.shared_data.db.query("""
                SELECT COUNT(*) as cnt FROM creds
                WHERE mac_address=?
                AND first_seen > datetime('now', '-1 minute')
            """, (mac,))
            if recent_creds and recent_creds[0]['cnt'] > 0:
                creds_count = recent_creds[0]['cnt']
                reward += 100 * creds_count  # 100 per credential!
                logger.info(f"RL: +{100*creds_count} reward for {creds_count} credentials")
        except Exception as e:
            logger.error(f"Error checking credentials: {e}")

        # ─────────────────────────────────────────────────────────
        # Check for new services discovered
        # ─────────────────────────────────────────────────────────
        try:
            # Compare ports before/after
            ports_before = set(state_before.get('ports', []))
            ports_after = set(state_after.get('ports', []))
            new_ports = ports_after - ports_before
            if new_ports:
                reward += 15 * len(new_ports)
                logger.info(f"RL: +{15*len(new_ports)} reward for {len(new_ports)} new ports")
        except Exception as e:
            logger.error(f"Error checking new ports: {e}")

        # ─────────────────────────────────────────────────────────
        # Time efficiency bonus/penalty
        # ─────────────────────────────────────────────────────────
        if duration < 30:
            reward += 20  # Fast execution bonus
        elif duration > 120:
            reward -= 10  # Slow execution penalty

        # ─────────────────────────────────────────────────────────
        # Action-specific bonuses
        # ─────────────────────────────────────────────────────────
        if action_name == "SSHBruteforce" and success:
            # Extra bonus for SSH success (difficult action)
            reward += 30

        logger.debug(f"RL Reward calculated: {reward:.1f} for {action_name}")
        return reward

    def execute_queued_action(self, queued_action: Dict[str, Any]) -> bool:
        """Execute a single queued action with RL integration.

        Returns True when the underlying action reported 'success'."""
        queue_id = queued_action['id']
        action_name = queued_action['action_name']
        mac = queued_action['mac_address']
        ip = queued_action['ip']
        port = queued_action['port']

        # Parse metadata once — used throughout this function.
        # FIX: a NULL DB column yields None here (dict.get's default only
        # covers a *missing* key) and malformed JSON used to raise before the
        # queue row was marked, leaving it stuck in 'queued' forever.
        try:
            metadata = json.loads(queued_action.get('metadata') or '{}')
        except (TypeError, ValueError):
            metadata = {}
        source = str(metadata.get('decision_method', 'unknown'))
        source_label = f"[{source.upper()}]" if source != 'unknown' else ""
        decision_origin = str(metadata.get('decision_origin', 'unknown'))
        ai_confidence = metadata.get('ai_confidence')
        ai_threshold = metadata.get('ai_threshold', getattr(self.shared_data, "ai_confirm_threshold", 0.3))
        ai_reason = str(metadata.get('ai_reason', 'n/a'))
        ai_method = metadata.get('ai_method')
        if not ai_method:
            ai_method = (metadata.get('ai_debug') or {}).get('method')
        ai_method = str(ai_method or 'n/a')
        ai_model_loaded = bool(metadata.get('ai_model_loaded', bool(getattr(self.ai_engine, "model_loaded", False)) if self.ai_engine else False))
        decision_scope = str(metadata.get('decision_scope', 'unknown'))
        exec_payload = {
            "action": action_name,
            "target": ip,
            "port": port,
            "decision_method": source,
            "decision_origin": decision_origin,
            "decision_scope": decision_scope,
            "ai_method": ai_method,
            "ai_confidence": ai_confidence if isinstance(ai_confidence, (int, float)) else None,
            "ai_threshold": ai_threshold if isinstance(ai_threshold, (int, float)) else None,
            "ai_model_loaded": ai_model_loaded,
            "ai_reason": ai_reason,
        }
        logger.info(f"Executing {source_label}: {action_name} for {ip}:{port}")
        logger.info(f"[DECISION_EXEC] {json.dumps(exec_payload)}")

        # Guard rail: stale queue rows can exist for disabled or not-loaded actions.
        try:
            action_row = self.shared_data.db.get_action_by_class(action_name)
            if action_row and not self._is_enabled_value(action_row.get("b_enabled", 1)):
                self.shared_data.db.update_queue_status(
                    queue_id,
                    'cancelled',
                    f"Action {action_name} disabled (b_enabled=0)",
                )
                logger.info(f"Skipping queued disabled action: {action_name}")
                return False
        except Exception as e:
            logger.debug(f"Could not verify b_enabled for {action_name}: {e}")

        if action_name not in self.actions:
            self.shared_data.db.update_queue_status(
                queue_id,
                'cancelled',
                f"Action {action_name} not loaded",
            )
            logger.warning(f"Skipping queued action not loaded: {action_name}")
            return False

        # ┌─────────────────────────────────────────────────────────┐
        # │ STEP 1: Capture state BEFORE action (all modes)         │
        # └─────────────────────────────────────────────────────────┘
        state_before = None
        if self.feature_logger:
            try:
                state_before = self._build_host_state(mac)
                logger.debug(f"State before captured for {mac}")
            except Exception as e:
                logger.info_throttled(
                    f"State capture skipped: {e}",
                    key="orch_state_before_failed",
                    interval_s=120.0,
                )

        # Update status to running
        self.shared_data.db.update_queue_status(queue_id, 'running')

        # ┌─────────────────────────────────────────────────────────┐
        # │ EXECUTE ACTION                                          │
        # └─────────────────────────────────────────────────────────┘
        start_time = time.time()
        success = False
        try:
            action = self.actions[action_name]

            # Prepare row data for compatibility
            row = {
                "MAC Address": mac,
                "IPs": ip,
                "Ports": str(port) if port else "",
                "Alive": 1
            }

            # Prepare status details
            if ip and ip != "0.0.0.0":
                port_str = str(port).strip() if port is not None else ""
                has_port = bool(port_str) and port_str.lower() != "none"
                target_display = f"{ip}:{port_str}" if has_port else ip
                status_msg = f"{action_name} on {ip}"
                details = f"Target: {target_display}"
                self.shared_data.action_target_ip = target_display
            else:
                status_msg = f"{action_name} (Global)"
                details = "Scanning network..."
                self.shared_data.action_target_ip = ""

            # Update shared status for display
            self.shared_data.bjorn_orch_status = action_name
            self.shared_data.bjorn_status_text2 = self.shared_data.action_target_ip or ip
            self.shared_data.update_status(status_msg, details)

            # --- AI Dashboard Metadata (AI mode only) ---
            if (
                self.ai_enabled
                and self.shared_data.operation_mode == "AI"
                and self._is_action_eligible_for_ai_learning(action_name)
            ):
                decision_method = metadata.get('decision_method', 'heuristic')
                self.shared_data.active_action = action_name
                self.shared_data.last_decision_method = decision_method
                self.shared_data.last_ai_decision = metadata.get('ai_debug', {})
                ai_exec_payload = {
                    "action": action_name,
                    "method": decision_method,
                    "origin": decision_origin,
                    "target": ip,
                    "ai_method": ai_method,
                    "ai_confidence": ai_confidence if isinstance(ai_confidence, (int, float)) else None,
                    "ai_threshold": ai_threshold if isinstance(ai_threshold, (int, float)) else None,
                    "ai_model_loaded": ai_model_loaded,
                    "reason": ai_reason,
                }
                logger.info(f"[AI_EXEC] {json.dumps(ai_exec_payload)}")

            # Check if global action (metadata already parsed above)
            if metadata.get('is_global') and hasattr(action, 'scan'):
                # Execute global scan
                action.scan()
                result = 'success'
            else:
                # Execute targeted action
                result = action.execute(
                    ip, str(port) if port else "", row, action_name
                )

            # Determine success
            success = (result == 'success')

            # Update queue status based on result
            if success:
                self.shared_data.db.update_queue_status(queue_id, 'success')
                logger.success(f"Action {action_name} completed successfully for {ip}")
            else:
                self.shared_data.db.update_queue_status(queue_id, 'failed')
                logger.warning(f"Action {action_name} failed for {ip}")

        except Exception as e:
            logger.error(f"Error executing action {action_name}: {e}")
            self.shared_data.db.update_queue_status(queue_id, 'failed', str(e))
            success = False

        finally:
            if (
                self.ai_enabled
                and self.shared_data.operation_mode == "AI"
                and self._is_action_eligible_for_ai_learning(action_name)
            ):
                ai_done_payload = {
                    "action": action_name,
                    "success": bool(success),
                    "method": source,
                    "origin": decision_origin,
                }
                logger.info(f"[AI_DONE] {json.dumps(ai_done_payload)}")
                self.shared_data.active_action = None
            # Clear status text
            self.shared_data.bjorn_status_text2 = ""
            self.shared_data.action_target_ip = ""
            # Reset Status to Thinking/Idle
            self.shared_data.update_status("Thinking", "Processing results...")

        duration = time.time() - start_time

        # ┌─────────────────────────────────────────────────────────┐
        # │ STEP 2: Log execution features (AUTO + AI modes)        │
        # └─────────────────────────────────────────────────────────┘
        if self.feature_logger and state_before and self._is_action_eligible_for_ai_learning(action_name):
            try:
                reward = self._calculate_reward(
                    action_name=action_name,
                    success=success,
                    duration=duration,
                    mac=mac,
                    state_before=state_before,
                    state_after=self._build_host_state(mac),
                )
                self.feature_logger.log_action_execution(
                    mac_address=mac,
                    ip_address=ip,
                    action_name=action_name,
                    success=success,
                    duration=duration,
                    reward=reward,
                    raw_event={
                        'port': port,
                        'action': action_name,
                        'queue_id': queue_id,
                        # metadata already parsed — no second json.loads
                        'metadata': metadata,
                        # Tag decision source so the training pipeline can weight
                        # human choices (MANUAL would be logged if orchestrator
                        # ever ran in that mode) vs automated ones.
                        'decision_source': self.shared_data.operation_mode,
                        'human_override': False,
                    },
                )
                logger.debug(f"Features logged for {action_name} (mode={self.shared_data.operation_mode})")
            except Exception as e:
                logger.info_throttled(
                    f"Feature logging skipped: {e}",
                    key="orch_feature_log_failed",
                    interval_s=120.0,
                )
        elif self.feature_logger and state_before:
            logger.debug(f"Feature logging disabled for {action_name} (excluded from AI learning)")

        return success

    def run(self):
        """Main loop: start scheduler and consume queue."""
        # Start the scheduler
        self.start_scheduler()

        # Wait for scheduler initialization
        time.sleep(2)

        # Main execution loop
        idle_time = 0
        consecutive_idle_logs = 0
        self._last_background_task = 0

        while not self.shared_data.orchestrator_should_exit:
            try:
                # Allow live mode switching from the UI without restarting the process.
                self._sync_ai_components()

                # Get next action from queue
                next_action = self.get_next_action()

                if next_action:
                    # Reset idle counters
                    idle_time = 0
                    consecutive_idle_logs = 0
                    self._loop_error_backoff = 1.0

                    # Execute the action
                    self.execute_queued_action(next_action)
                else:
                    # IDLE mode
                    idle_time += 1
                    self.shared_data.bjorn_orch_status = "IDLE"
                    self.shared_data.bjorn_status_text2 = ""
                    self.shared_data.action_target_ip = ""

                    # Log periodically (less spam)
                    if idle_time % 30 == 0:  # Every 30 seconds
                        consecutive_idle_logs += 1
                        if consecutive_idle_logs <= 3:  # Limit consecutive logs
                            logger.debug(f"Queue empty, idling... ({idle_time}s)")

                # Event-driven wait (max 5s to check for exit signals)
                self.shared_data.queue_event.wait(timeout=5)
                self.shared_data.queue_event.clear()

                # Periodically process background tasks (even if busy)
                current_time = time.time()
                sync_interval = int(getattr(self.shared_data, "ai_sync_interval", 60))
                if current_time - self._last_background_task > sync_interval:
                    self._process_background_tasks()
                    self._last_background_task = current_time

            except Exception as e:
                logger.error(f"Error in orchestrator loop: {e}")
                time.sleep(self._loop_error_backoff)
                self._loop_error_backoff = min(self._loop_error_backoff * 2.0, 10.0)

        # Cleanup on exit (OUTSIDE while loop)
        if self.scheduler:
            self.scheduler.stop()
        self.shared_data.queue_event.set()
        if self.scheduler_thread and self.scheduler_thread.is_alive():
            self.scheduler_thread.join(timeout=10.0)
            if self.scheduler_thread.is_alive():
                logger.warning("ActionScheduler thread did not exit cleanly")
        logger.info("Orchestrator stopped")

    def _process_background_tasks(self):
        """Run periodic tasks like consolidation, upload retries, and model updates (AI mode only)."""
        if not (self.ai_enabled and self.shared_data.operation_mode == "AI"):
            return

        ai_server_contact_events: List[bool] = []

        try:
            # Consolidate features
            batch_size = int(getattr(self.shared_data, "ai_batch_size", 100))
            max_batches = max(1, int(getattr(self.shared_data, "ai_consolidation_max_batches", 2)))
            stats = self.data_consolidator.consolidate_features(
                batch_size=batch_size,
                max_batches=max_batches,
            )
            if stats.get("records_processed", 0) > 0:
                logger.info(f"AI Consolidation: {stats['records_processed']} records processed")
                logger.debug(f"DEBUG STATS: {stats}")

                # Auto-export after consolidation
                max_export_records = max(100, int(getattr(self.shared_data, "ai_export_max_records", 1000)))
                filepath, count = self.data_consolidator.export_for_training(
                    format="csv",
                    compress=True,
                    max_records=max_export_records,
                )
                if filepath:
                    logger.info(f"AI export ready: {count} records -> {filepath}")
                    self.data_consolidator.upload_to_server(filepath)
                    if getattr(self.data_consolidator, "last_server_attempted", False):
                        ai_server_contact_events.append(
                            bool(getattr(self.data_consolidator, "last_server_contact_ok", False))
                        )

            # Always retry any pending uploads when the server comes back.
            self.data_consolidator.flush_pending_uploads(max_files=2)
            if getattr(self.data_consolidator, "last_server_attempted", False):
                ai_server_contact_events.append(
                    bool(getattr(self.data_consolidator, "last_server_contact_ok", False))
                )
        except Exception as e:
            logger.info_throttled(
                f"AI background tasks skipped: {e}",
                key="orch_ai_background_failed",
                interval_s=120.0,
            )

        # Check for model updates (tolerant when server is offline)
        try:
            if self.ai_engine and self.ai_engine.check_for_updates():
                logger.info("AI model updated from server")
            if self.ai_engine and getattr(self.ai_engine, "last_server_attempted", False):
                ai_server_contact_events.append(
                    bool(getattr(self.ai_engine, "last_server_contact_ok", False))
                )
            elif self.ai_engine and not bool(getattr(self.ai_engine, "model_loaded", False)):
                # No model loaded and no successful server contact path this cycle.
                ai_server_contact_events.append(False)
        except Exception as e:
            logger.debug(f"AI model update check skipped: {e}")

        self._update_ai_server_health(ai_server_contact_events)


if __name__ == "__main__":
    orchestrator = Orchestrator()
    orchestrator.run()