mirror of
https://github.com/infinition/Bjorn.git
synced 2026-03-09 06:01:59 +00:00
- Implemented methods for fetching AI stats, training history, and recent experiences. - Added functionality to set operation mode (MANUAL, AUTO, AI) with appropriate handling. - Included helper methods for querying the database and sending JSON responses. - Integrated model metadata extraction for visualization purposes.
787 lines
34 KiB
Python
787 lines
34 KiB
Python
# orchestrator.py
|
|
# Action queue consumer for Bjorn - executes actions from the scheduler queue
|
|
|
|
import importlib
|
|
import time
|
|
import logging
|
|
import threading
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from init_shared import shared_data
|
|
from logger import Logger
|
|
from action_scheduler import ActionScheduler
|
|
from ai_engine import get_or_create_ai_engine, invalidate_ai_engine
|
|
from feature_logger import FeatureLogger
|
|
from data_consolidator import DataConsolidator
|
|
|
|
logger = Logger(name="orchestrator.py", level=logging.DEBUG)
|
|
|
|
|
|
class Orchestrator:
|
|
"""Orchestrator that consumes the action queue generated by the scheduler"""
|
|
|
|
def __init__(self):
    """Wire up shared state, the action registry, scheduler handles and AI helpers.

    The FeatureLogger is created for both AUTO and AI modes so training data
    is collected from all automated executions; the AI engine and the
    DataConsolidator are created only when the current mode is "AI". Every
    optional component degrades gracefully: a failed init logs a throttled
    message and the orchestrator continues heuristic-only.
    """
    self.shared_data = shared_data
    self.actions = {}  # Dictionary of action instances, keyed by action class name
    self.network_scanner = None  # set by load_actions() for the global scan action
    self.scheduler = None
    self.scheduler_thread = None
    self._loop_error_backoff = 1.0  # seconds; doubled on main-loop errors (capped in run())

    # ┌─────────────────────────────────────────────────────────┐
    # │ AI / Feature-logging Components │
    # └─────────────────────────────────────────────────────────┘
    # feature_logger runs in AUTO and AI mode to collect training data
    # from ALL automated executions.
    # ai_engine + data_consolidator run only in AI mode.
    self.ai_engine = None
    self.data_consolidator = None
    self.ai_enabled = bool(self.shared_data.operation_mode == "AI")
    self._ai_server_failure_streak = 0  # consecutive failed AI-server contacts

    # FeatureLogger: active as long as the orchestrator runs (AUTO or AI)
    self.feature_logger = None
    if self.shared_data.operation_mode in ("AUTO", "AI"):
        try:
            self.feature_logger = FeatureLogger(self.shared_data)
            logger.info("FeatureLogger initialized (data collection active)")
        except Exception as e:
            # Best-effort: orchestration continues without training-data capture.
            logger.info_throttled(
                f"FeatureLogger unavailable; execution data will not be logged: {e}",
                key="orch_feature_logger_init_failed",
                interval_s=300.0,
            )
            self.feature_logger = None

    if self.ai_enabled:
        try:
            self.ai_engine = get_or_create_ai_engine(self.shared_data)
            self.data_consolidator = DataConsolidator(self.shared_data)
            logger.info("AI engine + DataConsolidator initialized (AI mode)")
        except Exception as e:
            # Degrade to heuristic-only behaviour instead of failing startup.
            logger.info_throttled(
                f"AI mode active but AI components unavailable; continuing heuristic-only: {e}",
                key="orch_ai_init_failed",
                interval_s=300.0,
            )
            self.ai_engine = None
            self.data_consolidator = None
            self.ai_enabled = False

    # Load all available actions
    self.load_actions()
    logger.info(f"Actions loaded: {list(self.actions.keys())}")
|
|
|
|
def _is_enabled_value(self, value: Any) -> bool:
|
|
"""Robust parser for b_enabled values coming from DB."""
|
|
if value is None:
|
|
return True
|
|
if isinstance(value, bool):
|
|
return value
|
|
if isinstance(value, (int, float)):
|
|
return int(value) == 1
|
|
s = str(value).strip().lower()
|
|
if s in {"1", "true", "yes", "on"}:
|
|
return True
|
|
if s in {"0", "false", "no", "off"}:
|
|
return False
|
|
try:
|
|
return int(float(s)) == 1
|
|
except Exception:
|
|
return True
|
|
|
|
def _is_action_eligible_for_ai_learning(self, action_name: str) -> bool:
|
|
"""Exclude control-plane actions from AI training/reward."""
|
|
return str(action_name) not in {"NetworkScanner"}
|
|
|
|
def _update_ai_server_health(self, contact_events: List[bool]) -> None:
|
|
"""
|
|
Update consecutive AI server failure counter and fallback to AUTO when needed.
|
|
`contact_events` contains one bool per attempted contact in this cycle.
|
|
"""
|
|
if not contact_events:
|
|
return
|
|
|
|
contacted_ok = any(contact_events)
|
|
if contacted_ok:
|
|
if self._ai_server_failure_streak > 0:
|
|
logger.info("AI server contact recovered; reset failure streak")
|
|
self._ai_server_failure_streak = 0
|
|
return
|
|
|
|
self._ai_server_failure_streak += 1
|
|
max_failures = max(
|
|
1,
|
|
int(getattr(self.shared_data, "ai_server_max_failures_before_auto", 3)),
|
|
)
|
|
model_loaded = bool(getattr(self.ai_engine, "model_loaded", False))
|
|
|
|
if self.shared_data.operation_mode == "AI" and (not model_loaded):
|
|
remaining_cycles = max(0, max_failures - self._ai_server_failure_streak)
|
|
if remaining_cycles > 0:
|
|
logger.info_throttled(
|
|
f"AI server unreachable ({self._ai_server_failure_streak}/{max_failures}) and no local model loaded; "
|
|
f"AUTO fallback in {remaining_cycles} cycle(s) if server remains offline",
|
|
key="orch_ai_unreachable_no_model_pre_fallback",
|
|
interval_s=60.0,
|
|
)
|
|
|
|
if (
|
|
self.shared_data.operation_mode == "AI"
|
|
and self._ai_server_failure_streak >= max_failures
|
|
and (not model_loaded)
|
|
):
|
|
logger.warning(
|
|
f"AI server unreachable for {self._ai_server_failure_streak} consecutive cycles and no local AI model is loaded; "
|
|
"switching operation mode to AUTO (heuristics-only)"
|
|
)
|
|
self.shared_data.operation_mode = "AUTO"
|
|
self._disable_ai_components()
|
|
|
|
def load_actions(self):
    """Load and instantiate all enabled actions declared in the database.

    Populates ``self.actions`` (keyed by class name) and ``self.network_scanner``
    for the global scanning action. Rows whose ``b_enabled`` flag parses as
    disabled are skipped. Import/instantiation errors are logged per action so
    one broken module does not prevent the others from loading.
    """
    actions_config = self.shared_data.get_actions_config()

    for action in actions_config:
        module_name = action["b_module"]
        b_class = action["b_class"]

        # 🔴 Skip disabled actions. Use the robust parser rather than a bare
        # int() cast: DB values like "true"/"yes"/"" would raise ValueError
        # here (outside any try) and abort the entire loading loop.
        if not self._is_enabled_value(action.get("b_enabled", 1)):
            logger.info(f"Skipping disabled action: {b_class}")
            continue

        try:
            # Import the module dynamically
            module = importlib.import_module(f'actions.{module_name}')

            # Global actions (NetworkScanner)
            if module_name == "scanning":
                scanner_class = getattr(module, b_class)
                self.network_scanner = scanner_class(self.shared_data)
                self.actions[b_class] = self.network_scanner
            else:
                # Normal actions
                action_class = getattr(module, b_class)
                action_instance = action_class(self.shared_data)
                action_instance.action_name = b_class
                action_instance.port = action.get("b_port")
                action_instance.b_parent_action = action.get("b_parent")
                self.actions[b_class] = action_instance

        except Exception as e:
            # One broken action must not stop the rest from loading.
            logger.error(f"Failed to load action {b_class}: {e}")
|
|
|
|
# ----------------------------------------------------------------- AI mode
|
|
|
|
def _ensure_feature_logger(self) -> None:
|
|
"""Init FeatureLogger if not yet running (called when entering AUTO or AI mode)."""
|
|
if self.feature_logger is not None:
|
|
return
|
|
try:
|
|
self.feature_logger = FeatureLogger(self.shared_data)
|
|
logger.info("FeatureLogger enabled")
|
|
except Exception as e:
|
|
logger.info_throttled(
|
|
f"FeatureLogger unavailable: {e}",
|
|
key="orch_feature_logger_enable_failed",
|
|
interval_s=300.0,
|
|
)
|
|
|
|
def _enable_ai_components(self) -> None:
|
|
"""Lazy-init AI-specific helpers when switching to AI mode at runtime."""
|
|
self._ensure_feature_logger()
|
|
|
|
if self.ai_engine and self.data_consolidator:
|
|
self.ai_enabled = True
|
|
return
|
|
|
|
try:
|
|
self.ai_engine = get_or_create_ai_engine(self.shared_data)
|
|
self.data_consolidator = DataConsolidator(self.shared_data)
|
|
self.ai_enabled = True
|
|
if self.ai_engine and not bool(getattr(self.ai_engine, "model_loaded", False)):
|
|
logger.warning(
|
|
"AI mode active but no local model loaded yet; "
|
|
"will fallback to AUTO if server stays unreachable"
|
|
)
|
|
logger.info("AI engine + DataConsolidator enabled")
|
|
except Exception as e:
|
|
self.ai_engine = None
|
|
self.data_consolidator = None
|
|
self.ai_enabled = False
|
|
logger.info_throttled(
|
|
f"AI components not available; staying heuristic-only: {e}",
|
|
key="orch_ai_enable_failed",
|
|
interval_s=300.0,
|
|
)
|
|
|
|
def _disable_ai_components(self) -> None:
|
|
"""Drop AI-specific helpers when leaving AI mode.
|
|
FeatureLogger is kept alive so AUTO mode still collects data."""
|
|
self.ai_enabled = False
|
|
self.ai_engine = None
|
|
self.data_consolidator = None
|
|
# Release cached AI engine singleton so model weights can be freed in AUTO mode.
|
|
try:
|
|
invalidate_ai_engine(self.shared_data)
|
|
except Exception:
|
|
pass
|
|
|
|
def _sync_ai_components(self) -> None:
|
|
"""Keep runtime AI helpers aligned with shared_data.operation_mode."""
|
|
mode = self.shared_data.operation_mode
|
|
if mode == "AI":
|
|
if not self.ai_enabled:
|
|
self._enable_ai_components()
|
|
else:
|
|
if self.ai_enabled:
|
|
self._disable_ai_components()
|
|
# Ensure feature_logger is alive in AUTO mode too
|
|
if mode == "AUTO":
|
|
self._ensure_feature_logger()
|
|
|
|
|
|
def start_scheduler(self):
    """Start the scheduler in background"""
    # Idempotent: a live scheduler thread means there is nothing to do.
    already_running = self.scheduler_thread is not None and self.scheduler_thread.is_alive()
    if already_running:
        logger.info("ActionScheduler thread already running")
        return

    logger.info("Starting ActionScheduler in background...")
    self.scheduler = ActionScheduler(self.shared_data)
    worker = threading.Thread(
        target=self.scheduler.run,
        daemon=True,
        name="ActionScheduler"
    )
    self.scheduler_thread = worker
    worker.start()
    logger.info("ActionScheduler started")
|
|
|
|
def get_next_action(self) -> Optional[Dict[str, Any]]:
    """Get next action from queue"""
    queued = self.shared_data.db.get_next_queued_action()
    if queued:
        # Announce what is about to run so operators can follow the queue.
        logger.info(
            f"Next action: {queued['action_name']} for {queued['mac_address']} "
            f"(priority={queued.get('priority_effective')})"
        )
    return queued
|
|
|
|
def _build_host_state(self, mac_address: str) -> Dict:
|
|
"""
|
|
Build RL state dict from host data in database.
|
|
|
|
Args:
|
|
mac_address: Target MAC address
|
|
|
|
Returns:
|
|
Dict with keys: mac, ports, hostname
|
|
"""
|
|
try:
|
|
# Get host from database
|
|
host = self.shared_data.db.get_host_by_mac(mac_address)
|
|
|
|
if not host:
|
|
logger.warning(f"Host not found for MAC: {mac_address}")
|
|
return {'mac': mac_address, 'ports': [], 'hostname': ''}
|
|
|
|
# Parse ports
|
|
ports_str = host.get('ports', '')
|
|
ports = []
|
|
if ports_str:
|
|
for p in ports_str.split(';'):
|
|
p = p.strip()
|
|
if p.isdigit():
|
|
ports.append(int(p))
|
|
|
|
# Get first hostname
|
|
hostnames_str = host.get('hostnames', '')
|
|
hostname = hostnames_str.split(';')[0] if hostnames_str else ''
|
|
|
|
return {
|
|
'mac': mac_address,
|
|
'ports': ports,
|
|
'hostname': hostname
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error building host state: {e}")
|
|
return {'mac': mac_address, 'ports': [], 'hostname': ''}
|
|
|
|
def _calculate_reward(
|
|
self,
|
|
action_name: str,
|
|
success: bool,
|
|
duration: float,
|
|
mac: str,
|
|
state_before: Dict,
|
|
state_after: Dict
|
|
) -> float:
|
|
"""
|
|
Calculate reward for RL update.
|
|
|
|
Reward structure:
|
|
- Base: +50 for success, -5 for failure
|
|
- Credentials found: +100
|
|
- New services: +20 per service
|
|
- Time bonus: +20 if <30s, -10 if >120s
|
|
- New ports discovered: +15 per port
|
|
|
|
Args:
|
|
action_name: Name of action executed
|
|
success: Did action succeed?
|
|
duration: Execution time in seconds
|
|
mac: Target MAC address
|
|
state_before: State dict before action
|
|
state_after: State dict after action
|
|
|
|
Returns:
|
|
Reward value (float)
|
|
"""
|
|
if not self._is_action_eligible_for_ai_learning(action_name):
|
|
return 0.0
|
|
|
|
# Base reward
|
|
reward = 50.0 if success else -5.0
|
|
|
|
if not success:
|
|
# Penalize time waste on failure
|
|
reward -= (duration * 0.1)
|
|
return reward
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Check for credentials found (high value!)
|
|
# ─────────────────────────────────────────────────────────
|
|
try:
|
|
recent_creds = self.shared_data.db.query("""
|
|
SELECT COUNT(*) as cnt FROM creds
|
|
WHERE mac_address=?
|
|
AND first_seen > datetime('now', '-1 minute')
|
|
""", (mac,))
|
|
|
|
if recent_creds and recent_creds[0]['cnt'] > 0:
|
|
creds_count = recent_creds[0]['cnt']
|
|
reward += 100 * creds_count # 100 per credential!
|
|
logger.info(f"RL: +{100*creds_count} reward for {creds_count} credentials")
|
|
except Exception as e:
|
|
logger.error(f"Error checking credentials: {e}")
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Check for new services discovered
|
|
# ─────────────────────────────────────────────────────────
|
|
try:
|
|
# Compare ports before/after
|
|
ports_before = set(state_before.get('ports', []))
|
|
ports_after = set(state_after.get('ports', []))
|
|
new_ports = ports_after - ports_before
|
|
|
|
if new_ports:
|
|
reward += 15 * len(new_ports)
|
|
logger.info(f"RL: +{15*len(new_ports)} reward for {len(new_ports)} new ports")
|
|
except Exception as e:
|
|
logger.error(f"Error checking new ports: {e}")
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Time efficiency bonus/penalty
|
|
# ─────────────────────────────────────────────────────────
|
|
if duration < 30:
|
|
reward += 20 # Fast execution bonus
|
|
elif duration > 120:
|
|
reward -= 10 # Slow execution penalty
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Action-specific bonuses
|
|
# ─────────────────────────────────────────────────────────
|
|
if action_name == "SSHBruteforce" and success:
|
|
# Extra bonus for SSH success (difficult action)
|
|
reward += 30
|
|
|
|
logger.debug(f"RL Reward calculated: {reward:.1f} for {action_name}")
|
|
return reward
|
|
|
|
def execute_queued_action(self, queued_action: Dict[str, Any]) -> bool:
    """Execute a single queued action with RL integration.

    Flow: validate the queue row (skip disabled / not-loaded actions),
    capture host state, run the action (global scan or targeted execute),
    update the queue row status, then log features + reward for training.

    Args:
        queued_action: Queue row with keys id, action_name, mac_address,
            ip, port and an optional JSON string under 'metadata'.

    Returns:
        True when the action reported 'success'; False for skipped,
        cancelled, failed or crashed executions.
    """
    queue_id = queued_action['id']
    action_name = queued_action['action_name']
    mac = queued_action['mac_address']
    ip = queued_action['ip']
    port = queued_action['port']

    # Parse metadata once — used throughout this function.
    # Defensive parse: a NULL metadata column would make json.loads raise
    # TypeError, and malformed JSON raises JSONDecodeError — both BEFORE any
    # queue-status update, so the row would stay 'queued' and the consumer
    # loop would retry it forever. Treat bad metadata as empty instead.
    try:
        metadata = json.loads(queued_action.get('metadata') or '{}')
    except (TypeError, ValueError):
        metadata = {}
    if not isinstance(metadata, dict):
        metadata = {}

    source = str(metadata.get('decision_method', 'unknown'))
    source_label = f"[{source.upper()}]" if source != 'unknown' else ""

    decision_origin = str(metadata.get('decision_origin', 'unknown'))
    ai_confidence = metadata.get('ai_confidence')
    ai_threshold = metadata.get('ai_threshold', getattr(self.shared_data, "ai_confirm_threshold", 0.3))
    ai_reason = str(metadata.get('ai_reason', 'n/a'))
    ai_method = metadata.get('ai_method')
    if not ai_method:
        # Fall back to the method recorded in the debug payload, if any.
        ai_method = (metadata.get('ai_debug') or {}).get('method')
    ai_method = str(ai_method or 'n/a')
    ai_model_loaded = bool(metadata.get('ai_model_loaded', bool(getattr(self.ai_engine, "model_loaded", False)) if self.ai_engine else False))
    decision_scope = str(metadata.get('decision_scope', 'unknown'))

    # Structured decision trace for log analysis / dashboards.
    exec_payload = {
        "action": action_name,
        "target": ip,
        "port": port,
        "decision_method": source,
        "decision_origin": decision_origin,
        "decision_scope": decision_scope,
        "ai_method": ai_method,
        "ai_confidence": ai_confidence if isinstance(ai_confidence, (int, float)) else None,
        "ai_threshold": ai_threshold if isinstance(ai_threshold, (int, float)) else None,
        "ai_model_loaded": ai_model_loaded,
        "ai_reason": ai_reason,
    }

    logger.info(f"Executing {source_label}: {action_name} for {ip}:{port}")
    logger.info(f"[DECISION_EXEC] {json.dumps(exec_payload)}")

    # Guard rail: stale queue rows can exist for disabled or not-loaded actions.
    try:
        action_row = self.shared_data.db.get_action_by_class(action_name)
        if action_row and not self._is_enabled_value(action_row.get("b_enabled", 1)):
            self.shared_data.db.update_queue_status(
                queue_id,
                'cancelled',
                f"Action {action_name} disabled (b_enabled=0)",
            )
            logger.info(f"Skipping queued disabled action: {action_name}")
            return False
    except Exception as e:
        logger.debug(f"Could not verify b_enabled for {action_name}: {e}")

    if action_name not in self.actions:
        self.shared_data.db.update_queue_status(
            queue_id,
            'cancelled',
            f"Action {action_name} not loaded",
        )
        logger.warning(f"Skipping queued action not loaded: {action_name}")
        return False

    # ┌─────────────────────────────────────────────────────────┐
    # │ STEP 1: Capture state BEFORE action (all modes) │
    # └─────────────────────────────────────────────────────────┘
    state_before = None
    if self.feature_logger:
        try:
            state_before = self._build_host_state(mac)
            logger.debug(f"State before captured for {mac}")
        except Exception as e:
            logger.info_throttled(
                f"State capture skipped: {e}",
                key="orch_state_before_failed",
                interval_s=120.0,
            )

    # Update status to running
    self.shared_data.db.update_queue_status(queue_id, 'running')

    # ┌─────────────────────────────────────────────────────────┐
    # │ EXECUTE ACTION (existing code) │
    # └─────────────────────────────────────────────────────────┘
    start_time = time.time()
    success = False

    try:
        action = self.actions[action_name]

        # Prepare row data for compatibility
        row = {
            "MAC Address": mac,
            "IPs": ip,
            "Ports": str(port) if port else "",
            "Alive": 1
        }

        # Prepare status details
        if ip and ip != "0.0.0.0":
            port_str = str(port).strip() if port is not None else ""
            has_port = bool(port_str) and port_str.lower() != "none"
            target_display = f"{ip}:{port_str}" if has_port else ip
            status_msg = f"{action_name} on {ip}"
            details = f"Target: {target_display}"
            self.shared_data.action_target_ip = target_display
        else:
            status_msg = f"{action_name} (Global)"
            details = "Scanning network..."
            self.shared_data.action_target_ip = ""

        # Update shared status for display
        self.shared_data.bjorn_orch_status = action_name
        self.shared_data.bjorn_status_text2 = self.shared_data.action_target_ip or ip

        self.shared_data.update_status(status_msg, details)

        # --- AI Dashboard Metadata (AI mode only) ---
        if (
            self.ai_enabled
            and self.shared_data.operation_mode == "AI"
            and self._is_action_eligible_for_ai_learning(action_name)
        ):
            decision_method = metadata.get('decision_method', 'heuristic')
            self.shared_data.active_action = action_name
            self.shared_data.last_decision_method = decision_method
            self.shared_data.last_ai_decision = metadata.get('ai_debug', {})
            ai_exec_payload = {
                "action": action_name,
                "method": decision_method,
                "origin": decision_origin,
                "target": ip,
                "ai_method": ai_method,
                "ai_confidence": ai_confidence if isinstance(ai_confidence, (int, float)) else None,
                "ai_threshold": ai_threshold if isinstance(ai_threshold, (int, float)) else None,
                "ai_model_loaded": ai_model_loaded,
                "reason": ai_reason,
            }
            logger.info(f"[AI_EXEC] {json.dumps(ai_exec_payload)}")

        # Check if global action (metadata already parsed above)
        if metadata.get('is_global') and hasattr(action, 'scan'):
            # Execute global scan
            action.scan()
            result = 'success'
        else:
            # Execute targeted action
            result = action.execute(
                ip,
                str(port) if port else "",
                row,
                action_name
            )

        # Determine success
        success = (result == 'success')

        # Update queue status based on result
        if success:
            self.shared_data.db.update_queue_status(queue_id, 'success')
            logger.success(f"Action {action_name} completed successfully for {ip}")
        else:
            self.shared_data.db.update_queue_status(queue_id, 'failed')
            logger.warning(f"Action {action_name} failed for {ip}")

    except Exception as e:
        logger.error(f"Error executing action {action_name}: {e}")
        self.shared_data.db.update_queue_status(queue_id, 'failed', str(e))
        success = False

    finally:
        # Always clear the AI dashboard state and reset the display,
        # whatever the execution outcome was.
        if (
            self.ai_enabled
            and self.shared_data.operation_mode == "AI"
            and self._is_action_eligible_for_ai_learning(action_name)
        ):
            ai_done_payload = {
                "action": action_name,
                "success": bool(success),
                "method": source,
                "origin": decision_origin,
            }
            logger.info(f"[AI_DONE] {json.dumps(ai_done_payload)}")
            self.shared_data.active_action = None

        # Clear status text
        self.shared_data.bjorn_status_text2 = ""
        self.shared_data.action_target_ip = ""
        # Reset Status to Thinking/Idle
        self.shared_data.update_status("Thinking", "Processing results...")

    duration = time.time() - start_time

    # ┌─────────────────────────────────────────────────────────┐
    # │ STEP 2: Log execution features (AUTO + AI modes) │
    # └─────────────────────────────────────────────────────────┘
    if self.feature_logger and state_before and self._is_action_eligible_for_ai_learning(action_name):
        try:
            reward = self._calculate_reward(
                action_name=action_name,
                success=success,
                duration=duration,
                mac=mac,
                state_before=state_before,
                state_after=self._build_host_state(mac),
            )

            self.feature_logger.log_action_execution(
                mac_address=mac,
                ip_address=ip,
                action_name=action_name,
                success=success,
                duration=duration,
                reward=reward,
                raw_event={
                    'port': port,
                    'action': action_name,
                    'queue_id': queue_id,
                    # metadata already parsed — no second json.loads
                    'metadata': metadata,
                    # Tag decision source so the training pipeline can weight
                    # human choices (MANUAL would be logged if orchestrator
                    # ever ran in that mode) vs automated ones.
                    'decision_source': self.shared_data.operation_mode,
                    'human_override': False,
                },
            )

            logger.debug(f"Features logged for {action_name} (mode={self.shared_data.operation_mode})")

        except Exception as e:
            logger.info_throttled(
                f"Feature logging skipped: {e}",
                key="orch_feature_log_failed",
                interval_s=120.0,
            )
    elif self.feature_logger and state_before:
        logger.debug(f"Feature logging disabled for {action_name} (excluded from AI learning)")

    return success
|
|
|
|
def run(self):
    """Main loop: start scheduler and consume queue.

    Runs until ``shared_data.orchestrator_should_exit`` becomes truthy,
    then stops the scheduler and joins its thread (10s grace) on the way out.
    """

    # Start the scheduler
    self.start_scheduler()

    # Wait for scheduler initialization
    time.sleep(2)

    # Main execution loop
    idle_time = 0  # seconds spent idle since the last executed action
    consecutive_idle_logs = 0
    self._last_background_task = 0  # epoch seconds of last background-task pass

    while not self.shared_data.orchestrator_should_exit:
        try:
            # Allow live mode switching from the UI without restarting the process.
            self._sync_ai_components()

            # Get next action from queue
            next_action = self.get_next_action()

            if next_action:
                # Reset idle counters
                idle_time = 0
                consecutive_idle_logs = 0
                self._loop_error_backoff = 1.0

                # Execute the action
                self.execute_queued_action(next_action)
            else:
                # IDLE mode
                idle_time += 1

                self.shared_data.bjorn_orch_status = "IDLE"
                self.shared_data.bjorn_status_text2 = ""
                self.shared_data.action_target_ip = ""

                # Log periodically (less spam)
                if idle_time % 30 == 0:  # Every 30 seconds
                    consecutive_idle_logs += 1
                    if consecutive_idle_logs <= 3:  # Limit consecutive logs
                        logger.debug(f"Queue empty, idling... ({idle_time}s)")

                # Event-driven wait (max 5s to check for exit signals)
                self.shared_data.queue_event.wait(timeout=5)
                self.shared_data.queue_event.clear()

            # Periodically process background tasks (even if busy)
            current_time = time.time()
            sync_interval = int(getattr(self.shared_data, "ai_sync_interval", 60))
            if current_time - self._last_background_task > sync_interval:
                self._process_background_tasks()
                self._last_background_task = current_time

        except Exception as e:
            # Exponential backoff on repeated loop errors so a persistent
            # failure does not spin the CPU.
            logger.error(f"Error in orchestrator loop: {e}")
            time.sleep(self._loop_error_backoff)
            self._loop_error_backoff = min(self._loop_error_backoff * 2.0, 10.0)

    # Cleanup on exit (OUTSIDE while loop)
    if self.scheduler:
        self.scheduler.stop()
    # Wake any thread blocked on the queue event so it can observe shutdown.
    self.shared_data.queue_event.set()
    if self.scheduler_thread and self.scheduler_thread.is_alive():
        self.scheduler_thread.join(timeout=10.0)
        if self.scheduler_thread.is_alive():
            logger.warning("ActionScheduler thread did not exit cleanly")

    logger.info("Orchestrator stopped")
|
|
|
|
def _process_background_tasks(self):
    """Run periodic tasks like consolidation, upload retries, and model updates (AI mode only)."""
    # No-op unless the orchestrator is currently running with AI enabled.
    if not (self.ai_enabled and self.shared_data.operation_mode == "AI"):
        return

    # One bool per attempted AI-server contact this cycle; fed into
    # _update_ai_server_health() at the end to drive the AUTO fallback.
    ai_server_contact_events: List[bool] = []

    try:
        # Consolidate features
        batch_size = int(getattr(self.shared_data, "ai_batch_size", 100))
        max_batches = max(1, int(getattr(self.shared_data, "ai_consolidation_max_batches", 2)))
        stats = self.data_consolidator.consolidate_features(
            batch_size=batch_size,
            max_batches=max_batches,
        )

        if stats.get("records_processed", 0) > 0:
            logger.info(f"AI Consolidation: {stats['records_processed']} records processed")
            logger.debug(f"DEBUG STATS: {stats}")

            # Auto-export after consolidation
            max_export_records = max(100, int(getattr(self.shared_data, "ai_export_max_records", 1000)))
            filepath, count = self.data_consolidator.export_for_training(
                format="csv",
                compress=True,
                max_records=max_export_records,
            )
            if filepath:
                logger.info(f"AI export ready: {count} records -> {filepath}")
                self.data_consolidator.upload_to_server(filepath)
                # Record whether the upload actually reached the server.
                if getattr(self.data_consolidator, "last_server_attempted", False):
                    ai_server_contact_events.append(
                        bool(getattr(self.data_consolidator, "last_server_contact_ok", False))
                    )

        # Always retry any pending uploads when the server comes back.
        self.data_consolidator.flush_pending_uploads(max_files=2)
        if getattr(self.data_consolidator, "last_server_attempted", False):
            ai_server_contact_events.append(
                bool(getattr(self.data_consolidator, "last_server_contact_ok", False))
            )

    except Exception as e:
        # Background work is best-effort; failures are throttled, not fatal.
        logger.info_throttled(
            f"AI background tasks skipped: {e}",
            key="orch_ai_background_failed",
            interval_s=120.0,
        )

    # Check for model updates (tolerant when server is offline)
    try:
        if self.ai_engine and self.ai_engine.check_for_updates():
            logger.info("AI model updated from server")
        if self.ai_engine and getattr(self.ai_engine, "last_server_attempted", False):
            ai_server_contact_events.append(
                bool(getattr(self.ai_engine, "last_server_contact_ok", False))
            )
        elif self.ai_engine and not bool(getattr(self.ai_engine, "model_loaded", False)):
            # No model loaded and no successful server contact path this cycle.
            ai_server_contact_events.append(False)
    except Exception as e:
        logger.debug(f"AI model update check skipped: {e}")

    self._update_ai_server_health(ai_server_contact_events)
|
|
|
|
|
|
if __name__ == "__main__":
    # Standalone entry point: build the orchestrator and run its main loop.
    Orchestrator().run()
|