Add RLUtils class for managing RL/AI dashboard endpoints

- Implemented methods for fetching AI stats, training history, and recent experiences.
- Added functionality to set operation mode (MANUAL, AUTO, AI) with appropriate handling.
- Included helper methods for querying the database and sending JSON responses.
- Integrated model metadata extraction for visualization purposes.
This commit is contained in:
Fabien POLLY
2026-02-18 22:36:10 +01:00
parent b8a13cc698
commit eb20b168a6
684 changed files with 53278 additions and 27977 deletions

View File

@@ -1,436 +1,259 @@
"""
EPD Manager - singleton wrapper around Waveshare e-Paper display drivers.

Hardened for runtime stability:
- no per-operation worker-thread timeouts (prevents leaked stuck SPI threads)
- serialized SPI access
- bounded retry + recovery
- health metrics for monitoring

Supersedes the earlier "FIXED VERSION", which added operation timeouts,
better error recovery, and thread safety.
"""
import threading
import importlib
import logging
import threading
import time
from PIL import Image
from logger import Logger
logger = Logger(name="epd_manager.py", level=logging.DEBUG)
logger = Logger(name="epd_manager.py")
# ============================================================================
# DEBUG CONFIGURATION
# ============================================================================
DEBUG_MANAGER = False # Set to True to enable EPD Manager debugging
DEBUG_MANAGER = False
def debug_log(message, level="debug"):
    """Log ``message`` through the module logger when DEBUG_MANAGER is enabled.

    Does nothing while ``DEBUG_MANAGER`` is falsy.

    Args:
        message: Text to log; it is prefixed with ``[EPD_MANAGER]``.
        level: ``"info"``, ``"warning"`` or ``"error"``; any other value is
            logged at debug level.
    """
    if not DEBUG_MANAGER:
        return

    text = f"[EPD_MANAGER] {message}"
    if level == "info":
        logger.info(text)
    elif level == "warning":
        logger.warning(text)
    elif level == "error":
        logger.error(text)
    else:
        logger.debug(text)
class EPDManager:
    """Process-wide singleton that serializes access to a Waveshare EPD driver.

    Every driver call goes through ``_safe_call``, which holds ``_spi_lock``,
    updates the operation counters and, on failure, either retries once or
    (after ``MAX_CONSECUTIVE_ERRORS`` consecutive errors) performs a hard
    reset before retrying. Operations run on the calling thread; no timeout
    is enforced by this class.
    """

    _instance = None
    # Guards creation of the singleton instance.
    _instance_lock = threading.Lock()
    # Re-entrant on purpose: hard_reset() acquires it again while
    # _safe_call() already holds it (through _perform_recovery()).
    _spi_lock = threading.RLock()

    MAX_CONSECUTIVE_ERRORS = 3
    RESET_COOLDOWN = 5.0  # minimum seconds between two hard resets

    def __new__(cls, epd_type: str):
        """Return the single shared instance, creating it on first use."""
        with cls._instance_lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                # __init__ runs on every EPDManager(...) call; this flag lets
                # it skip re-initialization.
                cls._instance._initialized = False
        return cls._instance

    def __init__(self, epd_type: str):
        """Load the driver named ``epd_type`` the first time the manager is built.

        Later constructions return immediately; a different ``epd_type`` is
        ignored with a warning.

        Raises:
            Exception: whatever ``_load_driver`` raises. ``_initialized``
                stays False in that case, so the next construction retries.
        """
        if self._initialized:
            if epd_type != self.epd_type:
                logger.warning(
                    f"EPDManager already initialized with {self.epd_type}, "
                    f"ignoring requested type {epd_type}"
                )
            return

        self.epd_type = epd_type
        self.epd = None
        self.last_reset = time.time()
        self.error_count = 0  # consecutive failures, cleared on success
        self.last_error_time = 0.0
        self.total_operations = 0
        self.successful_operations = 0
        self.last_operation_duration = 0.0
        self.total_operation_duration = 0.0
        # Never incremented by this class; kept so check_health() keeps
        # reporting the "timeout_count" key.
        self.timeout_count = 0
        self.recovery_attempts = 0
        self.recovery_failures = 0

        self._load_driver()
        self._initialized = True

    # ------------------------------------------------------------------ driver
    def _load_driver(self):
        """Import ``resources.waveshare_epd.<epd_type>`` and instantiate its EPD."""
        debug_log(f"Loading EPD driver {self.epd_type}", "info")
        epd_module_name = f"resources.waveshare_epd.{self.epd_type}"
        epd_module = importlib.import_module(epd_module_name)
        self.epd = epd_module.EPD()

    # ------------------------------------------------------------------ calls
    @staticmethod
    def _op_name(func):
        """Return a printable name for ``func`` even if it has no ``__name__``."""
        return getattr(func, "__name__", repr(func))

    def _safe_call(self, func, *args, **kwargs):
        """Run ``func(*args, **kwargs)`` under the SPI lock with error recovery.

        On the first failures the call is retried once via ``_simple_retry``;
        once ``error_count`` reaches ``MAX_CONSECUTIVE_ERRORS`` the call goes
        through ``_perform_recovery`` instead.

        Returns:
            Whatever ``func`` returns.

        Raises:
            Exception: the original error when the retry/recovery also fails.
        """
        with EPDManager._spi_lock:
            self.total_operations += 1
            started = time.monotonic()
            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                self.error_count += 1
                self.last_error_time = time.time()
                logger.error(f"EPD operation failed ({self._op_name(func)}): {exc}")

                if self.error_count < self.MAX_CONSECUTIVE_ERRORS:
                    return self._simple_retry(func, args, kwargs, exc)
                return self._perform_recovery(func, args, kwargs, exc)

            self.successful_operations += 1
            self.error_count = 0
            # Durations are only recorded for calls that succeed first time.
            self.last_operation_duration = time.monotonic() - started
            self.total_operation_duration += self.last_operation_duration
            return result

    def _simple_retry(self, func, args, kwargs, original_error):
        """Retry ``func`` once after a short pause, without resetting the driver.

        Raises:
            Exception: ``original_error`` (chained to the retry failure) when
                the retry fails too.
        """
        time.sleep(0.3)
        try:
            result = func(*args, **kwargs)
        except Exception as retry_error:
            logger.error(f"EPD retry failed ({self._op_name(func)}): {retry_error}")
            raise original_error from retry_error

        self.successful_operations += 1
        self.error_count = 0
        return result

    def _perform_recovery(self, func, args, kwargs, original_error):
        """Hard-reset the driver, then retry ``func`` once.

        Waits out the remainder of ``RESET_COOLDOWN`` since the last reset
        before resetting again.

        Raises:
            Exception: ``original_error`` (chained to the recovery failure)
                when the reset or the retried call fails.
        """
        now = time.time()
        wait_s = max(0.0, self.RESET_COOLDOWN - (now - self.last_reset))
        if wait_s > 0:
            time.sleep(wait_s)

        self.recovery_attempts += 1
        try:
            self.hard_reset()
            result = func(*args, **kwargs)
        except Exception as exc:
            self.recovery_failures += 1
            logger.critical(f"EPD recovery failed: {exc}")
            # Clear the counter so the next failure starts with a simple
            # retry instead of another immediate hard reset.
            self.error_count = 0
            raise original_error from exc

        self.successful_operations += 1
        self.error_count = 0
        return result

    # -------------------------------------------------------------- public api
    def init_full_update(self):
        """Initialize the EPD for full update mode."""
        debug_log("API: init_full_update", "info")
        return self._safe_call(self._init_full)

    def init_partial_update(self):
        """Initialize the EPD for partial update mode."""
        debug_log("API: init_partial_update")
        return self._safe_call(self._init_partial)

    def display_partial(self, image):
        """Display ``image`` using a partial update."""
        debug_log("API: display_partial")
        return self._safe_call(self._display_partial, image)

    def display_full(self, image):
        """Display ``image`` using a full update."""
        debug_log("API: display_full", "info")
        return self._safe_call(self._display_full, image)

    def clear(self):
        """Clear the display."""
        debug_log("API: clear", "info")
        return self._safe_call(self._clear)

    def sleep(self):
        """Put the display to sleep."""
        debug_log("API: sleep", "info")
        return self._safe_call(self._sleep)

    def check_health(self):
        """Return a dict of health metrics for monitoring.

        ``success_rate`` is a percentage and defaults to 100.0 before any
        operation has run; ``is_healthy`` is True when there are no
        consecutive errors pending.
        """
        uptime = time.time() - self.last_reset
        success_rate = 100.0
        avg_ms = 0.0
        if self.total_operations > 0:
            success_rate = (self.successful_operations / self.total_operations) * 100.0
            avg_ms = (self.total_operation_duration / self.total_operations) * 1000.0

        return {
            "uptime_seconds": round(uptime, 3),
            "total_operations": int(self.total_operations),
            "successful_operations": int(self.successful_operations),
            "success_rate": round(success_rate, 2),
            "consecutive_errors": int(self.error_count),
            "timeout_count": int(self.timeout_count),
            "last_reset": self.last_reset,
            "last_operation_duration_ms": round(self.last_operation_duration * 1000.0, 2),
            "avg_operation_duration_ms": round(avg_ms, 2),
            "recovery_attempts": int(self.recovery_attempts),
            "recovery_failures": int(self.recovery_failures),
            "is_healthy": self.error_count == 0,
        }

    # ------------------------------------------------------------- impl methods
    def _init_full(self):
        """Call the driver's ``init`` with its full-update argument, if it has one."""
        debug_log("Initializing full update mode")
        if hasattr(self.epd, "FULL_UPDATE"):
            self.epd.init(self.epd.FULL_UPDATE)
        elif hasattr(self.epd, "lut_full_update"):
            self.epd.init(self.epd.lut_full_update)
        else:
            self.epd.init()
        debug_log("Full update mode initialized")

    def _init_partial(self):
        """Call the driver's ``init`` with its partial-update argument, if it has one."""
        debug_log("Initializing partial update mode")
        if hasattr(self.epd, "PART_UPDATE"):
            self.epd.init(self.epd.PART_UPDATE)
        elif hasattr(self.epd, "lut_partial_update"):
            self.epd.init(self.epd.lut_partial_update)
        else:
            self.epd.init()
        debug_log("Partial update mode initialized")

    def _display_partial(self, image):
        """Show ``image`` via ``displayPartial``, falling back to ``display``."""
        debug_log("Executing partial display")
        if hasattr(self.epd, "displayPartial"):
            self.epd.displayPartial(self.epd.getbuffer(image))
        else:
            debug_log("No displayPartial method, using standard display", "warning")
            self.epd.display(self.epd.getbuffer(image))

    def _display_full(self, image):
        """Show ``image`` via the driver's ``display``."""
        debug_log("Executing full display")
        self.epd.display(self.epd.getbuffer(image))

    def _clear(self):
        """Clear the panel via ``Clear``, or by drawing a blank image."""
        debug_log("Clearing display")
        if hasattr(self.epd, "Clear"):
            self.epd.Clear()
            return

        debug_log("No Clear method, displaying white image", "warning")
        w, h = self.epd.width, self.epd.height
        blank = Image.new("1", (w, h), 255)
        try:
            self._display_partial(blank)
        finally:
            blank.close()

    def _sleep(self):
        """Call the driver's ``sleep`` when it provides one."""
        debug_log("Putting display to sleep")
        if hasattr(self.epd, "sleep"):
            self.epd.sleep()
        else:
            debug_log("No sleep method available", "warning")

    def hard_reset(self, force: bool = False):
        """Tear down the current driver, load a fresh one and re-initialize it.

        Args:
            force: Only changes the wording of the completion log message.

        Raises:
            Exception: any failure while reloading or initializing the
                driver is logged at critical level and re-raised.
        """
        with EPDManager._spi_lock:
            started = time.monotonic()
            try:
                if self.epd and hasattr(self.epd, "epdconfig"):
                    try:
                        self.epd.epdconfig.module_exit(cleanup=True)
                    except TypeError:
                        # This module_exit() does not accept ``cleanup``.
                        self.epd.epdconfig.module_exit()
                    except Exception as exc:
                        logger.warning(f"EPD module_exit during reset failed: {exc}")

                self._load_driver()

                # Validate the new driver with a full init.
                if hasattr(self.epd, "FULL_UPDATE"):
                    self.epd.init(self.epd.FULL_UPDATE)
                else:
                    self.epd.init()

                self.last_reset = time.time()
                self.error_count = 0
                label = "forced hard reset" if force else "hard reset"
                logger.warning(
                    f"EPD {label} completed in {time.monotonic() - started:.2f}s"
                )
            except Exception as exc:
                logger.critical(f"EPD hard reset failed: {exc}")
                raise
### END OF FILE ###
### END OF FILE ###