changedetection.io/changedetectionio/queue_handlers.py

from blinker import signal
from loguru import logger
from typing import Dict, List, Any, Optional
import heapq
import queue
import threading

# Janus is no longer required - we use pure threading.Queue for multi-loop support
# try:
#     import janus
# except ImportError:
#     pass  # Not needed anymore


class RecheckPriorityQueue:
    """
    Thread-safe priority queue supporting multiple async event loops.

    ARCHITECTURE:
    - Multiple async workers, each with its own event loop in its own thread
    - Hybrid sync/async design for maximum scalability
    - Sync interface for ticker thread (threading.Queue)
    - Async interface for workers (asyncio.Event - NO executor threads!)

    SCALABILITY:
    - Scales to 100-200+ workers without executor thread exhaustion
    - Async workers wait on asyncio.Event (pure coroutines, no threads)
    - Sync callers use threading.Queue (backward compatible)

    WHY NOT JANUS:
    - Janus binds to ONE event loop at creation time
    - Our architecture has 15+ workers, each with separate event loops
    - Workers in different threads/loops cannot share janus async interface

    WHY NOT RUN_IN_EXECUTOR:
    - With 200 workers, run_in_executor() would block 200 threads
    - Exhausts ThreadPoolExecutor, starves Flask HTTP handlers
    - Pure async approach uses 0 threads while waiting
    """

    def __init__(self, maxsize: int = 0):
        try:
            import asyncio

            # Sync interface: threading.Queue for ticker thread and Flask routes
            self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)

            # Priority storage - thread-safe
            self._priority_items = []
            self._lock = threading.RLock()

            # No event signaling needed - pure polling approach
            # Workers check queue every 50ms (latency acceptable: 0-500ms)
            # Scales to 1000+ workers: each sleeping worker = ~4KB coroutine, not thread

            # Signals for UI updates
            self.queue_length_signal = signal('queue_length')

            logger.debug("RecheckPriorityQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
            raise

    # SYNC INTERFACE (for ticker thread)
    def put(self, item, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with priority ordering"""
        logger.trace(f"RecheckQueue.put() called for item: {self._get_item_uuid(item)}, block={block}, timeout={timeout}")
        try:
            # CRITICAL: Add to both priority storage AND notification queue atomically
            # to prevent desynchronization where item exists but no notification
            with self._lock:
                heapq.heappush(self._priority_items, item)

                # Add notification - use blocking with timeout for safety
                # Notification queue is unlimited size, so should never block in practice
                # but timeout ensures we detect any unexpected issues (deadlock, etc)
                try:
                    self._notification_queue.put(True, block=True, timeout=5.0)
                except Exception as notif_e:
                    # Notification failed - MUST remove from priority_items to keep in sync
                    # This prevents "Priority queue inconsistency" errors in get()
                    logger.critical(f"CRITICAL: Notification queue put failed, removing from priority_items: {notif_e}")
                    self._priority_items.remove(item)
                    heapq.heapify(self._priority_items)
                    raise  # Re-raise to be caught by outer exception handler

            # Signal emission after successful queue - log but don't fail the operation
            # Item is already safely queued, so signal failure shouldn't affect queue state
            try:
                self._emit_put_signals(item)
            except Exception as signal_e:
                logger.error(f"Failed to emit put signals but item queued successfully: {signal_e}")

            logger.trace(f"Successfully queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {type(e).__name__}: {str(e)}")
            # Item should have been cleaned up in the inner try/except if notification failed
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get with priority ordering"""
        logger.trace(f"RecheckQueue.get() called, block={block}, timeout={timeout}")
        import queue as queue_module
        try:
            # Wait for notification (this doesn't return the actual item, just signals availability)
            self._notification_queue.get(block=block, timeout=timeout)

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical(f"CRITICAL: Queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Signal emission after successful retrieval - log but don't lose the item
            # Item is already retrieved, so signal failure shouldn't affect queue state
            try:
                self._emit_get_signals()
            except Exception as signal_e:
                logger.error(f"Failed to emit get signals but item retrieved successfully: {signal_e}")

            logger.trace(f"RecheckQueue.get() successfully retrieved item: {self._get_item_uuid(item)}")
            return item

        except queue_module.Empty:
            # Queue is empty with timeout - expected behavior
            logger.trace(f"RecheckQueue.get() timed out - queue is empty (timeout={timeout})")
            raise  # noqa
        except Exception as e:
            # Re-raise without logging - caller (worker) will handle and log appropriately
            logger.trace(f"RecheckQueue.get() failed with exception: {type(e).__name__}: {str(e)}")
            raise

    # ASYNC INTERFACE (for workers)
    async def async_put(self, item, executor=None):
        """Async put with priority ordering - uses thread pool to avoid blocking

        Args:
            item: Item to add to queue
            executor: Optional ThreadPoolExecutor. If None, uses default pool.
        """
        logger.trace(f"RecheckQueue.async_put() called for item: {self._get_item_uuid(item)}, executor={executor}")
        import asyncio
        try:
            # Use run_in_executor to call sync put without blocking event loop
            loop = asyncio.get_event_loop()
            result = await loop.run_in_executor(
                executor,  # Use provided executor or default
                lambda: self.put(item, block=True, timeout=5.0)
            )

            logger.trace(f"RecheckQueue.async_put() successfully queued item: {self._get_item_uuid(item)}")
            return result

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
            return False

    async def async_get(self, executor=None, timeout=1.0):
        """
        Efficient async get using executor for blocking call.

        HYBRID APPROACH: Best of both worlds
        - Uses run_in_executor for efficient blocking (no polling overhead)
        - Single timeout (no double-timeout race condition)
        - Scales well: executor sized to match worker count

        With FETCH_WORKERS=10: 10 threads blocked max (acceptable)
        With FETCH_WORKERS=200: Need executor with 200+ threads (see worker_pool.py)

        Args:
            executor: ThreadPoolExecutor (sized to match worker count)
            timeout: Maximum time to wait in seconds

        Returns:
            Item from queue

        Raises:
            queue.Empty: If timeout expires with no item available
        """
        logger.trace(f"RecheckQueue.async_get() called, timeout={timeout}")
        import asyncio
        try:
            # Use run_in_executor to call sync get efficiently
            # No outer asyncio.wait_for wrapper = no double timeout issue!
            loop = asyncio.get_event_loop()
            item = await loop.run_in_executor(
                executor,
                lambda: self.get(block=True, timeout=timeout)
            )

            logger.trace(f"RecheckQueue.async_get() successfully retrieved item: {self._get_item_uuid(item)}")
            return item

        except queue.Empty:
            logger.trace(f"RecheckQueue.async_get() timed out - queue is empty")
            raise
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get item from queue: {type(e).__name__}: {str(e)}")
            raise

    # UTILITY METHODS
    def qsize(self) -> int:
        """Get current queue size"""
        try:
            with self._lock:
                return len(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def get_queued_uuids(self) -> list:
        """Get list of all queued UUIDs efficiently with single lock"""
        try:
            with self._lock:
                return [item.item['uuid'] for item in self._priority_items if hasattr(item, 'item') and 'uuid' in item.item]
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queued UUIDs: {str(e)}")
            return []

    def clear(self):
        """Clear all items from both priority storage and notification queue"""
        try:
            with self._lock:
                # Clear priority items
                self._priority_items.clear()

                # Drain all notifications to prevent stale notifications
                # This is critical for test cleanup to prevent queue desynchronization
                drained = 0
                while not self._notification_queue.empty():
                    try:
                        self._notification_queue.get_nowait()
                        drained += 1
                    except queue.Empty:
                        break

                if drained > 0:
                    logger.debug(f"Cleared queue: removed {drained} notifications")

            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to clear queue: {str(e)}")
            return False

    def close(self):
        """Close the queue"""
        try:
            # Nothing to close for threading.Queue
            logger.debug("RecheckPriorityQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")

    # COMPATIBILITY METHODS (from original implementation)
    @property
    def queue(self):
        """Provide compatibility with original queue access"""
        try:
            with self._lock:
                return list(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
            return []

    def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
        """Find position of UUID in queue"""
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

                # Find target item
                for item in queue_list:
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                        item.item.get('uuid') == target_uuid):

                        # Count items with higher priority
                        position = sum(1 for other in queue_list if other.priority < item.priority)
                        return {
                            'position': position,
                            'total_items': total_items,
                            'priority': item.priority,
                            'found': True
                        }

                return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
            return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

    def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
        """Get all queued UUIDs with pagination"""
        try:
            with self._lock:
                queue_list = sorted(self._priority_items)  # Sort by priority
                total_items = len(queue_list)

                if total_items == 0:
                    return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

                # Apply pagination
                end_idx = min(offset + limit, total_items) if limit else total_items
                items_to_process = queue_list[offset:end_idx]

                result = []
                for position, item in enumerate(items_to_process, start=offset):
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                        'uuid' in item.item):
                        result.append({
                            'uuid': item.item['uuid'],
                            'position': position,
                            'priority': item.priority
                        })

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
            return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

    def get_queue_summary(self) -> Dict[str, Any]:
        """Get queue summary statistics"""
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {
                        'total_items': 0, 'priority_breakdown': {},
                        'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
                    }

                immediate_items = clone_items = scheduled_items = 0
                priority_counts = {}

                for item in queue_list:
                    priority = item.priority
                    priority_counts[priority] = priority_counts.get(priority, 0) + 1

                    if priority == 1:
                        immediate_items += 1
                    elif priority == 5:
                        clone_items += 1
                    elif priority > 100:
                        scheduled_items += 1

                return {
                    'total_items': total_items,
                    'priority_breakdown': priority_counts,
                    'immediate_items': immediate_items,
                    'clone_items': clone_items,
                    'scheduled_items': scheduled_items,
                    'min_priority': min(priority_counts.keys()) if priority_counts else None,
                    'max_priority': max(priority_counts.keys()) if priority_counts else None
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
            return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
                   'clone_items': 0, 'scheduled_items': 0}

    # PRIVATE METHODS
    def _get_item_uuid(self, item) -> str:
        """Safely extract UUID from item for logging"""
        try:
            if hasattr(item, 'item') and isinstance(item.item, dict):
                return item.item.get('uuid', 'unknown')
        except Exception:
            pass
        return 'unknown'

    def _emit_put_signals(self, item):
        """Emit signals when item is added"""
        try:
            # Watch update signal
            if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
                watch_check_update = signal('watch_check_update')
                if watch_check_update:
                    watch_check_update.send(watch_uuid=item.item['uuid'])

            # Queue length signal
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")

    def _emit_get_signals(self):
        """Emit signals when item is removed"""
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")


class NotificationQueue:
    """
    Ultra-reliable notification queue using pure janus.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
    See RecheckPriorityQueue docstring above for detailed explanation.

    Simple wrapper around janus with bulletproof error handling.
    """

    def __init__(self, maxsize: int = 0, datastore=None):
        try:
            # Use pure threading.Queue to avoid event loop binding issues
            self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
            self.notification_event_signal = signal('notification_event')
            self.datastore = datastore  # For checking all_muted setting
            self._lock = threading.RLock()
            logger.debug("NotificationQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
            raise

    def set_datastore(self, datastore):
        """Set datastore reference after initialization (for circular dependency handling)"""
        self.datastore = datastore

    def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with signal emission"""
        logger.trace(f"NotificationQueue.put() called for item: {item.get('uuid', 'unknown')}, block={block}, timeout={timeout}")
        try:
            # Check if all notifications are muted
            if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
                logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
                return False

            with self._lock:
                self._notification_queue.put(item, block=block, timeout=timeout)
            self._emit_notification_signal(item)
            logger.trace(f"NotificationQueue.put() successfully queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
            return False

    async def async_put(self, item: Dict[str, Any], executor=None):
        """Async put with signal emission - uses thread pool

        Args:
            item: Notification item to queue
            executor: Optional ThreadPoolExecutor
        """
        logger.trace(f"NotificationQueue.async_put() called for item: {item.get('uuid', 'unknown')}, executor={executor}")
        import asyncio
        try:
            # Check if all notifications are muted
            if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
                logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
                return False

            loop = asyncio.get_event_loop()
            await loop.run_in_executor(executor, lambda: self.put(item, block=True, timeout=5.0))
            logger.trace(f"NotificationQueue.async_put() successfully queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get"""
        logger.trace(f"NotificationQueue.get() called, block={block}, timeout={timeout}")
        try:
            with self._lock:
                item = self._notification_queue.get(block=block, timeout=timeout)
            logger.trace(f"NotificationQueue.get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
            return item
        except queue.Empty as e:
            logger.trace(f"NotificationQueue.get() timed out - queue is empty (timeout={timeout})")
            raise e
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification: {type(e).__name__}: {str(e)}")
            raise e

    async def async_get(self, executor=None):
        """Async get - uses thread pool

        Args:
            executor: Optional ThreadPoolExecutor
        """
        logger.trace(f"NotificationQueue.async_get() called, executor={executor}")
        import asyncio
        try:
            loop = asyncio.get_event_loop()
            item = await loop.run_in_executor(executor, lambda: self.get(block=True, timeout=1.0))
            logger.trace(f"NotificationQueue.async_get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
            return item
        except queue.Empty as e:
            logger.trace(f"NotificationQueue.async_get() timed out - queue is empty")
            raise e
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get notification: {type(e).__name__}: {str(e)}")
            raise e

    def qsize(self) -> int:
        """Get current queue size"""
        try:
            with self._lock:
                return self._notification_queue.qsize()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the queue"""
        try:
            # Nothing to close for threading.Queue
            logger.debug("NotificationQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")

    def _emit_notification_signal(self, item: Dict[str, Any]):
        """Emit notification signal"""
        try:
            if self.notification_event_signal and isinstance(item, dict):
                watch_uuid = item.get('uuid')
                if watch_uuid:
                    self.notification_event_signal.send(watch_uuid=watch_uuid)
                else:
                    self.notification_event_signal.send()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}")