# Cluster/core/nodes/preprocess_node.py

"""
Preprocessing node implementation for data transformation operations.

This module provides the PreprocessNode class which handles data preprocessing
operations in the pipeline, including image resizing, normalization, cropping,
and other transformation operations.

Main Components:
    - PreprocessNode: Core preprocessing node implementation
    - Image and data transformation operations
    - Preprocessing configuration and validation

Usage:
    from cluster4npu_ui.core.nodes.preprocess_node import PreprocessNode

    node = PreprocessNode()
    node.set_property('resize_width', 640)
    node.set_property('resize_height', 480)
"""

from .base_node import BaseNodeWithProperties


class PreprocessNode(BaseNodeWithProperties):
    """
    Preprocessing node describing data transformation operations.

    The node stores, validates and exports the settings of a preprocessing
    step (resize, normalization, crop, color space, operation chain). The code
    in this class does not transform any data itself; it only manages the
    configuration through node properties.
    """

    __identifier__ = 'com.cluster.preprocess_node'
    NODE_NAME = 'Preprocess Node'

    # Inclusive pixel bounds, shared by the property definitions and the
    # validator so the two cannot drift apart.
    _RESIZE_MIN = 64
    _RESIZE_MAX = 4096
    _CROP_MIN = 32
    _CROP_MAX = 2048

    # Relative cost factor of every supported operation. The keys double as
    # the set of operation names accepted by validate_configuration().
    _OPERATION_TIME_FACTORS = {
        'resize': 1.0,
        'normalize': 0.5,
        'crop': 0.2,
        'flip': 0.1,
        'rotate': 1.5,
        'blur': 2.0,
        'sharpen': 2.0,
    }

    def __init__(self):
        """Create the node with one input port, one output port and default properties."""
        super().__init__()

        # Setup node connections
        self.add_input('input', multi_input=False, color=(255, 140, 0))
        self.add_output('output', color=(0, 255, 0))
        self.set_color(45, 126, 72)

        # Initialize properties
        self.setup_properties()

    def setup_properties(self):
        """
        Initialize preprocessing-specific properties.

        Each call passes a property name, its default value and either a dict
        of options (bounds, description, placeholder) or a list of allowed
        choices. How these are interpreted is defined by
        ``create_business_property`` in the base class (not visible here).
        """
        # Image resizing
        self.create_business_property('resize_width', 640, {
            'min': self._RESIZE_MIN,
            'max': self._RESIZE_MAX,
            'description': 'Target width for image resizing'
        })

        self.create_business_property('resize_height', 480, {
            'min': self._RESIZE_MIN,
            'max': self._RESIZE_MAX,
            'description': 'Target height for image resizing'
        })

        self.create_business_property('maintain_aspect_ratio', True, {
            'description': 'Maintain aspect ratio during resizing'
        })

        # Normalization
        self.create_business_property('normalize', True, {
            'description': 'Apply normalization to input data'
        })

        self.create_business_property('normalization_type', 'zero_one', [
            'zero_one',      # [0, 1]
            'neg_one_one',   # [-1, 1]
            'imagenet',      # ImageNet mean/std
            'custom'         # Custom mean/std
        ])

        self.create_business_property('custom_mean', '0.485,0.456,0.406', {
            'placeholder': 'comma-separated values for RGB channels',
            'description': 'Custom normalization mean values'
        })

        self.create_business_property('custom_std', '0.229,0.224,0.225', {
            'placeholder': 'comma-separated values for RGB channels',
            'description': 'Custom normalization std values'
        })

        # Cropping
        self.create_business_property('crop_enabled', False, {
            'description': 'Enable image cropping'
        })

        self.create_business_property('crop_type', 'center', [
            'center',      # Center crop
            'random',      # Random crop
            'custom'       # Custom coordinates
        ])

        self.create_business_property('crop_width', 224, {
            'min': self._CROP_MIN,
            'max': self._CROP_MAX,
            'description': 'Crop width in pixels'
        })

        self.create_business_property('crop_height', 224, {
            'min': self._CROP_MIN,
            'max': self._CROP_MAX,
            'description': 'Crop height in pixels'
        })

        # Color space conversion
        self.create_business_property('color_space', 'RGB', [
            'RGB', 'BGR', 'HSV', 'LAB', 'YUV', 'GRAY'
        ])

        # Operations chain
        self.create_business_property('operations', 'resize,normalize', {
            'placeholder': 'comma-separated: resize,normalize,crop,flip,rotate',
            'description': 'Ordered list of preprocessing operations'
        })

        # Advanced options
        self.create_business_property('enable_augmentation', False, {
            'description': 'Enable data augmentation during preprocessing'
        })

        self.create_business_property('interpolation_method', 'bilinear', [
            'nearest', 'bilinear', 'bicubic', 'lanczos'
        ])

    @staticmethod
    def _is_int(value) -> bool:
        """Return True for real integers; ``bool`` is rejected although it subclasses ``int``."""
        return isinstance(value, int) and not isinstance(value, bool)

    def validate_configuration(self) -> tuple[bool, str]:
        """
        Validate the current node configuration.

        Checks, in order: resize dimensions, crop dimensions (only when
        cropping is enabled), custom mean/std (only when custom normalization
        is active) and the operations chain. The first failing check wins.

        Returns:
            Tuple of (is_valid, error_message); error_message is an empty
            string when the configuration is valid.
        """
        # Check resize dimensions
        resize_width = self.get_property('resize_width')
        resize_height = self.get_property('resize_height')

        for label, value in (('width', resize_width), ('height', resize_height)):
            if not self._is_int(value) or value < self._RESIZE_MIN:
                return False, f"Resize {label} must be at least {self._RESIZE_MIN} pixels"
            if value > self._RESIZE_MAX:
                return False, f"Resize {label} cannot exceed {self._RESIZE_MAX} pixels"

        # Check crop dimensions if cropping is enabled
        if self.get_property('crop_enabled'):
            crop_width = self.get_property('crop_width')
            crop_height = self.get_property('crop_height')

            # Type/range check first so the comparison below can never raise
            # TypeError on a missing or non-numeric value.
            for label, value in (('width', crop_width), ('height', crop_height)):
                if not self._is_int(value) or not self._CROP_MIN <= value <= self._CROP_MAX:
                    return False, (
                        f"Crop {label} must be between {self._CROP_MIN} "
                        f"and {self._CROP_MAX} pixels"
                    )

            if crop_width > resize_width or crop_height > resize_height:
                return False, "Crop dimensions cannot exceed resize dimensions"

        # Check custom normalization values. _parse_float_list() returns an
        # empty list for malformed text, which would otherwise pass unnoticed.
        if (self.get_property('normalize')
                and self.get_property('normalization_type') == 'custom'):
            mean_values = self._parse_float_list(self.get_property('custom_mean'))
            std_values = self._parse_float_list(self.get_property('custom_std'))

            if not mean_values:
                return False, "Custom mean must be a comma-separated list of numbers"
            if not std_values:
                return False, "Custom std must be a comma-separated list of numbers"
            if len(mean_values) != len(std_values):
                return False, "Custom mean and std must have the same number of values"
            # Presumably the consumer divides by std; a zero entry would then
            # be a division by zero.
            if any(std == 0 for std in std_values):
                return False, "Custom std values must be non-zero"

        # Validate operations string
        operations = self.get_property('operations')

        if operations:
            if not isinstance(operations, str):
                return False, "Operations must be a comma-separated string"

            # Reuse the shared parser so empty tokens (e.g. a trailing comma)
            # are ignored here exactly as they are when the config is exported.
            invalid_ops = [
                op for op in self._parse_operations_list(operations)
                if op not in self._OPERATION_TIME_FACTORS
            ]
            if invalid_ops:
                return False, f"Invalid operations: {', '.join(invalid_ops)}"

        return True, ""

    def get_preprocessing_config(self) -> dict:
        """
        Get preprocessing configuration for pipeline execution.

        Returns:
            Dictionary with the node id and name plus every preprocessing
            property. 'custom_mean' / 'custom_std' are parsed into lists of
            floats and 'operations' into a list of operation names; all other
            values are returned as stored.
        """
        return {
            'node_id': self.id,
            'node_name': self.name(),
            'resize_width': self.get_property('resize_width'),
            'resize_height': self.get_property('resize_height'),
            'maintain_aspect_ratio': self.get_property('maintain_aspect_ratio'),
            'normalize': self.get_property('normalize'),
            'normalization_type': self.get_property('normalization_type'),
            'custom_mean': self._parse_float_list(self.get_property('custom_mean')),
            'custom_std': self._parse_float_list(self.get_property('custom_std')),
            'crop_enabled': self.get_property('crop_enabled'),
            'crop_type': self.get_property('crop_type'),
            'crop_width': self.get_property('crop_width'),
            'crop_height': self.get_property('crop_height'),
            'color_space': self.get_property('color_space'),
            'operations': self._parse_operations_list(self.get_property('operations')),
            'enable_augmentation': self.get_property('enable_augmentation'),
            'interpolation_method': self.get_property('interpolation_method')
        }

    def _parse_float_list(self, value_str: str) -> list[float]:
        """
        Parse comma-separated float values.

        Empty tokens are skipped. Returns an empty list when any token is not
        a valid float or when value_str is not a string (best-effort parsing;
        validate_configuration() reports the problem to the user).
        """
        try:
            return [float(x.strip()) for x in value_str.split(',') if x.strip()]
        except (ValueError, AttributeError):
            return []

    def _parse_operations_list(self, operations_str: str) -> list[str]:
        """
        Parse comma-separated operations list.

        Tokens are stripped and empty tokens are dropped. Returns an empty
        list for an empty or non-string value, mirroring _parse_float_list().
        """
        if not operations_str or not isinstance(operations_str, str):
            return []
        return [op.strip() for op in operations_str.split(',') if op.strip()]

    def get_estimated_processing_time(self, input_size: 'tuple[int, int] | None' = None) -> float:
        """
        Estimate processing time for given input size.

        The estimate is megapixels * base time * sum of per-operation factors
        for the configured operations chain. Operations without a known
        factor count as 1.0; an empty chain yields 0.0.

        Args:
            input_size: Tuple of (width, height) for input image; defaults to
                (1920, 1080) when omitted.

        Returns:
            Estimated processing time in milliseconds
        """
        if input_size is None:
            input_size = (1920, 1080)  # Default HD resolution

        width, height = input_size
        pixel_count = width * height

        # Base processing time (ms per megapixel)
        base_time = 5.0

        operations = self._parse_operations_list(self.get_property('operations'))
        total_factor = sum(
            self._OPERATION_TIME_FACTORS.get(op, 1.0) for op in operations
        )

        return (pixel_count / 1_000_000) * base_time * total_factor