""" Data models for the Indicator system. Defines schemas for input/output specifications, computation context, and metadata for AI agent discovery. """ from typing import Any, Dict, List, Literal, Optional from pydantic import BaseModel, Field from datasource.schema import ColumnInfo class InputSchema(BaseModel): """ Declares the required input columns for an Indicator. Indicators match against any data source (DataSource or other Indicator) that provides columns satisfying this schema. """ model_config = {"extra": "forbid"} required_columns: List[ColumnInfo] = Field( description="Columns that must be present in the input data" ) optional_columns: List[ColumnInfo] = Field( default_factory=list, description="Columns that may be used if present but are not required" ) time_column: str = Field( default="time", description="Name of the timestamp column (must be present)" ) def matches(self, available_columns: List[ColumnInfo]) -> bool: """ Check if available columns satisfy this input schema. Args: available_columns: Columns provided by a data source Returns: True if all required columns are present with compatible types """ available_map = {col.name: col for col in available_columns} # Check time column exists if self.time_column not in available_map: return False # Check all required columns exist with compatible types for required in self.required_columns: if required.name not in available_map: return False available = available_map[required.name] if available.type != required.type: return False return True def get_missing_columns(self, available_columns: List[ColumnInfo]) -> List[str]: """ Get list of missing required column names. Args: available_columns: Columns provided by a data source Returns: List of missing column names """ available_names = {col.name for col in available_columns} missing = [] if self.time_column not in available_names: missing.append(self.time_column) for required in self.required_columns: if required.name not in available_names: missing.append(required.name) return missing class OutputSchema(BaseModel): """ Declares the output columns produced by an Indicator. Column names will be automatically prefixed with the indicator instance name to avoid collisions in the pipeline. """ model_config = {"extra": "forbid"} columns: List[ColumnInfo] = Field( description="Output columns produced by this indicator" ) time_column: str = Field( default="time", description="Name of the timestamp column (passed through from input)" ) def with_prefix(self, prefix: str) -> "OutputSchema": """ Create a new OutputSchema with all column names prefixed. Args: prefix: Prefix to add (e.g., indicator instance name) Returns: New OutputSchema with prefixed column names """ prefixed_columns = [ ColumnInfo( name=f"{prefix}_{col.name}" if col.name != self.time_column else col.name, type=col.type, description=col.description, unit=col.unit, nullable=col.nullable ) for col in self.columns ] return OutputSchema( columns=prefixed_columns, time_column=self.time_column ) class IndicatorParameter(BaseModel): """ Metadata for a configurable indicator parameter. Used for AI agent discovery and dynamic indicator instantiation. """ model_config = {"extra": "forbid"} name: str = Field(description="Parameter name") type: Literal["int", "float", "string", "bool"] = Field(description="Parameter type") description: str = Field(description="Human and LLM-readable description") default: Optional[Any] = Field(default=None, description="Default value if not specified") required: bool = Field(default=False, description="Whether this parameter is required") min_value: Optional[float] = Field(default=None, description="Minimum value (for numeric types)") max_value: Optional[float] = Field(default=None, description="Maximum value (for numeric types)") class IndicatorMetadata(BaseModel): """ Rich metadata for an Indicator class. Enables AI agents to discover, understand, and instantiate indicators. """ model_config = {"extra": "forbid"} name: str = Field(description="Unique indicator class name (e.g., 'RSI', 'SMA', 'BollingerBands')") display_name: str = Field(description="Human-readable display name") description: str = Field(description="Detailed description of what this indicator computes and why it's useful") category: str = Field( description="Indicator category (e.g., 'momentum', 'trend', 'volatility', 'volume', 'custom')" ) parameters: List[IndicatorParameter] = Field( default_factory=list, description="Configurable parameters for this indicator" ) use_cases: List[str] = Field( default_factory=list, description="Common use cases and trading scenarios where this indicator is helpful" ) references: List[str] = Field( default_factory=list, description="URLs or citations for indicator methodology" ) tags: List[str] = Field( default_factory=list, description="Searchable tags (e.g., 'oscillator', 'mean-reversion', 'price-based')" ) class ComputeContext(BaseModel): """ Context passed to an Indicator's compute() method. Contains the input data and metadata about what changed (for incremental updates). """ model_config = {"extra": "forbid"} data: List[Dict[str, Any]] = Field( description="Input data rows (time-ordered). Each dict is {column_name: value, time: timestamp}" ) is_incremental: bool = Field( default=False, description="True if this is an incremental update (only new/changed rows), False for full recompute" ) updated_from_time: Optional[int] = Field( default=None, description="Unix timestamp (ms) of the earliest updated row (for incremental updates)" ) def get_column(self, name: str) -> List[Any]: """ Extract a single column as a list of values. Args: name: Column name Returns: List of values in time order """ return [row.get(name) for row in self.data] def get_times(self) -> List[int]: """ Get the time column as a list. Returns: List of timestamps in order """ return [row["time"] for row in self.data] class ComputeResult(BaseModel): """ Result from an Indicator's compute() method. Contains the computed output data with proper column naming. """ model_config = {"extra": "forbid"} data: List[Dict[str, Any]] = Field( description="Output data rows (time-ordered). Must include time column." ) is_partial: bool = Field( default=False, description="True if this result only contains updates (for incremental computation)" ) def merge_with_prefix(self, prefix: str, existing_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Merge this result into existing data with column name prefixing. Args: prefix: Prefix to add to all column names except time existing_data: Existing data to merge with (matched by time) Returns: Merged data with prefixed columns added """ # Build a time index for new data time_index = {row["time"]: row for row in self.data} # Merge into existing data result = [] for existing_row in existing_data: row_time = existing_row["time"] merged_row = existing_row.copy() if row_time in time_index: new_row = time_index[row_time] for key, value in new_row.items(): if key != "time": merged_row[f"{prefix}_{key}"] = value result.append(merged_row) return result