backend redesign

This commit is contained in:
2026-03-11 18:47:11 -04:00
parent 8ff277c8c6
commit e99ef5d2dd
210 changed files with 12147 additions and 155 deletions

View File

@@ -0,0 +1,269 @@
"""
Data models for the Indicator system.
Defines schemas for input/output specifications, computation context,
and metadata for AI agent discovery.
"""
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from datasource.schema import ColumnInfo
class InputSchema(BaseModel):
    """
    Declares the required input columns for an Indicator.
    Indicators match against any data source (DataSource or other Indicator)
    that provides columns satisfying this schema.
    """
    model_config = {"extra": "forbid"}
    required_columns: List[ColumnInfo] = Field(
        description="Columns that must be present in the input data"
    )
    optional_columns: List[ColumnInfo] = Field(
        default_factory=list,
        description="Columns that may be used if present but are not required"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (must be present)"
    )

    def matches(self, available_columns: List[ColumnInfo]) -> bool:
        """
        Check if available columns satisfy this input schema.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            True if all required columns are present with compatible types
        """
        by_name = {col.name: col for col in available_columns}
        # The timestamp column is mandatory regardless of required_columns.
        if self.time_column not in by_name:
            return False
        # Every required column must be present with an exactly matching type.
        return all(
            req.name in by_name and by_name[req.name].type == req.type
            for req in self.required_columns
        )

    def get_missing_columns(self, available_columns: List[ColumnInfo]) -> List[str]:
        """
        Get list of missing required column names.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            List of missing column names
        """
        present = {col.name for col in available_columns}
        missing: List[str] = []
        # The time column is reported first when absent.
        if self.time_column not in present:
            missing.append(self.time_column)
        missing.extend(
            req.name for req in self.required_columns if req.name not in present
        )
        return missing
class OutputSchema(BaseModel):
    """
    Declares the output columns produced by an Indicator.
    Column names will be automatically prefixed with the indicator instance name
    to avoid collisions in the pipeline.
    """
    model_config = {"extra": "forbid"}
    columns: List[ColumnInfo] = Field(
        description="Output columns produced by this indicator"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (passed through from input)"
    )

    def with_prefix(self, prefix: str) -> "OutputSchema":
        """
        Create a new OutputSchema with all column names prefixed.

        The time column keeps its original name so downstream consumers can
        always join on it.

        Args:
            prefix: Prefix to add (e.g., indicator instance name)

        Returns:
            New OutputSchema with prefixed column names
        """
        # model_copy preserves every ColumnInfo field (including any added to
        # the model later) instead of reconstructing from a hard-coded field
        # list, which would silently drop or reset unknown fields.
        prefixed_columns = [
            col.model_copy(
                update={} if col.name == self.time_column
                else {"name": f"{prefix}_{col.name}"}
            )
            for col in self.columns
        ]
        return OutputSchema(
            columns=prefixed_columns,
            time_column=self.time_column
        )
class IndicatorParameter(BaseModel):
    """
    Metadata for a configurable indicator parameter.
    Used for AI agent discovery and dynamic indicator instantiation.
    """
    # Reject unknown keys so malformed parameter specs fail at parse time.
    model_config = {"extra": "forbid"}
    name: str = Field(description="Parameter name")
    type: Literal["int", "float", "string", "bool"] = Field(description="Parameter type")
    description: str = Field(description="Human and LLM-readable description")
    # default is only meaningful when required is False.
    default: Optional[Any] = Field(default=None, description="Default value if not specified")
    required: bool = Field(default=False, description="Whether this parameter is required")
    # Bounds for "int"/"float" parameters. Not enforced by this model itself —
    # presumably validated by whoever instantiates the indicator. TODO confirm.
    min_value: Optional[float] = Field(default=None, description="Minimum value (for numeric types)")
    max_value: Optional[float] = Field(default=None, description="Maximum value (for numeric types)")
class IndicatorMetadata(BaseModel):
    """
    Rich metadata for an Indicator class.
    Enables AI agents to discover, understand, and instantiate indicators.

    Purely descriptive: this model carries no computation logic, only the
    discovery surface (identity, parameters, and search/annotation fields).
    """
    # Reject unknown keys so metadata typos surface immediately.
    model_config = {"extra": "forbid"}
    name: str = Field(description="Unique indicator class name (e.g., 'RSI', 'SMA', 'BollingerBands')")
    display_name: str = Field(description="Human-readable display name")
    description: str = Field(description="Detailed description of what this indicator computes and why it's useful")
    category: str = Field(
        description="Indicator category (e.g., 'momentum', 'trend', 'volatility', 'volume', 'custom')"
    )
    # Parameter specs used to build/validate indicator instances dynamically.
    parameters: List[IndicatorParameter] = Field(
        default_factory=list,
        description="Configurable parameters for this indicator"
    )
    use_cases: List[str] = Field(
        default_factory=list,
        description="Common use cases and trading scenarios where this indicator is helpful"
    )
    references: List[str] = Field(
        default_factory=list,
        description="URLs or citations for indicator methodology"
    )
    tags: List[str] = Field(
        default_factory=list,
        description="Searchable tags (e.g., 'oscillator', 'mean-reversion', 'price-based')"
    )
class ComputeContext(BaseModel):
    """
    Context passed to an Indicator's compute() method.
    Contains the input data and metadata about what changed (for incremental updates).
    """
    model_config = {"extra": "forbid"}
    data: List[Dict[str, Any]] = Field(
        description="Input data rows (time-ordered). Each dict is {column_name: value, time: timestamp}"
    )
    is_incremental: bool = Field(
        default=False,
        description="True if this is an incremental update (only new/changed rows), False for full recompute"
    )
    updated_from_time: Optional[int] = Field(
        default=None,
        description="Unix timestamp (ms) of the earliest updated row (for incremental updates)"
    )

    def get_column(self, name: str) -> List[Any]:
        """
        Extract a single column as a list of values.

        Rows missing the column contribute None rather than raising.

        Args:
            name: Column name

        Returns:
            List of values in time order
        """
        return [record.get(name) for record in self.data]

    def get_times(self) -> List[int]:
        """
        Get the time column as a list.

        Every row is expected to carry a "time" key; a missing key raises
        KeyError rather than being skipped.

        Returns:
            List of timestamps in order
        """
        return [record["time"] for record in self.data]
class ComputeResult(BaseModel):
    """
    Result from an Indicator's compute() method.
    Contains the computed output data with proper column naming.
    """
    model_config = {"extra": "forbid"}
    data: List[Dict[str, Any]] = Field(
        description="Output data rows (time-ordered). Must include time column."
    )
    is_partial: bool = Field(
        default=False,
        description="True if this result only contains updates (for incremental computation)"
    )

    def merge_with_prefix(self, prefix: str, existing_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Merge this result into existing data with column name prefixing.

        Computed rows whose timestamp has no match in existing_data are
        dropped; existing rows with no computed counterpart pass through
        unchanged (copied).

        Args:
            prefix: Prefix to add to all column names except time
            existing_data: Existing data to merge with (matched by time)

        Returns:
            Merged data with prefixed columns added
        """
        # Index freshly computed rows by timestamp for O(1) matching.
        updates_by_time = {row["time"]: row for row in self.data}
        merged: List[Dict[str, Any]] = []
        for base_row in existing_data:
            combined = base_row.copy()
            update = updates_by_time.get(base_row["time"])
            if update is not None:
                # Copy every non-time column under its prefixed name.
                combined.update(
                    (f"{prefix}_{column}", value)
                    for column, value in update.items()
                    if column != "time"
                )
            merged.append(combined)
        return merged