backend redesign

This commit is contained in:
2026-03-11 18:47:11 -04:00
parent 8ff277c8c6
commit e99ef5d2dd
210 changed files with 12147 additions and 155 deletions

View File

@@ -0,0 +1,269 @@
"""
Data models for the Indicator system.
Defines schemas for input/output specifications, computation context,
and metadata for AI agent discovery.
"""
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from datasource.schema import ColumnInfo
class InputSchema(BaseModel):
    """
    Declares the required input columns for an Indicator.
    Indicators match against any data source (DataSource or other Indicator)
    that provides columns satisfying this schema.
    """
    model_config = {"extra": "forbid"}
    required_columns: List[ColumnInfo] = Field(
        description="Columns that must be present in the input data"
    )
    optional_columns: List[ColumnInfo] = Field(
        default_factory=list,
        description="Columns that may be used if present but are not required"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (must be present)"
    )

    def matches(self, available_columns: List[ColumnInfo]) -> bool:
        """
        Check if available columns satisfy this input schema.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            True if all required columns are present with compatible types
        """
        by_name = {col.name: col for col in available_columns}
        # The timestamp column is mandatory regardless of required_columns.
        if self.time_column not in by_name:
            return False
        # Every required column must be present with an exactly matching type.
        return all(
            req.name in by_name and by_name[req.name].type == req.type
            for req in self.required_columns
        )

    def get_missing_columns(self, available_columns: List[ColumnInfo]) -> List[str]:
        """
        Get list of missing required column names.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            List of missing column names
        """
        present = {col.name for col in available_columns}
        missing: List[str] = []
        # The time column is reported first when absent.
        if self.time_column not in present:
            missing.append(self.time_column)
        missing.extend(
            req.name for req in self.required_columns if req.name not in present
        )
        return missing
class OutputSchema(BaseModel):
    """
    Declares the output columns produced by an Indicator.
    Column names will be automatically prefixed with the indicator instance name
    to avoid collisions in the pipeline.
    """
    model_config = {"extra": "forbid"}
    columns: List[ColumnInfo] = Field(
        description="Output columns produced by this indicator"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (passed through from input)"
    )

    def with_prefix(self, prefix: str) -> "OutputSchema":
        """
        Create a new OutputSchema with all column names prefixed.

        The time column keeps its original name so downstream consumers can
        always join on it.

        Args:
            prefix: Prefix to add (e.g., indicator instance name)

        Returns:
            New OutputSchema with prefixed column names
        """
        # model_copy preserves every ColumnInfo field (including any added to
        # the model later) instead of reconstructing from a hard-coded field
        # list, which would silently drop or reset unknown fields.
        prefixed_columns = [
            col.model_copy(
                update={} if col.name == self.time_column
                else {"name": f"{prefix}_{col.name}"}
            )
            for col in self.columns
        ]
        return OutputSchema(
            columns=prefixed_columns,
            time_column=self.time_column
        )
class IndicatorParameter(BaseModel):
    """
    Metadata for a configurable indicator parameter.
    Used for AI agent discovery and dynamic indicator instantiation.
    """
    # Reject unknown keys so malformed parameter specs fail at parse time.
    model_config = {"extra": "forbid"}
    name: str = Field(description="Parameter name")
    type: Literal["int", "float", "string", "bool"] = Field(description="Parameter type")
    description: str = Field(description="Human and LLM-readable description")
    # default is only meaningful when required is False.
    default: Optional[Any] = Field(default=None, description="Default value if not specified")
    required: bool = Field(default=False, description="Whether this parameter is required")
    # Bounds for "int"/"float" parameters. Not enforced by this model itself —
    # presumably validated by whoever instantiates the indicator. TODO confirm.
    min_value: Optional[float] = Field(default=None, description="Minimum value (for numeric types)")
    max_value: Optional[float] = Field(default=None, description="Maximum value (for numeric types)")
class IndicatorMetadata(BaseModel):
    """
    Rich metadata for an Indicator class.
    Enables AI agents to discover, understand, and instantiate indicators.

    Purely descriptive: this model carries no computation logic, only the
    discovery surface (identity, parameters, and search/annotation fields).
    """
    # Reject unknown keys so metadata typos surface immediately.
    model_config = {"extra": "forbid"}
    name: str = Field(description="Unique indicator class name (e.g., 'RSI', 'SMA', 'BollingerBands')")
    display_name: str = Field(description="Human-readable display name")
    description: str = Field(description="Detailed description of what this indicator computes and why it's useful")
    category: str = Field(
        description="Indicator category (e.g., 'momentum', 'trend', 'volatility', 'volume', 'custom')"
    )
    # Parameter specs used to build/validate indicator instances dynamically.
    parameters: List[IndicatorParameter] = Field(
        default_factory=list,
        description="Configurable parameters for this indicator"
    )
    use_cases: List[str] = Field(
        default_factory=list,
        description="Common use cases and trading scenarios where this indicator is helpful"
    )
    references: List[str] = Field(
        default_factory=list,
        description="URLs or citations for indicator methodology"
    )
    tags: List[str] = Field(
        default_factory=list,
        description="Searchable tags (e.g., 'oscillator', 'mean-reversion', 'price-based')"
    )
class ComputeContext(BaseModel):
    """
    Context passed to an Indicator's compute() method.
    Contains the input data and metadata about what changed (for incremental updates).
    """
    model_config = {"extra": "forbid"}
    data: List[Dict[str, Any]] = Field(
        description="Input data rows (time-ordered). Each dict is {column_name: value, time: timestamp}"
    )
    is_incremental: bool = Field(
        default=False,
        description="True if this is an incremental update (only new/changed rows), False for full recompute"
    )
    updated_from_time: Optional[int] = Field(
        default=None,
        description="Unix timestamp (ms) of the earliest updated row (for incremental updates)"
    )

    def get_column(self, name: str) -> List[Any]:
        """
        Extract a single column as a list of values.

        Rows missing the column contribute None rather than raising.

        Args:
            name: Column name

        Returns:
            List of values in time order
        """
        return [record.get(name) for record in self.data]

    def get_times(self) -> List[int]:
        """
        Get the time column as a list.

        Every row is expected to carry a "time" key; a missing key raises
        KeyError rather than being skipped.

        Returns:
            List of timestamps in order
        """
        return [record["time"] for record in self.data]
class ComputeResult(BaseModel):
    """
    Result from an Indicator's compute() method.
    Contains the computed output data with proper column naming.
    """
    model_config = {"extra": "forbid"}
    data: List[Dict[str, Any]] = Field(
        description="Output data rows (time-ordered). Must include time column."
    )
    is_partial: bool = Field(
        default=False,
        description="True if this result only contains updates (for incremental computation)"
    )

    def merge_with_prefix(self, prefix: str, existing_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Merge this result into existing data with column name prefixing.

        Computed rows whose timestamp has no match in existing_data are
        dropped; existing rows with no computed counterpart pass through
        unchanged (copied).

        Args:
            prefix: Prefix to add to all column names except time
            existing_data: Existing data to merge with (matched by time)

        Returns:
            Merged data with prefixed columns added
        """
        # Index freshly computed rows by timestamp for O(1) matching.
        updates_by_time = {row["time"]: row for row in self.data}
        merged: List[Dict[str, Any]] = []
        for base_row in existing_data:
            combined = base_row.copy()
            update = updates_by_time.get(base_row["time"])
            if update is not None:
                # Copy every non-time column under its prefixed name.
                combined.update(
                    (f"{prefix}_{column}", value)
                    for column, value in update.items()
                    if column != "time"
                )
            merged.append(combined)
        return merged