270 lines
8.4 KiB
Python
270 lines
8.4 KiB
Python
"""
Data models for the Indicator system.

Defines schemas for input/output specifications, computation context,
and metadata for AI agent discovery.
"""
|
|
|
|
from typing import Any, Dict, List, Literal, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from datasource.schema import ColumnInfo
|
|
|
|
|
|
class InputSchema(BaseModel):
    """
    Declares the required input columns for an Indicator.

    Indicators match against any data source (DataSource or other Indicator)
    that provides columns satisfying this schema.
    """

    model_config = {"extra": "forbid"}

    required_columns: List[ColumnInfo] = Field(
        description="Columns that must be present in the input data"
    )
    optional_columns: List[ColumnInfo] = Field(
        default_factory=list,
        description="Columns that may be used if present but are not required"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (must be present)"
    )

    def matches(self, available_columns: List[ColumnInfo]) -> bool:
        """
        Check if available columns satisfy this input schema.

        The time column is checked by name only. Each required column must be
        present by name and its ``type`` must compare equal to the required
        ``type``. Optional columns are not inspected.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            True if the time column and all required columns are present and
            every required column has an equal type, False otherwise
        """
        # If the source repeats a column name, the last occurrence wins.
        available_map = {col.name: col for col in available_columns}

        # The time column only has to exist; its type is not compared.
        if self.time_column not in available_map:
            return False

        # Every required column must exist with an equal type.
        for required in self.required_columns:
            available = available_map.get(required.name)
            if available is None or available.type != required.type:
                return False

        return True

    def get_missing_columns(self, available_columns: List[ColumnInfo]) -> List[str]:
        """
        Get list of missing required column names.

        Only names are compared; a column that is present with a different
        type is not reported here. The time column is checked first, followed
        by the required columns in declaration order. Each missing name is
        reported once, even when the time column is also listed in
        ``required_columns`` or a required name is repeated.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            List of missing column names, without duplicates
        """
        available_names = {col.name for col in available_columns}

        # dict.fromkeys de-duplicates while preserving first-seen order, so the
        # time column stays first and nothing is listed twice.
        wanted = dict.fromkeys(
            [self.time_column, *(col.name for col in self.required_columns)]
        )
        return [name for name in wanted if name not in available_names]
|
|
|
|
|
|
class OutputSchema(BaseModel):
    """
    Declares the output columns produced by an Indicator.

    Use ``with_prefix`` to obtain a copy whose column names carry a prefix
    (e.g., the indicator instance name) so that they do not collide with
    other columns in the pipeline.
    """

    model_config = {"extra": "forbid"}

    columns: List[ColumnInfo] = Field(
        description="Output columns produced by this indicator",
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (passed through from input)",
    )

    def with_prefix(self, prefix: str) -> "OutputSchema":
        """
        Build a new OutputSchema whose column names carry ``prefix``.

        Each column is renamed to ``"{prefix}_{name}"``. A column whose name
        equals ``self.time_column`` keeps its name. The ``type``,
        ``description``, ``unit`` and ``nullable`` attributes are copied from
        the source column. This schema is not modified.

        Args:
            prefix: Prefix to add (e.g., indicator instance name)

        Returns:
            New OutputSchema with prefixed column names and the same
            ``time_column``
        """
        renamed: List[ColumnInfo] = []
        for column in self.columns:
            # The time column is shared across the pipeline, so it is exempt.
            if column.name != self.time_column:
                new_name = f"{prefix}_{column.name}"
            else:
                new_name = column.name

            renamed.append(
                ColumnInfo(
                    name=new_name,
                    type=column.type,
                    description=column.description,
                    unit=column.unit,
                    nullable=column.nullable,
                )
            )

        return OutputSchema(columns=renamed, time_column=self.time_column)
|
|
|
|
|
|
class IndicatorParameter(BaseModel):
    """
    Describes one configurable parameter of an indicator.

    Intended for AI agent discovery and dynamic indicator instantiation.
    This class only records the metadata; it declares no validators of its
    own beyond the field types (for example, ``min_value``/``max_value`` are
    stored but not checked against ``default`` here).
    """

    model_config = {"extra": "forbid"}

    # Identity and kind of the parameter.
    name: str = Field(
        description="Parameter name",
    )
    type: Literal["int", "float", "string", "bool"] = Field(
        description="Parameter type",
    )
    description: str = Field(
        description="Human and LLM-readable description",
    )

    # Defaulting behaviour.
    default: Optional[Any] = Field(
        default=None,
        description="Default value if not specified",
    )
    required: bool = Field(
        default=False,
        description="Whether this parameter is required",
    )

    # Optional numeric bounds.
    min_value: Optional[float] = Field(
        default=None,
        description="Minimum value (for numeric types)",
    )
    max_value: Optional[float] = Field(
        default=None,
        description="Maximum value (for numeric types)",
    )
|
|
|
|
|
|
class IndicatorMetadata(BaseModel):
    """
    Descriptive metadata attached to an Indicator class.

    Gives AI agents what they need to discover, understand, and instantiate
    indicators. Only ``name``, ``display_name``, ``description`` and
    ``category`` are mandatory; every list field defaults to an empty list.
    """

    model_config = {"extra": "forbid"}

    # Mandatory identification fields.
    name: str = Field(
        description="Unique indicator class name (e.g., 'RSI', 'SMA', 'BollingerBands')",
    )
    display_name: str = Field(
        description="Human-readable display name",
    )
    description: str = Field(
        description="Detailed description of what this indicator computes and why it's useful",
    )
    category: str = Field(
        description="Indicator category (e.g., 'momentum', 'trend', 'volatility', 'volume', 'custom')",
    )

    # Optional discovery aids; each instance gets its own fresh list.
    parameters: List[IndicatorParameter] = Field(
        default_factory=list,
        description="Configurable parameters for this indicator",
    )
    use_cases: List[str] = Field(
        default_factory=list,
        description="Common use cases and trading scenarios where this indicator is helpful",
    )
    references: List[str] = Field(
        default_factory=list,
        description="URLs or citations for indicator methodology",
    )
    tags: List[str] = Field(
        default_factory=list,
        description="Searchable tags (e.g., 'oscillator', 'mean-reversion', 'price-based')",
    )
|
|
|
|
|
|
class ComputeContext(BaseModel):
    """
    Context passed to an Indicator's compute() method.

    Carries the input rows together with flags describing whether the call is
    an incremental update and, if so, from which timestamp rows changed.
    """

    model_config = {"extra": "forbid"}

    data: List[Dict[str, Any]] = Field(
        description="Input data rows (time-ordered). Each dict is {column_name: value, time: timestamp}",
    )
    is_incremental: bool = Field(
        default=False,
        description="True if this is an incremental update (only new/changed rows), False for full recompute",
    )
    updated_from_time: Optional[int] = Field(
        default=None,
        description="Unix timestamp (ms) of the earliest updated row (for incremental updates)",
    )

    def get_column(self, name: str) -> List[Any]:
        """
        Collect the values of one column across all rows.

        Rows that do not contain ``name`` contribute ``None``, so the result
        always has one entry per row.

        Args:
            name: Column name

        Returns:
            List of values in row order
        """
        values: List[Any] = []
        for row in self.data:
            # .get keeps the output aligned with the rows when a key is absent.
            values.append(row.get(name))
        return values

    def get_times(self) -> List[int]:
        """
        Collect the ``"time"`` value of every row.

        Returns:
            List of timestamps in row order

        Raises:
            KeyError: If any row has no ``"time"`` key
        """
        times: List[int] = []
        for row in self.data:
            # Strict lookup: a row without a timestamp is an error.
            times.append(row["time"])
        return times
|
|
|
|
|
|
class ComputeResult(BaseModel):
    """
    Result from an Indicator's compute() method.

    Contains the computed output rows, plus a flag that marks the result as a
    partial (incremental) update.
    """

    model_config = {"extra": "forbid"}

    data: List[Dict[str, Any]] = Field(
        description="Output data rows (time-ordered). Must include time column."
    )
    is_partial: bool = Field(
        default=False,
        description="True if this result only contains updates (for incremental computation)"
    )

    def merge_with_prefix(
        self,
        prefix: str,
        existing_data: List[Dict[str, Any]],
        time_column: str = "time",
    ) -> List[Dict[str, Any]]:
        """
        Merge this result into existing data with column name prefixing.

        The output has exactly one row per row of ``existing_data``, in the
        same order. Each output row is a shallow copy of the existing row.
        When this result has a row with the same timestamp, that row's values
        are added under ``"{prefix}_{key}"`` for every key except the time
        column. Result rows whose timestamp does not occur in
        ``existing_data`` are not included. If this result contains several
        rows with the same timestamp, the last one is used.

        Args:
            prefix: Prefix to add to all column names except time
            existing_data: Existing data to merge with (matched by time)
            time_column: Key holding the timestamp in both this result's rows
                and ``existing_data``. Defaults to ``"time"``.

        Returns:
            Merged data with prefixed columns added

        Raises:
            KeyError: If a row of this result or of ``existing_data`` lacks
                ``time_column``
        """
        # Index the new rows by timestamp for O(1) lookup while merging.
        updates_by_time = {row[time_column]: row for row in self.data}

        merged: List[Dict[str, Any]] = []
        for existing_row in existing_data:
            # Copy so that the caller's rows are never mutated.
            merged_row = existing_row.copy()

            update = updates_by_time.get(existing_row[time_column])
            if update is not None:
                for key, value in update.items():
                    # The time column is the join key and stays unprefixed.
                    if key != time_column:
                        merged_row[f"{prefix}_{key}"] = value

            merged.append(merged_row)

        return merged
|