backend redesign
This commit is contained in:
269
backend.old/src/indicator/schema.py
Normal file
269
backend.old/src/indicator/schema.py
Normal file
@@ -0,0 +1,269 @@
|
||||
"""
|
||||
Data models for the Indicator system.
|
||||
|
||||
Defines schemas for input/output specifications, computation context,
|
||||
and metadata for AI agent discovery.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Literal, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from datasource.schema import ColumnInfo
|
||||
|
||||
|
||||
class InputSchema(BaseModel):
    """
    Declares the required input columns for an Indicator.

    Indicators match against any data source (DataSource or other Indicator)
    that provides columns satisfying this schema.
    """

    model_config = {"extra": "forbid"}

    required_columns: List[ColumnInfo] = Field(
        description="Columns that must be present in the input data"
    )
    optional_columns: List[ColumnInfo] = Field(
        default_factory=list,
        description="Columns that may be used if present but are not required"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (must be present)"
    )

    def matches(self, available_columns: List[ColumnInfo]) -> bool:
        """
        Check if available columns satisfy this input schema.

        The time column is only checked for presence (by name). Every
        required column must be present by name and its ``type`` must be
        equal to the required ``type``; no looser notion of compatibility
        is applied. Optional columns are not consulted.

        Args:
            available_columns: Columns provided by a data source

        Returns:
            True if the time column and all required columns are present
            and every required column's type is equal to the available one
        """
        # If a name occurs more than once, the last occurrence wins.
        available_map = {col.name: col for col in available_columns}

        if self.time_column not in available_map:
            return False

        for required in self.required_columns:
            available = available_map.get(required.name)
            if available is None or available.type != required.type:
                return False

        return True

    def get_missing_columns(self, available_columns: List[ColumnInfo]) -> List[str]:
        """
        Get list of missing required column names.

        Only names are compared; a column that is present with a different
        type is not reported here (see ``matches`` for the type check).

        Args:
            available_columns: Columns provided by a data source

        Returns:
            Missing column names without duplicates: the time column first
            (if missing), followed by missing required columns in their
            declaration order
        """
        available_names = {col.name for col in available_columns}

        missing: List[str] = []
        reported = set()

        # The time column may also be declared in required_columns, and
        # required_columns may repeat a name; report each name only once.
        wanted = [self.time_column, *(col.name for col in self.required_columns)]
        for name in wanted:
            if name in available_names or name in reported:
                continue
            reported.add(name)
            missing.append(name)

        return missing
|
||||
|
||||
|
||||
class OutputSchema(BaseModel):
    """
    Declares the output columns produced by an Indicator.

    Column names will be automatically prefixed with the indicator instance name
    to avoid collisions in the pipeline.
    """

    model_config = {"extra": "forbid"}

    columns: List[ColumnInfo] = Field(
        description="Output columns produced by this indicator"
    )
    time_column: str = Field(
        default="time",
        description="Name of the timestamp column (passed through from input)"
    )

    def with_prefix(self, prefix: str) -> "OutputSchema":
        """
        Build a copy of this schema whose column names carry ``prefix``.

        Each column is renamed to ``"<prefix>_<name>"``. A column whose name
        equals ``time_column`` keeps its name unchanged. This instance is
        not modified.

        Args:
            prefix: Prefix to add (e.g., indicator instance name)

        Returns:
            New OutputSchema with prefixed column names and the same
            ``time_column``
        """
        renamed: List[ColumnInfo] = []
        for column in self.columns:
            if column.name == self.time_column:
                # The time column is shared across the pipeline; keep as-is.
                new_name = column.name
            else:
                new_name = f"{prefix}_{column.name}"

            renamed.append(
                ColumnInfo(
                    name=new_name,
                    type=column.type,
                    description=column.description,
                    unit=column.unit,
                    nullable=column.nullable,
                )
            )

        return OutputSchema(columns=renamed, time_column=self.time_column)
|
||||
|
||||
|
||||
class IndicatorParameter(BaseModel):
    """
    Metadata for a configurable indicator parameter.

    Used for AI agent discovery and dynamic indicator instantiation.
    """

    # Unknown keys are rejected at validation time.
    model_config = {"extra": "forbid"}

    name: str = Field(
        description="Parameter name",
    )
    type: Literal["int", "float", "string", "bool"] = Field(
        description="Parameter type",
    )
    description: str = Field(
        description="Human and LLM-readable description",
    )
    default: Optional[Any] = Field(
        default=None,
        description="Default value if not specified",
    )
    required: bool = Field(
        default=False,
        description="Whether this parameter is required",
    )
    # The bounds are declared as metadata only; this class does not check
    # them against `default` or against each other.
    min_value: Optional[float] = Field(
        default=None,
        description="Minimum value (for numeric types)",
    )
    max_value: Optional[float] = Field(
        default=None,
        description="Maximum value (for numeric types)",
    )
|
||||
|
||||
|
||||
class IndicatorMetadata(BaseModel):
    """
    Rich metadata for an Indicator class.

    Enables AI agents to discover, understand, and instantiate indicators.
    """

    # Unknown keys are rejected at validation time.
    model_config = {"extra": "forbid"}

    # --- Identification (all required) ---
    name: str = Field(
        description="Unique indicator class name (e.g., 'RSI', 'SMA', 'BollingerBands')",
    )
    display_name: str = Field(
        description="Human-readable display name",
    )
    description: str = Field(
        description="Detailed description of what this indicator computes and why it's useful",
    )
    category: str = Field(
        description="Indicator category (e.g., 'momentum', 'trend', 'volatility', 'volume', 'custom')",
    )

    # --- Optional discovery aids (each defaults to an empty list) ---
    parameters: List[IndicatorParameter] = Field(
        description="Configurable parameters for this indicator",
        default_factory=list,
    )
    use_cases: List[str] = Field(
        description="Common use cases and trading scenarios where this indicator is helpful",
        default_factory=list,
    )
    references: List[str] = Field(
        description="URLs or citations for indicator methodology",
        default_factory=list,
    )
    tags: List[str] = Field(
        description="Searchable tags (e.g., 'oscillator', 'mean-reversion', 'price-based')",
        default_factory=list,
    )
|
||||
|
||||
|
||||
class ComputeContext(BaseModel):
    """
    Context passed to an Indicator's compute() method.

    Contains the input data and metadata about what changed (for incremental updates).
    """

    model_config = {"extra": "forbid"}

    data: List[Dict[str, Any]] = Field(
        description="Input data rows (time-ordered). Each dict is {column_name: value, time: timestamp}"
    )
    is_incremental: bool = Field(
        default=False,
        description="True if this is an incremental update (only new/changed rows), False for full recompute"
    )
    updated_from_time: Optional[int] = Field(
        default=None,
        description="Unix timestamp (ms) of the earliest updated row (for incremental updates)"
    )

    def get_column(self, name: str) -> List[Any]:
        """
        Collect the values of one column across all rows.

        Rows that do not contain ``name`` contribute ``None`` rather than
        raising, so the result always has one entry per row in ``data``.

        Args:
            name: Column name

        Returns:
            Values of the column, in the same order as ``data``
        """
        values: List[Any] = []
        for record in self.data:
            values.append(record.get(name))
        return values

    def get_times(self) -> List[int]:
        """
        Collect the ``"time"`` value of every row.

        Unlike ``get_column``, the key is looked up strictly: a row without
        a ``"time"`` key raises ``KeyError``.

        Returns:
            Timestamps in the same order as ``data``
        """
        timestamps: List[int] = []
        for record in self.data:
            timestamps.append(record["time"])
        return timestamps
|
||||
|
||||
|
||||
class ComputeResult(BaseModel):
    """
    Result from an Indicator's compute() method.

    Contains the computed output data with proper column naming.
    """

    model_config = {"extra": "forbid"}

    data: List[Dict[str, Any]] = Field(
        description="Output data rows (time-ordered). Must include time column."
    )
    is_partial: bool = Field(
        default=False,
        description="True if this result only contains updates (for incremental computation)"
    )

    def merge_with_prefix(self, prefix: str, existing_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Overlay this result onto ``existing_data``, prefixing the new columns.

        Rows are paired by their ``"time"`` value. For every existing row a
        shallow copy is produced; if this result has a row with the same
        time, each of its non-time values is added under the key
        ``"<prefix>_<column>"``. The output has exactly one row per entry of
        ``existing_data``, so result rows whose time does not occur there
        are not included. Neither input is modified.

        Args:
            prefix: Prefix to add to all column names except time
            existing_data: Existing data to merge with (matched by time)

        Returns:
            Merged data with prefixed columns added

        Raises:
            KeyError: If any row in ``data`` or ``existing_data`` lacks a
                ``"time"`` key
        """
        # Index the computed rows by timestamp; for a repeated timestamp the
        # later row replaces the earlier one.
        computed_by_time: Dict[Any, Dict[str, Any]] = {}
        for computed in self.data:
            computed_by_time[computed["time"]] = computed

        merged: List[Dict[str, Any]] = []
        for base_row in existing_data:
            timestamp = base_row["time"]
            combined = base_row.copy()

            computed = computed_by_time.get(timestamp)
            if computed is not None:
                combined.update(
                    {
                        f"{prefix}_{column}": value
                        for column, value in computed.items()
                        if column != "time"
                    }
                )

            merged.append(combined)

        return merged
|
||||
Reference in New Issue
Block a user