xml-pipeline/xml_pipeline/config/split_loader.py
dullfig e653d63bc1 Rename agentserver to xml_pipeline, add console example
OSS restructuring for open-core model:
- Rename package from agentserver/ to xml_pipeline/
- Update all imports (44 Python files, 31 docs/configs)
- Update pyproject.toml for OSS distribution (v0.3.0)
- Move prompt_toolkit from core to optional [console] extra
- Remove auth/server/lsp from core optional deps (-> Nextra)

New console example in examples/console/:
- Self-contained demo with handlers and config
- Uses prompt_toolkit (optional, falls back to input())
- No password auth, no TUI, no LSP — just the basics
- Shows how to use xml-pipeline as a library

Import changes:
- from agentserver.* -> from xml_pipeline.*
- CLI entry points updated: xml_pipeline.cli:main

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 21:41:19 -08:00

322 lines
8.7 KiB
Python

"""
Split configuration loader.
Loads organism configuration from multiple files:
- organism.yaml: Core settings (name, port, llm backends)
- listeners/*.yaml: Per-listener configurations
This enables:
- Cleaner separation of concerns
- Per-listener LSP-assisted editing
- Modular configuration management
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
import glob as glob_module
import yaml
from .listeners.store import ListenerConfigStore, LISTENERS_DIR
class SplitConfigError(Exception):
"""Configuration loading/validation error."""
pass
@dataclass
class OrganismCoreConfig:
"""
Core organism configuration (from organism.yaml).
Does not include listener details - those come from split files.
"""
name: str
port: int = 8765
version: str = "0.1.0"
description: str = ""
# Thread scheduling
thread_scheduling: str = "breadth-first"
# Concurrency limits
max_concurrent_pipelines: int = 100
max_concurrent_handlers: int = 50
max_concurrent_per_agent: int = 5
# Listeners directory configuration
listeners_directory: Optional[str] = None
listeners_include: list[str] = field(default_factory=lambda: ["*.yaml"])
# LLM configuration (kept in organism.yaml)
llm: dict[str, Any] = field(default_factory=dict)
# Server configuration
server: dict[str, Any] = field(default_factory=dict)
# Auth configuration
auth: dict[str, Any] = field(default_factory=dict)
# Meta configuration
meta: dict[str, Any] = field(default_factory=dict)
@dataclass
class SplitOrganismConfig:
"""
Complete organism configuration assembled from split files.
"""
# Core config from organism.yaml
core: OrganismCoreConfig
# Listener configs (from split files or inlined)
listeners: list[dict[str, Any]] = field(default_factory=list)
# Source paths for debugging
organism_path: Optional[Path] = None
listener_paths: list[Path] = field(default_factory=list)
def load_organism_yaml(path: Path) -> tuple[dict[str, Any], OrganismCoreConfig]:
"""
Load organism.yaml and extract core config.
Returns (raw_data, core_config) tuple.
"""
with open(path) as f:
raw = yaml.safe_load(f)
if not isinstance(raw, dict):
raise SplitConfigError(f"Config must be a YAML mapping, got {type(raw)}")
# Extract organism section
org_raw = raw.get("organism", {})
if not org_raw.get("name"):
raise SplitConfigError("organism.name is required")
# Extract listeners config
listeners_raw = raw.get("listeners", {})
if isinstance(listeners_raw, dict):
# New split format: listeners: { directory: ..., include: [...] }
listeners_dir = listeners_raw.get("directory")
listeners_include = listeners_raw.get("include", ["*.yaml"])
else:
# Legacy format: listeners is a list - no split loading
listeners_dir = None
listeners_include = ["*.yaml"]
core = OrganismCoreConfig(
name=org_raw["name"],
port=org_raw.get("port", 8765),
version=org_raw.get("version", "0.1.0"),
description=org_raw.get("description", ""),
thread_scheduling=org_raw.get("thread_scheduling", "breadth-first"),
max_concurrent_pipelines=org_raw.get("max_concurrent_pipelines", 100),
max_concurrent_handlers=org_raw.get("max_concurrent_handlers", 50),
max_concurrent_per_agent=org_raw.get("max_concurrent_per_agent", 5),
listeners_directory=listeners_dir,
listeners_include=listeners_include,
llm=raw.get("llm", {}),
server=raw.get("server", {}),
auth=raw.get("auth", {}),
meta=raw.get("meta", {}),
)
return raw, core
def load_listener_files(
directory: Path,
patterns: list[str],
) -> list[tuple[Path, dict[str, Any]]]:
"""
Load all listener YAML files matching patterns.
Returns list of (path, data) tuples.
"""
results = []
for pattern in patterns:
full_pattern = str(directory / pattern)
for filepath in glob_module.glob(full_pattern):
path = Path(filepath)
try:
with open(path) as f:
data = yaml.safe_load(f)
if isinstance(data, dict):
# Ensure name is set from filename if not in file
if "name" not in data:
data["name"] = path.stem
results.append((path, data))
except Exception as e:
raise SplitConfigError(
f"Failed to load listener file {path}: {e}"
)
return results
def resolve_listeners_directory(
config_dir: Optional[str],
organism_path: Optional[Path] = None,
) -> Path:
"""
Resolve the listeners directory path.
Handles:
- None -> default ~/.xml-pipeline/listeners
- Absolute path -> use as-is
- Relative path -> relative to organism.yaml location
- ~ expansion
"""
if config_dir is None:
return LISTENERS_DIR
# Expand user home
expanded = Path(config_dir).expanduser()
if expanded.is_absolute():
return expanded
# Relative to organism.yaml location
if organism_path is not None:
return organism_path.parent / expanded
return expanded
def load_split_config(organism_path: Path) -> SplitOrganismConfig:
"""
Load complete organism configuration from split files.
If organism.yaml has listeners as a dict with 'directory' key,
loads listener configs from that directory.
If listeners is a list (legacy format), uses those directly.
"""
raw, core = load_organism_yaml(organism_path)
listeners: list[dict[str, Any]] = []
listener_paths: list[Path] = []
listeners_raw = raw.get("listeners", {})
if isinstance(listeners_raw, list):
# Legacy format: inline listeners
listeners = listeners_raw
elif isinstance(listeners_raw, dict):
# Split format: load from directory
listeners_dir = resolve_listeners_directory(
core.listeners_directory,
organism_path,
)
if listeners_dir.exists():
loaded = load_listener_files(listeners_dir, core.listeners_include)
for path, data in loaded:
listeners.append(data)
listener_paths.append(path)
else:
raise SplitConfigError(
f"listeners must be a list or dict, got {type(listeners_raw)}"
)
return SplitOrganismConfig(
core=core,
listeners=listeners,
organism_path=organism_path,
listener_paths=listener_paths,
)
def get_organism_yaml_path() -> Optional[Path]:
"""
Get the default organism.yaml path.
Searches in order:
1. ~/.xml-pipeline/organism.yaml
2. ./organism.yaml
3. ./config/organism.yaml
"""
candidates = [
Path.home() / ".xml-pipeline" / "organism.yaml",
Path("organism.yaml"),
Path("config/organism.yaml"),
]
for path in candidates:
if path.exists():
return path
return None
def save_organism_yaml(config: OrganismCoreConfig, path: Path) -> None:
"""
Save organism core config to YAML file.
Preserves the split-file structure if listeners_directory is set.
"""
data: dict[str, Any] = {
"organism": {
"name": config.name,
"port": config.port,
"version": config.version,
}
}
if config.description:
data["organism"]["description"] = config.description
if config.thread_scheduling != "breadth-first":
data["organism"]["thread_scheduling"] = config.thread_scheduling
# Add listeners directory reference
if config.listeners_directory:
data["listeners"] = {
"directory": config.listeners_directory,
"include": config.listeners_include,
}
# Add other sections if non-empty
if config.llm:
data["llm"] = config.llm
if config.server:
data["server"] = config.server
if config.auth:
data["auth"] = config.auth
if config.meta:
data["meta"] = config.meta
with open(path, "w") as f:
yaml.dump(
data,
f,
default_flow_style=False,
sort_keys=False,
allow_unicode=True,
)
def load_organism_yaml_content(path: Path) -> str:
"""Load organism.yaml content as string for editing."""
with open(path) as f:
return f.read()
def save_organism_yaml_content(path: Path, content: str) -> None:
"""Save organism.yaml content from string."""
# Validate YAML before saving
yaml.safe_load(content)
with open(path, "w") as f:
f.write(content)