From 4aa40ed29bbbb8c390dd2c2d9d8e56fa6d7ec127 Mon Sep 17 00:00:00 2001 From: Donna Date: Mon, 26 Jan 2026 06:46:17 +0000 Subject: [PATCH] Add edge analysis API spec - AI-assisted field mapping - POST /api/v1/flows/{id}/analyze-edge endpoint spec - Confidence levels: high (green), medium (yellow), low (red) - Heuristic + LLM analysis paths - Database schema for edge_mappings - Sequencer integration notes - Future enhancements roadmap Co-authored-by: Dan --- docs/edge-analysis-spec.md | 324 +++++++++++++++++++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 docs/edge-analysis-spec.md diff --git a/docs/edge-analysis-spec.md b/docs/edge-analysis-spec.md new file mode 100644 index 0000000..d5a9126 --- /dev/null +++ b/docs/edge-analysis-spec.md @@ -0,0 +1,324 @@ +# Edge Analysis API Specification + +**Status:** Draft +**Author:** Donna (with Dan) +**Date:** 2026-01-26 + +## Overview + +When users connect nodes in the visual flow editor, an AI analyzes the schema compatibility and proposes field mappings. This provides immediate visual feedback (green/yellow/red) and reduces manual configuration for common cases. 
+ +## User Experience + +### Visual Feedback + +When a user draws a connection from Node A to Node B: + +``` +┌─────────────┐ ┌─────────────┐ +│ Node A │ 🟢 ────────────────▶ │ Node B │ +│ │ High confidence │ │ +└─────────────┘ └─────────────┘ + +┌─────────────┐ ┌─────────────┐ +│ Node C │ 🟡 ─ ─ ─ ─ ─ ─ ─ ─▶ │ Node D │ +│ │ Review suggested │ │ +└─────────────┘ └─────────────┘ + +┌─────────────┐ ┌─────────────┐ +│ Node E │ 🔴 ─ ─ ─ ✕ ─ ─ ─ ─▶ │ Node F │ +│ │ Manual mapping needed │ │ +└─────────────┘ └─────────────┘ +``` + +### Confidence Levels + +| Level | Color | Threshold | Meaning | +|-------|-------|-----------|---------| +| HIGH | 🟢 Green | ≥ 0.8 | All required inputs mapped, types compatible | +| MEDIUM | 🟡 Yellow | ≥ 0.4 and < 0.8 | Some mappings uncertain or missing optional fields | +| LOW | 🔴 Red | < 0.4 | Cannot determine mapping, manual intervention required | + +### Interaction Flow + +1. User drags connection from A output → B input +2. Frontend calls `POST /api/v1/flows/{flow_id}/analyze-edge` +3. Backend analyzes schemas (cached if previously computed) +4. Frontend renders line with confidence color +5. User clicks line → mapping panel opens +6. User can accept, modify, or manually define mappings +7. Changes saved to flow's `canvas_state` + +## API Endpoint + +### POST /api/v1/flows/{flow_id}/analyze-edge + +Analyze compatibility between two nodes and propose field mappings. 
+ +#### Request + +```json +{ + "from_node": "calculator.add", + "to_node": "calculator.multiply", + "from_output_schema": "", + "to_input_schema": "" +} +``` + +**Notes:** +- If schemas not provided, fetched from node registry +- Schemas can be XSD strings or references to registered schemas + +#### Response + +```json +{ + "edge_id": "add_to_multiply", + "confidence": 0.85, + "level": "high", + + "proposed_mapping": { + "mappings": [ + { + "from_field": "output.sum", + "to_field": "input.value", + "confidence": 0.95, + "reason": "Exact name match, compatible types (int → int)" + }, + { + "from_field": "output.operands", + "to_field": "input.factors", + "confidence": 0.6, + "reason": "Semantic similarity, both arrays of numbers" + } + ], + "unmapped_required": [ + { + "field": "input.precision", + "type": "int", + "default": 2, + "suggestion": "Set constant value or map from upstream" + } + ], + "unmapped_optional": [ + { + "field": "input.label", + "type": "string" + } + ] + }, + + "warnings": [ + "input.precision has no source, using default value 2", + "output.metadata will be discarded (no matching input field)" + ], + + "errors": [], + + "analysis_method": "llm", // or "heuristic" for simple cases + "cached": false, + "analysis_time_ms": 245 +} +``` + +#### Error Response + +```json +{ + "error": "schema_not_found", + "message": "Node 'calculator.add' has no registered output schema", + "details": { + "node": "calculator.add" + } +} +``` + +### GET /api/v1/flows/{flow_id}/edges + +List all edges in a flow with their current mapping status. 
+ +#### Response + +```json +{ + "edges": [ + { + "id": "edge_1", + "from_node": "input", + "to_node": "calculator.add", + "confidence": 0.92, + "level": "high", + "mapping_status": "auto", + "last_analyzed": "2026-01-26T06:30:00Z" + }, + { + "id": "edge_2", + "from_node": "calculator.add", + "to_node": "formatter", + "confidence": 0.45, + "level": "medium", + "mapping_status": "user_modified", + "last_analyzed": "2026-01-26T06:30:00Z" + } + ] +} +``` + +### PUT /api/v1/flows/{flow_id}/edges/{edge_id}/mapping + +Save user-defined or modified mapping for an edge. + +#### Request + +```json +{ + "mappings": [ + { + "from_field": "output.sum", + "to_field": "input.value" + }, + { + "to_field": "input.factor", + "constant": 5 + }, + { + "to_field": "input.label", + "expression": "concat('Result: ', output.sum)" + } + ], + "user_confirmed": true +} +``` + +## Analysis Engine + +### Heuristic Analysis (Fast Path) + +Used when schemas are simple and mapping is obvious: + +1. **Exact name match** — `output.value` → `input.value` (confidence: 0.95) +2. **Case-insensitive match** — `output.Value` → `input.value` (confidence: 0.9) +3. **Common aliases** — `output.result` → `input.value` (confidence: 0.7) +4. **Type compatibility** — int → float OK, string → int NOT OK + +### LLM Analysis (Deep Path) + +Used when heuristics produce low confidence: + +``` +System: You are analyzing data flow compatibility between two XML schemas. + +Given: +- Source schema (output of previous step): {from_schema} +- Target schema (input of next step): {to_schema} + +Propose a field mapping. For each target field, identify: +1. The best source field to map from (if any) +2. Confidence (0-1) in the mapping +3. Brief reason for the mapping + +If a required target field cannot be mapped, flag it. +If source fields will be discarded, note them. + +Respond in JSON format. 
+``` + +### Caching Strategy + +- Cache key: the ordered pair `(hash(from_schema), hash(to_schema))` — direction matters, so A → B and B → A are cached separately +- TTL: 24 hours (schemas rarely change) +- Invalidate on: node schema update, user-initiated cache clear +- Store: Redis or in-memory LRU + +## Database Schema + +### Edge Mappings Table + +```sql +CREATE TABLE edge_mappings ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + flow_id UUID NOT NULL REFERENCES flows(id) ON DELETE CASCADE, + + -- Edge identification + from_node VARCHAR(100) NOT NULL, + to_node VARCHAR(100) NOT NULL, + + -- Analysis results + confidence NUMERIC(3,2), + level VARCHAR(10), -- 'high', 'medium', 'low' + analysis_method VARCHAR(20), -- 'heuristic', 'llm' + + -- The actual mapping (JSON) + proposed_mapping JSONB, + user_mapping JSONB, -- User overrides, if any + + -- Status + user_confirmed BOOLEAN DEFAULT FALSE, + + -- Timestamps + analyzed_at TIMESTAMP WITH TIME ZONE, + confirmed_at TIMESTAMP WITH TIME ZONE, + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + + UNIQUE(flow_id, from_node, to_node) +); + +CREATE INDEX idx_edge_mappings_flow ON edge_mappings(flow_id); +``` + +## Sequencer Integration + +When a sequence is executed, the sequencer factory: + +1. Loads all edge mappings for the flow +2. For each edge, generates a transformer function: + +```python +def generate_transformer(edge_mapping: EdgeMapping) -> Callable: + """ + Generate a function that transforms A's output to B's input. + """ + def transform(source_xml: str) -> str: + source = parse_xml(source_xml) + target = {} + + for mapping in edge_mapping.effective_mapping: + if mapping.from_field: + target[mapping.to_field] = extract(source, mapping.from_field) + elif mapping.constant is not None: + target[mapping.to_field] = mapping.constant + elif mapping.expression: + target[mapping.to_field] = evaluate(mapping.expression, source) + + return serialize_xml(target, edge_mapping.to_schema) + + return transform +``` + +3. 
The transformer is called between consecutive steps in the sequence + +## Future Enhancements + +### v1.1 — Type Coercion +- Automatic int → string, date formatting, etc. +- Warnings when lossy conversion occurs + +### v1.2 — Expression Builder +- Visual expression editor for complex mappings +- Functions: `concat()`, `format()`, `split()`, `lookup()` + +### v1.3 — Learning from Corrections +- Track when users override AI suggestions +- Fine-tune confidence thresholds +- Eventually: personalized mapping suggestions + +### v2.0 — Multi-Output Nodes +- Some nodes produce multiple outputs +- UI shows multiple output ports +- User wires specific port to specific input + +--- + +*This spec is a living document. Update as implementation progresses.*