Source code for orka.agents.validation_and_structuring_agent

"""
Validation and Structuring Agent
================================

This module provides the ValidationAndStructuringAgent class, which is responsible for
validating answers and structuring them into a memory format. The agent ensures answers
are correct and contextually coherent, then extracts key information into a structured
memory object.

Classes
-------
ValidationAndStructuringAgent
    Agent that validates answers and structures them into memory objects.
"""

import json
import re
from typing import Any, Dict, Optional

from jinja2 import Template

from .base_agent import BaseAgent
from .llm_agents import OpenAIAnswerBuilder


class ValidationAndStructuringAgent(BaseAgent):
    """
    Agent that validates answers and structures them into memory objects.

    This agent performs two main functions:

    1. Validates if an answer is correct and contextually coherent
    2. Structures valid answers into a memory object format

    The agent uses an LLM (Language Model) to perform validation and
    structuring. It returns a dictionary containing:

    - valid: Boolean indicating if the answer is valid
    - reason: Explanation of the validation decision
    - memory_object: Structured memory object if valid, None otherwise

    Parameters
    ----------
    params : Dict[str, Any], optional
        Configuration parameters for the agent, including:

        - prompt: The base prompt for the LLM
        - queue: Optional queue for async operations
        - agent_id: Unique identifier for the agent
        - store_structure: Optional template for the memory object structure

    Attributes
    ----------
    llm_agent : OpenAIAnswerBuilder
        The LLM agent used for validation and structuring
    """
    def __init__(self, params: Optional[Dict[str, Any]] = None):
        """Initialize the agent with an OpenAIAnswerBuilder for LLM calls."""
        super().__init__(params)

        # Initialize the LLM agent with the required parameters.
        prompt = params.get("prompt", "") if params else ""
        queue = params.get("queue") if params else None
        agent_id = params.get("agent_id", "validation_agent") if params else "validation_agent"
        self.llm_agent = OpenAIAnswerBuilder(
            agent_id=f"{agent_id}_llm",
            prompt=prompt,
            queue=queue,
        )
    def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process the input data to validate and structure the answer.

        Args:
            input_data: Dictionary containing:
                - input: The original question
                - previous_outputs: Outputs from earlier agents; the
                  "context-collector" entry supplies the context used to
                  generate the answer, and the "answer-builder" entry
                  supplies the answer to validate and structure

        Returns:
            Dictionary containing:
                - valid: Boolean indicating if the answer is valid
                - reason: Explanation of the validation decision
                - memory_object: Structured memory object if valid, None otherwise

        Note:
            The optional structure template for memory objects is read from
            ``self.params["store_structure"]``, not from ``input_data``.
        """
        question = input_data.get("input", "")

        # Extract clean response text from complex agent outputs.
        context_output = input_data.get("previous_outputs", {}).get("context-collector", {})
        if isinstance(context_output, dict) and "result" in context_output:
            context = context_output["result"].get("response", "NONE")
        else:
            context = str(context_output) if context_output else "NONE"

        answer_output = input_data.get("previous_outputs", {}).get("answer-builder", {})
        if isinstance(answer_output, dict) and "result" in answer_output:
            answer = answer_output["result"].get("response", "NONE")
        else:
            answer = str(answer_output) if answer_output else "NONE"

        store_structure = self.params.get("store_structure")

        # Check if we have a custom prompt that needs template rendering.
        if (
            hasattr(self.llm_agent, "prompt")
            and self.llm_agent.prompt
            and self.llm_agent.prompt.strip()
        ):
            # Use the custom prompt with template rendering.
            try:
                template = Template(self.llm_agent.prompt)
                prompt = template.render(**input_data)
            except Exception:
                # Fall back to the original prompt if rendering fails.
                prompt = self.llm_agent.prompt
        else:
            # Use the default prompt-building logic.
            prompt = self.build_prompt(question, context, answer, store_structure)

        # Create the LLM input with the prompt but disable automatic JSON
        # parsing; we handle JSON parsing manually since we expect a
        # different schema.
        llm_input = {"prompt": prompt, "parse_json": False}

        # Get the response from the LLM.
        response = self.llm_agent.run(llm_input)

        # Extract the raw LLM output.
        if isinstance(response, dict):
            raw_llm_output = response.get("response", "")
        else:
            raw_llm_output = str(response)

        try:
            # Manual JSON extraction: look for JSON in markdown code blocks
            # first.
            json_match = re.search(r"```json\s*(.*?)\s*```", raw_llm_output, re.DOTALL)
            if json_match:
                json_text = json_match.group(1)
            else:
                # Look for a JSON object directly, using a more robust
                # pattern: find the first "{" and count braces to locate the
                # matching closing "}".
                start_idx = raw_llm_output.find("{")
                if start_idx != -1:
                    brace_count = 0
                    end_idx = start_idx
                    for i, char in enumerate(raw_llm_output[start_idx:], start_idx):
                        if char == "{":
                            brace_count += 1
                        elif char == "}":
                            brace_count -= 1
                            if brace_count == 0:
                                end_idx = i + 1
                                break

                    if brace_count == 0:
                        # Found the matching closing brace.
                        json_text = raw_llm_output[start_idx:end_idx]
                    else:
                        raise ValueError("Unmatched braces in JSON")
                else:
                    raise ValueError("No JSON structure found in response")

            # Clean up the extracted JSON text to handle potential
            # formatting issues, e.g. single-quoted keys.
            json_text = json_text.strip()
            json_text = re.sub(r"'([^']*)':", r'"\1":', json_text)

            result = json.loads(json_text)

            # Check if we have the expected validation format.
            if isinstance(result, dict) and "valid" in result:
                # Perfect - we have the expected format.
                result["prompt"] = prompt
                result["formatted_prompt"] = prompt
                result["raw_llm_output"] = raw_llm_output
                return result
            elif isinstance(result, dict) and "response" in result:
                # The LLM returned the wrong format - convert it to the
                # validation format.
                return {
                    "valid": False,
                    "reason": f"LLM returned wrong JSON format. Response: {result.get('response', 'Unknown')}",
                    "memory_object": None,
                    "prompt": prompt,
                    "formatted_prompt": prompt,
                    "raw_llm_output": raw_llm_output,
                }
            else:
                # Unknown JSON structure.
                raise ValueError("Invalid JSON structure - unrecognized format")

        except Exception as e:
            # Fallback error handling.
            return {
                "valid": False,
                "reason": f"Failed to parse model output: {e}. Raw output: {raw_llm_output}",
                "memory_object": None,
                "prompt": prompt,
                "formatted_prompt": prompt,
                "raw_llm_output": raw_llm_output,
            }
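    # Sketch of the JSON reply that the parsing logic in ``run`` expects from
    # the LLM. The values are illustrative only; the memory_object fields
    # mirror the defaults produced by ``_get_structure_instructions``:
    #
    # {
    #     "valid": true,
    #     "reason": "The answer is supported by the provided context.",
    #     "memory_object": {
    #         "fact": "Paris is the capital of France",
    #         "category": "fact",
    #         "confidence": 0.95,
    #         "source": "context"
    #     }
    # }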
    def build_prompt(
        self,
        question: str,
        context: str,
        answer: str,
        store_structure: Optional[str] = None,
    ) -> str:
        """
        Build the prompt for the validation and structuring task.

        Args:
            question: The original question
            context: The context used to generate the answer
            answer: The answer to validate and structure
            store_structure: Optional structure template for memory objects

        Returns:
            The complete prompt for the LLM
        """
        # If we have a custom prompt from the configuration, use it instead
        # of the default logic. The custom prompt is expected to handle
        # template variables itself (e.g. via the YAML configuration).
        if (
            hasattr(self.llm_agent, "prompt")
            and self.llm_agent.prompt
            and self.llm_agent.prompt.strip()
        ):
            return self.llm_agent.prompt

        # Fall back to the default prompt-building logic if no custom prompt
        # is provided. Handle cases where the context or answer is "NONE" or
        # empty.
        if context in ["NONE", "", None]:
            context = "No context available"
        if answer in ["NONE", "", None]:
            answer = "No answer provided"

        # Special handling for "NONE" responses - treat them as valid but
        # low-confidence.
        if answer == "No answer provided" and context == "No context available":
            prompt = f"""Validate the following situation and structure it into a memory format.

Question: {question}

Context: {context}

Answer to validate: {answer}

This appears to be a case where no information was found for the question. Please validate this as a legitimate "no information available" response and structure it appropriately.

IMPORTANT: You MUST respond with the exact JSON format specified below. Do not use any other format.

For cases where no information is available, you should:
1. Mark as valid=true (since "no information available" is a valid response)
2. Set confidence to 0.1 (low but not zero)
3. Create a memory object that captures the fact that no information was found

{self._get_structure_instructions(store_structure)}

Return your response in the following JSON format:
{{
    "valid": true/false,
    "reason": "explanation of validation decision",
    "memory_object": {{
        // structured memory object if valid, null if invalid
    }}
}}"""
        else:
            prompt = f"""Validate the following answer and structure it into a memory format.

Question: {question}

Context: {context}

Answer to validate: {answer}

Please validate if the answer is correct and contextually coherent. Then structure the information into a memory object.

IMPORTANT: You MUST respond with the exact JSON format specified below. Do not use any other format.

{self._get_structure_instructions(store_structure)}

Return your response in the following JSON format:
{{
    "valid": true/false,
    "reason": "explanation of validation decision",
    "memory_object": {{
        // structured memory object if valid, null if invalid
    }}
}}"""

        return prompt
    def _get_structure_instructions(self, store_structure: Optional[str] = None) -> str:
        """
        Get the structure instructions for the memory object.

        Args:
            store_structure: Optional structure template for memory objects

        Returns:
            Instructions for structuring the memory object
        """
        if store_structure:
            return f"""Structure the memory object according to this template:
{store_structure}

Ensure all required fields are present and properly formatted."""
        else:
            return """Structure the memory object with these fields:
- fact: The validated fact or information
- category: The category or type of information (e.g., 'fact', 'opinion', 'data')
- confidence: A number between 0 and 1 indicating confidence in the fact
- source: The source of the information (e.g., 'context', 'answer', 'inferred')"""
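

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module's public API). It shows
# the input shape ``run`` expects. The stubbed ``llm_agent.run`` keeps the
# example offline, standing in for a real model call that would return JSON
# of the same shape; it also assumes OpenAIAnswerBuilder can be constructed
# without live OpenAI credentials.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    agent = ValidationAndStructuringAgent({"agent_id": "validation_agent"})

    # Hypothetical canned reply; a live run would produce model output here.
    agent.llm_agent.run = lambda llm_input: {
        "response": (
            '{"valid": true, "reason": "Answer matches the context.", '
            '"memory_object": {"fact": "Paris is the capital of France", '
            '"category": "fact", "confidence": 0.95, "source": "context"}}'
        )
    }

    result = agent.run(
        {
            "input": "What is the capital of France?",
            "previous_outputs": {
                "context-collector": {"result": {"response": "France's capital is Paris."}},
                "answer-builder": {"result": {"response": "Paris"}},
            },
        }
    )
    print(result["valid"], result["reason"])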