Lesson 4: Build a Custom Agent System

In this capstone workshop, you will pull together everything you have learned about **Tool Calling**, **ReAct Planning Loops**, and **System Instructions** to build a fully autonomous **Coding Agent (mini-SWE-agent)**. Your agent will write files, run unit tests, read tracebacks, and debug its own errors until all tests pass successfully!

The Concept: Self-Debugging Agents

Building a coding agent is one of the most exciting frontiers in AI Engineering. The magic is in the **Self-Correction Loop**. If you ask an LLM to write code in one shot, it might make a syntax error or a logic error. However, if the model can interact with a terminal, run its own code, see the traceback error message, and feed that traceback back into its context, it can autonomously correct its mistakes just like a human software developer.

Safety Warning: Sandboxing is Critical!

Autonomous agents with filesystem and terminal tools must be restricted. Always write safety checks to ensure your tools only read/write files inside a designated sandbox folder, and never allow the agent to execute unbounded commands like rm -rf /.

Architectural Design

Your custom Coding Agent requires three core architectural pillars:

The Filesystem Toolset: Simple, secure Python functions to create, edit, and read files. All paths must be strictly resolved within a local directory.
The Test Runner: A tool that triggers pytest or a Python runner using the subprocess standard-library module, capturing stdout and stderr.
The ReAct Orchestration Loop: A loop that processes the agent's thoughts and actions, catches errors during execution, and feeds back the terminal output as environment observations.

Coding Agent Blueprint (Python Skeleton)

Below is a highly structured, complete blueprint for the coding agent. Review this skeleton carefully before starting the project tasks:

import os
import re
import subprocess
import google.generativeai as genai

# Define the isolated sandbox workspace
WORKSPACE_DIR = os.path.abspath("./agent_workspace")
os.makedirs(WORKSPACE_DIR, exist_ok=True)

# --- 1. Define Secure Filesystem and Terminal Tools ---

def safe_path(relative_path: str) -> str:
    """Resolve *relative_path* inside the workspace and reject anything that escapes it.

    Args:
        relative_path: Path supplied by the agent, interpreted relative to
            ``WORKSPACE_DIR``.

    Returns:
        The normalized absolute path of the target inside the workspace.

    Raises:
        PermissionError: If the path resolves to a location outside
            ``WORKSPACE_DIR``.
    """
    abs_path = os.path.abspath(os.path.join(WORKSPACE_DIR, relative_path))
    try:
        # Compare fully resolved paths so a symlink placed inside the workspace
        # cannot redirect the agent to files outside of it.
        workspace_root = os.path.realpath(WORKSPACE_DIR)
        target = os.path.realpath(abs_path)
        # commonpath compares whole path components. A plain string-prefix
        # check would accept a sibling such as "<workspace>_evil/x.py".
        inside = os.path.commonpath([workspace_root, target]) == workspace_root
    except ValueError:
        # Raised for paths that cannot be compared or resolved (e.g. different
        # drives on Windows, embedded NUL bytes); treat them as outside.
        inside = False
    if not inside:
        raise PermissionError(f"Access Denied: Path {relative_path} is outside the workspace!")
    return abs_path

def write_file(filepath: str, content: str) -> str:
    """Write *content* to *filepath* inside the workspace, overwriting any existing file.

    Args:
        filepath: Destination path relative to the workspace.
        content: Text to store, encoded as UTF-8.

    Returns:
        A confirmation message naming the file that was written.

    Raises:
        PermissionError: If ``safe_path`` rejects *filepath* as outside the
            workspace.
        OSError: If the file or its parent folders cannot be created.
    """
    path = safe_path(filepath)
    # Agents frequently target nested paths such as "tests/test_x.py"; create
    # the parent folders so the write does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return f"Successfully wrote file to: {filepath}"

def read_file(filepath: str) -> str:
    """Return the UTF-8 text of *filepath* inside the workspace.

    Args:
        filepath: Path relative to the workspace.

    Returns:
        The file's contents, or an ``"Error: ..."`` message when the path is
        missing or is a directory.

    Raises:
        PermissionError: If ``safe_path`` rejects *filepath* as outside the
            workspace.
    """
    path = safe_path(filepath)
    # A directory "exists" but cannot be opened as a text file; report it as
    # an error message instead of letting open() raise.
    if os.path.isdir(path):
        return f"Error: {filepath} is a directory, not a file."
    try:
        # Open directly rather than checking existence first, so a file that
        # disappears between the check and the open is still handled.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return f"Error: File {filepath} does not exist."

def run_tests(filepath: str) -> str:
    """Run a Python script from the workspace in a subprocess and report the outcome.

    Args:
        filepath: Path of the script to execute, relative to the workspace.

    Returns:
        A ``"SUCCESS: ..."`` message when the script exits with code 0, a
        ``"FAILURE: ..."`` message carrying stderr (or stdout when stderr is
        empty) otherwise, or an ``"Error executing tests: ..."`` message when
        the subprocess could not be run or timed out.

    Raises:
        PermissionError: If ``safe_path`` rejects *filepath* as outside the
            workspace.
    """
    import sys  # local import: only this tool needs the interpreter path

    path = safe_path(filepath)
    print(f"⚙️ Running tests on {filepath}...")
    try:
        result = subprocess.run(
            # Use the interpreter running the agent; a bare "python" may be
            # missing from PATH or point at a different installation.
            [sys.executable or "python", path],
            capture_output=True,
            text=True,
            # Undecodable bytes in the script's output must not abort the run.
            errors="replace",
            timeout=10,
            # Keep relative file access by the script inside the sandbox.
            cwd=WORKSPACE_DIR,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # Covers a missing interpreter as well as subprocess.TimeoutExpired.
        return f"Error executing tests: {str(e)}"
    if result.returncode == 0:
        return "SUCCESS: All assertions passed successfully!"
    return f"FAILURE: Traceback / Errors:\n\n{result.stderr or result.stdout}"

# --- 2. System Instructions & ReAct Loop ---

# Prompt placed at the start of the context sent to the model on every round.
# It lists the three tools the loop can dispatch (write_file, read_file,
# run_tests), fixes the Thought / Action / Observation reply format, and
# defines the "Final Answer: DONE" marker that the loop checks to stop.
SYSTEM_PROMPT = """You are an elite autonomous Software Engineer Agent. 
Your goal is to write clean, correct code and verify its accuracy by running tests.

Available tools:
- WRITE: write_file[filepath, content_string]
- READ: read_file[filepath]
- TEST: run_tests[filepath]

Structure your answers EXACTLY as follows:
Thought: Reason about your current files and progress.
Action: tool_name[arguments]
Observation: (This will be provided by the system)

If the tests execute successfully and you are satisfied that your code is perfectly correct, output:
Thought: I have verified the code works perfectly.
Final Answer: DONE
"""

def _strip_code_wrapping(content: str) -> str:
    """Return *content* without a surrounding Markdown fence or matching quote pair."""
    text = content.strip()
    fenced = re.fullmatch(r"```[^\n]*\n(.*?)\n?```", text, re.DOTALL)
    if fenced:
        return fenced.group(1)
    for quote in ("```", '"""', "'''", '"', "'", "`"):
        if len(text) >= 2 * len(quote) and text.startswith(quote) and text.endswith(quote):
            return text[len(quote):-len(quote)]
    return text


def _parse_action(model_output: str):
    """Return ``(tool_name, args)`` for the first ``Action:`` in *model_output*, or ``None``."""
    header = re.search(r"Action:\s*(\w+)\[", model_output)
    if header is None:
        return None
    tool_name = header.group(1)
    # Anything after a model-written "Observation:" is hallucinated output and
    # must not leak into the tool arguments.
    rest = re.split(r"\n\s*Observation:", model_output[header.end():], maxsplit=1)[0]
    if tool_name == "write_file":
        # File content may itself contain "]", so close on the last bracket.
        close = rest.rfind("]")
        if close == -1:
            return None
        filepath, comma, content = rest[:close].partition(",")
        if not comma or not filepath.strip():
            return None
        return tool_name, (filepath.strip(), _strip_code_wrapping(content))
    close = rest.find("]")
    if close == -1:
        return None
    return tool_name, (rest[:close].strip(),)


def execute_agent_loop(task: str, max_rounds=8, model_name: str = "gemini-1.5-pro"):
    """Drive the ReAct loop: ask the model, run the requested tool, feed back the result.

    Each round sends the accumulated context to the model, prints its reply,
    and stops when the reply contains ``Final Answer: DONE``. Otherwise the
    first ``Action:`` in the reply is parsed, the matching tool is executed,
    and its result is appended to the context as an ``Observation``.

    Args:
        task: Natural-language description of the coding task.
        max_rounds: Maximum number of model calls before giving up.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        None. Progress is reported through ``print``.
    """
    tools = {
        "write_file": write_file,
        "read_file": read_file,
        "run_tests": run_tests,
    }
    model = genai.GenerativeModel(model_name)
    context = SYSTEM_PROMPT + f"\n\nUser Task: {task}\n"

    for round_num in range(max_rounds):
        print(f"\n--- ROUND {round_num + 1} ---")
        response = model.generate_content(context)
        model_output = response.text
        print(model_output)

        context += model_output

        if "Final Answer: DONE" in model_output:
            print("\n🎉 SUCCESS: Coding Agent finished the task!")
            break

        parsed = _parse_action(model_output)
        if parsed is None or parsed[0] not in tools:
            obs = "Error: Invalid action format. Make sure to use tool_name[args]."
        else:
            tool_name, args = parsed
            try:
                obs = tools[tool_name](*args)
            except (OSError, ValueError) as exc:
                # Sandbox violations and filesystem errors become observations
                # so the agent can correct itself instead of crashing the loop.
                obs = f"Error: {exc}"

        print(f"Observation: {obs}")
        context += f"\nObservation: {obs}\n"
    else:
        # Only reached when no round produced the final-answer marker.
        print(f"\n⚠️ Agent stopped after {max_rounds} rounds without a final answer.")

Project Tasks

Navigate to the code workspace on the right. Your assignment is to complete the coding agent framework and run it against a buggy program:

[ ] Task 1: Complete the path validation utility inside safe_path to catch directory traversal attacks (throw a ValueError if any path contains double-dots "..").
[ ] Task 2: Build a robust run_tests execution utility that wraps the target script in a temporary test driver file, executing assertions inside a Python subprocess.
[ ] Task 3: Refine the system prompt context parser to support multiple file operations, allowing the model to write code and test suites in separate steps.
[ ] Task 4: Execute the agent with the prompt: *"Write a python program fibonacci.py containing a function fib(n) that returns the nth fibonacci number. Write a separate test driver script test_fib.py with 3 assertion statements checking boundary conditions. Debug your code until the test script runs with zero errors!"* Check the workspace directory for the resulting outputs!

Congratulations! After completing this project, you will have built a functional autonomous software engineering agent from scratch!

import os
import re
import subprocess
import sys

import google.generativeai as genai

# Define the isolated sandbox workspace
WORKSPACE_DIR = os.path.abspath("./agent_workspace")
os.makedirs(WORKSPACE_DIR, exist_ok=True)


# --- 1. Define Secure Filesystem and Terminal Tools ---

def safe_path(relative_path: str) -> str:
    """Resolve *relative_path* inside the workspace and reject anything that escapes it.

    Raises:
        PermissionError: If the path resolves to a location outside
            ``WORKSPACE_DIR``.
    """
    abs_path = os.path.abspath(os.path.join(WORKSPACE_DIR, relative_path))
    try:
        # Compare fully resolved paths so a symlink placed inside the workspace
        # cannot redirect the agent to files outside of it.
        workspace_root = os.path.realpath(WORKSPACE_DIR)
        target = os.path.realpath(abs_path)
        # commonpath compares whole path components. A plain string-prefix
        # check would accept a sibling such as "<workspace>_evil/x.py".
        inside = os.path.commonpath([workspace_root, target]) == workspace_root
    except ValueError:
        # Paths that cannot be compared or resolved are treated as outside.
        inside = False
    if not inside:
        raise PermissionError(f"Access Denied: Path {relative_path} is outside the workspace!")
    return abs_path


def write_file(filepath: str, content: str) -> str:
    """Write *content* to *filepath* inside the workspace, overwriting any existing file."""
    path = safe_path(filepath)
    # Create parent folders so nested targets such as "tests/test_x.py" work.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return f"Successfully wrote file to: {filepath}"


def read_file(filepath: str) -> str:
    """Return the UTF-8 text of *filepath*, or an error message if it cannot be read."""
    path = safe_path(filepath)
    if os.path.isdir(path):
        return f"Error: {filepath} is a directory, not a file."
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return f"Error: File {filepath} does not exist."


def run_tests(filepath: str) -> str:
    """Run a Python script from the workspace in a subprocess and report the outcome."""
    path = safe_path(filepath)
    print(f"⚙️ Running tests on {filepath}...")
    try:
        result = subprocess.run(
            # Use the interpreter running the agent; a bare "python" may be
            # missing from PATH or point at a different installation.
            [sys.executable or "python", path],
            capture_output=True,
            text=True,
            # Undecodable bytes in the script's output must not abort the run.
            errors="replace",
            timeout=10,
            # Keep relative file access by the script inside the sandbox.
            cwd=WORKSPACE_DIR,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # Covers a missing interpreter as well as subprocess.TimeoutExpired.
        return f"Error executing tests: {str(e)}"
    if result.returncode == 0:
        return "SUCCESS: All assertions passed successfully!"
    return f"FAILURE: Traceback / Errors:\n\n{result.stderr or result.stdout}"


# --- 2. System Instructions & ReAct Loop ---

SYSTEM_PROMPT = """You are an elite autonomous Software Engineer Agent. 
Your goal is to write clean, correct code and verify its accuracy by running tests.

Available tools:
- WRITE: write_file[filepath, content_string]
- READ: read_file[filepath]
- TEST: run_tests[filepath]

Structure your answers EXACTLY as follows:
Thought: Reason about your current files and progress.
Action: tool_name[arguments]
Observation: (This will be provided by the system)

If the tests execute successfully and you are satisfied that your code is perfectly correct, output:
Thought: I have verified the code works perfectly.
Final Answer: DONE
"""


def _strip_code_wrapping(content: str) -> str:
    """Return *content* without a surrounding Markdown fence or matching quote pair."""
    text = content.strip()
    fenced = re.fullmatch(r"```[^\n]*\n(.*?)\n?```", text, re.DOTALL)
    if fenced:
        return fenced.group(1)
    for quote in ("```", '"""', "'''", '"', "'", "`"):
        if len(text) >= 2 * len(quote) and text.startswith(quote) and text.endswith(quote):
            return text[len(quote):-len(quote)]
    return text


def _parse_action(model_output: str):
    """Return ``(tool_name, args)`` for the first ``Action:`` in *model_output*, or ``None``."""
    header = re.search(r"Action:\s*(\w+)\[", model_output)
    if header is None:
        return None
    tool_name = header.group(1)
    # Anything after a model-written "Observation:" is hallucinated output and
    # must not leak into the tool arguments.
    rest = re.split(r"\n\s*Observation:", model_output[header.end():], maxsplit=1)[0]
    if tool_name == "write_file":
        # File content may itself contain "]", so close on the last bracket.
        close = rest.rfind("]")
        if close == -1:
            return None
        filepath, comma, content = rest[:close].partition(",")
        if not comma or not filepath.strip():
            return None
        return tool_name, (filepath.strip(), _strip_code_wrapping(content))
    close = rest.find("]")
    if close == -1:
        return None
    return tool_name, (rest[:close].strip(),)


def execute_agent_loop(task: str, max_rounds=8, model_name: str = "gemini-1.5-pro"):
    """Drive the ReAct loop: ask the model, run the requested tool, feed back the result.

    Args:
        task: Natural-language description of the coding task.
        max_rounds: Maximum number of model calls before giving up.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        None. Progress is reported through ``print``.
    """
    tools = {
        "write_file": write_file,
        "read_file": read_file,
        "run_tests": run_tests,
    }
    model = genai.GenerativeModel(model_name)
    context = SYSTEM_PROMPT + f"\n\nUser Task: {task}\n"

    for round_num in range(max_rounds):
        print(f"\n--- ROUND {round_num + 1} ---")
        response = model.generate_content(context)
        model_output = response.text
        print(model_output)

        context += model_output

        if "Final Answer: DONE" in model_output:
            print("\n🎉 SUCCESS: Coding Agent finished the task!")
            break

        parsed = _parse_action(model_output)
        if parsed is None or parsed[0] not in tools:
            obs = "Error: Invalid action format. Make sure to use tool_name[args]."
        else:
            tool_name, args = parsed
            try:
                obs = tools[tool_name](*args)
            except (OSError, ValueError) as exc:
                # Sandbox violations and filesystem errors become observations
                # so the agent can correct itself instead of crashing the loop.
                obs = f"Error: {exc}"

        print(f"Observation: {obs}")
        context += f"\nObservation: {obs}\n"
    else:
        # Only reached when no round produced the final-answer marker.
        print(f"\n⚠️ Agent stopped after {max_rounds} rounds without a final answer.")