Legal Discovery Walkthrough

Build an agent for legal document review and discovery.

Overview

This walkthrough demonstrates building a legal discovery agent that:

Processes legal documents (OCR)
Searches across document collections
Extracts relevant information
Maintains privilege and compliance logs

Prerequisites

GateFlow account
Admin API key
Document collection

Step 1: Create the Agent

bash

# Register the discovery agent by POSTing its definition to the
# /v1/mcp/agents endpoint.
# The JSON payload lists the tools and models the agent may use, the data
# classifications it may handle, its limits (ocr_pages_daily, cost_daily),
# and its compliance settings (audit level, litigation hold, matter ID).
# NOTE(review): "gw_prod_admin_key" looks like a placeholder — substitute a
# real admin API key before running.
curl -X POST https://api.gateflow.ai/v1/mcp/agents \
  -H "Authorization: Bearer gw_prod_admin_key" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Legal Discovery Agent",
    "description": "Document review and discovery for litigation",
    "permissions": {
      "tools": [
        "document/ocr",
        "document/process",
        "document/list",
        "retrieval/search",
        "retrieval/rerank",
        "llm/chat"
      ],
      "models": [
        "gpt-5.2",
        "text-embedding-3-large",
        "rerank-english-v3.0"
      ],
      "data_classification": [
        "confidential",
        "attorney_client_privilege"
      ]
    },
    "limits": {
      "ocr_pages_daily": 10000,
      "cost_daily": 1000.00
    },
    "compliance": {
      "audit_level": "full",
      "litigation_hold": true,
      "matter_id": "CASE-2026-001"
    }
  }'

Step 2: Set Up the Client

python

import base64
import json
import os

from gateflow_mcp import MCPClient

# Connect as the legal discovery agent (the key shown is the walkthrough's
# sample value).
client = MCPClient(agent_id="agent_legal_discovery", api_key="gf-agent-xyz789...")

# Sanity check: ask the gateway which agent these credentials map to and
# print its name and the matter it is bound to.
whoami = client.call_tool("self_inspect/whoami", {})
print(f"Agent: {whoami['name']}")
compliance = whoami["compliance"]
print(f"Matter ID: {compliance['matter_id']}")

Step 3: Ingest Documents

python

def ingest_document(file_path: str, metadata: dict):
    """Send one file to the ``document/process`` tool for ingestion.

    The file is read in binary mode, base64-encoded, and submitted to the
    ``discovery-2026-001`` collection together with chunking settings, the
    PII-detection flag and matter metadata.

    Args:
        file_path: Path of the file to ingest; only its base name is sent
            as the ``filename`` argument.
        metadata: Per-document details. The ``custodian``, ``date_range``
            and ``type`` keys are read; a missing key is sent as ``None``.

    Returns:
        Whatever the ``document/process`` tool call returns.
    """
    with open(file_path, "rb") as source:
        encoded = base64.b64encode(source.read()).decode()

    payload = {
        "file": encoded,
        "filename": os.path.basename(file_path),
        "collection": "discovery-2026-001",
        "classification": "attorney_client_privilege",
        "chunk_size": 1500,
        "chunk_overlap": 300,
        "detect_pii": True,
        "metadata": {
            "matter_id": "CASE-2026-001",
            "custodian": metadata.get("custodian"),
            "date_range": metadata.get("date_range"),
            "document_type": metadata.get("type"),
        },
    }
    return client.call_tool(name="document/process", arguments=payload)

# Sample batch. Each entry doubles as the metadata dict passed to
# ingest_document(), which reads its "custodian" and "type" keys.
documents = [
    {"path": "contracts/agreement_001.pdf", "custodian": "John Smith", "type": "contract"},
    {"path": "emails/export_001.pdf", "custodian": "Jane Doe", "type": "email"},
    {"path": "memos/internal_001.pdf", "custodian": "Legal Team", "type": "memo"},
]

# Ingest the entries one at a time and echo what the tool reported.
for entry in documents:
    outcome = ingest_document(entry["path"], entry)
    print(f"Ingested: {entry['path']} -> {outcome['document_id']}")
    print(f"  Pages: {outcome['pages']}, Chunks: {outcome['chunks']}")

Step 4: Search for Relevant Documents

python

def search_discovery(query: str, filters: dict = None):
    """Search the discovery collection and order the hits by rerank score.

    Runs ``retrieval/search`` (limit 50, content included) and, when it
    returns any hits, passes their content to ``retrieval/rerank`` asking
    for the top 20. Each rerank score is written onto the matching hit as
    ``relevance_score`` and the hit list is sorted in place, highest score
    first. Hits the reranker did not return keep no ``relevance_score``
    and are ordered as if their score were 0.

    Args:
        query: Search text, also used as the rerank query.
        filters: Optional filter mapping; only sent when truthy.

    Returns:
        The ``retrieval/search`` response, with its ``results`` list
        annotated and re-ordered as described above.
    """
    request = {
        "query": query,
        "collection": "discovery-2026-001",
        "limit": 50,
        "include_content": True,
    }
    if filters:
        request["filters"] = filters

    found = client.call_tool(name="retrieval/search", arguments=request)
    hits = found["results"]
    if not hits:
        # Nothing to rerank; return the search response untouched.
        return found

    ranking = client.call_tool(
        name="retrieval/rerank",
        arguments={
            "query": query,
            "documents": [hit["content"] for hit in hits],
            "top_n": 20,
        },
    )

    # Each rerank entry points back at a hit through its "index" field.
    for ranked in ranking["results"]:
        target = hits[ranked["index"]]
        target["relevance_score"] = ranked["relevance_score"]

    # `hits` is the same list object as found["results"], so sorting it in
    # place re-orders the response that is returned.
    hits.sort(key=lambda hit: hit.get("relevance_score", 0), reverse=True)
    return found

# Example search restricted by document type and date range.
# NOTE(review): "$in" / "$gte" presumably mean "one of" / "at or after" —
# confirm against the retrieval/search filter syntax.
search_filters = {
    "metadata.document_type": {"$in": ["contract", "email"]},
    "metadata.date_range": {"$gte": "2025-01-01"},
}
results = search_discovery("breach of contract damages", filters=search_filters)

# Show the ten best hits, preferring the rerank score over the raw score.
hits = results["results"]
print(f"Found {len(hits)} relevant documents:")
for hit in hits[:10]:
    print(f"  - {hit['title']} (score: {hit.get('relevance_score', hit['score']):.3f})")

Step 5: Analyze Documents

python

def analyze_document_relevance(document_id: str, search_terms: list):
    """Ask the LLM to rate one document's relevance to the search terms.

    Fetches the document through ``document/status``, then sends the first
    5000 characters of its ``text`` to ``llm/chat`` with a prompt requesting
    a relevance rating, key passages, privilege concerns and a recommended
    action.

    Args:
        document_id: Identifier of the document to analyze.
        search_terms: Terms to assess relevance against; interpolated into
            the prompt as-is.

    Returns:
        dict with ``document_id``, ``analysis`` (the chat response's
        ``content``) and ``audit_id`` (taken from the response's
        ``metadata``; ``None`` when absent).
    """
    system_prompt = """You are a legal document analyst.
Analyze the document for relevance to the search terms.
Identify key passages and rate overall relevance.
Flag any privilege concerns."""

    # NOTE(review): "document/status" is not among the tools granted to the
    # agent in Step 1 — confirm the agent is allowed to call it.
    record = client.call_tool(
        name="document/status",
        arguments={"document_id": document_id},
    )

    user_prompt = f"""Analyze this document for relevance to: {search_terms}

Document: {record['text'][:5000]}

Provide:
1. Relevance rating (1-5)
2. Key relevant passages (with page numbers if available)
3. Privilege concerns
4. Recommended action (include/exclude/review)"""

    reply = client.call_tool(
        name="llm/chat",
        arguments={
            "model": "gpt-5.2",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.2,
        },
    )

    audit_id = reply.get("metadata", {}).get("audit_id")
    return {
        "document_id": document_id,
        "analysis": reply["content"],
        "audit_id": audit_id,
    }

Step 6: Privilege Review

python

def check_privilege(document_id: str):
    """Ask the LLM whether a document may be privileged.

    Fetches the document through ``document/status`` and sends the first
    5000 characters of its ``text`` to ``llm/chat`` with a prompt asking
    for a Yes/No/Maybe answer, the privilege type, the privileged sections
    and recommended handling.

    Args:
        document_id: Identifier of the document to review.

    Returns:
        dict with ``document_id`` and ``privilege_review`` (the chat
        response's ``content``, i.e. free text).
    """
    system_prompt = """You are a legal privilege reviewer.
Identify potential attorney-client privilege or work product doctrine issues.
Flag any communications that may be privileged."""

    # NOTE(review): "document/status" is not among the tools granted to the
    # agent in Step 1 — confirm the agent is allowed to call it.
    record = client.call_tool(
        name="document/status",
        arguments={"document_id": document_id},
    )

    user_prompt = f"""Review this document for privilege:

{record['text'][:5000]}

Identify:
1. Is this potentially privileged? (Yes/No/Maybe)
2. Privilege type (attorney-client, work product, joint defense, other)
3. Specific privileged sections
4. Recommended handling"""

    reply = client.call_tool(
        name="llm/chat",
        arguments={
            "model": "gpt-5.2",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.1,
        },
    )

    return {"document_id": document_id, "privilege_review": reply["content"]}

# Review documents for privilege.
# The IDs to review come from the Step 4 search (`results`). They are
# de-duplicated in first-seen order so a document that appears in several
# hits is only reviewed once.
document_ids = list(
    dict.fromkeys(hit["document_id"] for hit in results["results"])
)

privileged_docs = []
for doc_id in document_ids:
    review = check_privilege(doc_id)
    # Heuristic: the prompt asks for a Yes/No/Maybe verdict first, so look
    # for "Yes" near the start of the free-text review.
    if "Yes" in review["privilege_review"][:100]:
        privileged_docs.append(doc_id)
        print(f"PRIVILEGED: {doc_id}")

Step 7: Generate Production Report

python

def generate_production_report(search_query: str, documents: list):
    """Build a production report with a privilege log and an LLM summary.

    Documents whose ``privileged`` entry is truthy are counted as withheld
    and listed in the privilege log; all others are counted as produced.
    The assembled data is then serialized to JSON and sent to ``llm/chat``
    to obtain a prose summary.

    Args:
        search_query: The query the reviewed documents were found with.
        documents: Reviewed document dicts. For privileged documents the
            ``id`` (or, failing that, ``document_id``), ``privilege_type``
            and ``privilege_description`` entries are copied into the log;
            missing entries are logged as ``None`` instead of raising.

    Returns:
        dict with ``matter_id``, ``search_query``, ``date_generated``,
        ``documents_reviewed``, ``documents_produced``,
        ``documents_withheld``, ``privilege_log`` and ``summary``.
    """
    privilege_log = [
        {
            # Other steps of this walkthrough key documents by
            # "document_id", so accept either spelling.
            "document_id": doc.get("id", doc.get("document_id")),
            "privilege_type": doc.get("privilege_type"),
            "description": doc.get("privilege_description"),
        }
        for doc in documents
        if doc.get("privileged")
    ]
    withheld = len(privilege_log)

    # NOTE: the matter ID and generation date are fixed sample values.
    report_data = {
        "matter_id": "CASE-2026-001",
        "search_query": search_query,
        "date_generated": "2026-02-16",
        "documents_reviewed": len(documents),
        "documents_produced": len(documents) - withheld,
        "documents_withheld": withheld,
        "privilege_log": privilege_log,
    }

    # Generate summary
    summary = client.call_tool(
        name="llm/chat",
        arguments={
            "model": "gpt-5.2",
            "messages": [
                {
                    "role": "system",
                    "content": "Generate a professional legal discovery production summary.",
                },
                {
                    "role": "user",
                    "content": f"Generate summary for: {json.dumps(report_data)}",
                },
            ],
            "temperature": 0.3,
        },
    )

    report_data["summary"] = summary["content"]
    return report_data

Step 8: Export Results

python

def export_discovery_results(search_id: str, format: str = "pdf"):
    """Assemble an export manifest for the documents of one search.

    Queries ``retrieval/search`` with ``search_id:<search_id>`` (limit 1000)
    and builds one manifest row per hit, including a Bates number derived
    from the first eight characters of the hit's ``id``, upper-cased.

    Args:
        search_id: Identifier of the search whose documents are exported.
        format: Accepted for interface compatibility; not used by the
            current implementation.

    Returns:
        dict with ``search_id``, ``matter_id``, ``export_date``,
        ``document_count`` and ``documents`` (the list of manifest rows).
    """
    found = client.call_tool(
        name="retrieval/search",
        arguments={
            "query": f"search_id:{search_id}",
            "collection": "discovery-2026-001",
            "limit": 1000,
        },
    )
    hits = found["results"]

    rows = [
        {
            "bates_number": f"PROD-{hit['id'][:8].upper()}",
            "original_filename": hit.get("metadata", {}).get("filename"),
            "custodian": hit.get("metadata", {}).get("custodian"),
            "date": hit.get("metadata", {}).get("date"),
            "relevance_score": hit.get("relevance_score"),
        }
        for hit in hits
    ]

    return {
        "search_id": search_id,
        "matter_id": "CASE-2026-001",
        "export_date": "2026-02-16",
        "document_count": len(hits),
        "documents": rows,
    }

Complete Workflow

python

def run_legal_discovery(matter_id: str, search_terms: list):
    """Run the full pipeline: search, relevance analysis, privilege, report.

    Args:
        matter_id: Matter identifier. NOTE(review): currently not consumed —
            the helper functions use a fixed matter ID; confirm whether it
            should be threaded through.
        search_terms: Terms to search for. They are joined with spaces for
            the search query and passed as a list to the relevance analysis.

    Returns:
        The report dict produced by ``generate_production_report``.
    """
    # The helper functions all use the module-level ``client``; a client
    # constructed inside this function would never be used, so none is made.
    query = " ".join(search_terms)

    # 1. Search for relevant documents
    print("Searching documents...")
    results = search_discovery(query)
    print(f"Found {len(results['results'])} potentially relevant documents")

    # 2. Analyze relevance (first 50 hits only)
    print("Analyzing relevance...")
    analyzed = []
    for doc in results["results"][:50]:
        analysis = analyze_document_relevance(doc["document_id"], search_terms)
        analyzed.append({**doc, **analysis})

    # 3. Check privilege
    print("Checking privilege...")
    for doc in analyzed:
        privilege = check_privilege(doc["document_id"])
        doc["privilege_review"] = privilege

        # The report only withholds documents whose "privileged" entry is
        # truthy, so translate the free-text review into that flag using
        # the same "Yes near the start" heuristic as the Step 6 loop.
        if "Yes" in privilege["privilege_review"][:100]:
            doc["privileged"] = True
            # Fill in the fields the privilege log reads, without
            # overwriting anything the search hit already carried.
            doc.setdefault("id", doc["document_id"])
            # The privilege type exists only as free text inside the review.
            doc.setdefault("privilege_type", "unspecified")
            # Keep the log entry neutral rather than copying review text
            # that may quote privileged material.
            doc.setdefault(
                "privilege_description",
                "Flagged by automated privilege review",
            )

    # 4. Generate report
    print("Generating report...")
    return generate_production_report(query, analyzed)

# Run the complete workflow for the sample matter and print the resulting
# report as indented JSON.
discovery_terms = ["breach", "contract", "damages", "liability"]
report = run_legal_discovery(matter_id="CASE-2026-001", search_terms=discovery_terms)

print(json.dumps(report, indent=2))

Compliance Features

Litigation Hold - Documents preserved automatically
Audit Trail - Complete chain of custody
Privilege Logging - Automatic privilege flagging
Bates Numbering - Production numbering
Export Controls - Controlled document production

Next Steps

Healthcare Scribe - Medical transcription
RAG Agent - Knowledge retrieval
Legal Dictation Template - Legal transcription

Legal Discovery Walkthrough

Overview

Prerequisites

Step 1: Create the Agent

Step 2: Set Up the Client

Step 3: Ingest Documents

Step 4: Search for Relevant Documents

Step 5: Analyze Documents

Step 6: Privilege Review

Step 7: Generate Production Report

Step 8: Export Results

Complete Workflow

Compliance Features

Next Steps

Legal Discovery Walkthrough

Overview

Prerequisites

Step 1: Create the Agent

Step 2: Set Up the Client

Step 3: Ingest Documents

Step 4: Search for Relevant Documents

Step 5: Analyze Documents

Step 6: Privilege Review

Step 7: Generate Production Report

Step 8: Export Results

Complete Workflow

Compliance Features

Next Steps