Appearance
Legal Discovery Walkthrough
Build an agent for legal document review and discovery.
Overview
This walkthrough demonstrates building a legal discovery agent that:
- Processes legal documents (OCR)
- Searches across document collections
- Extracts relevant information
- Maintains privilege and compliance logs
Prerequisites
- GateFlow account
- Admin API key
- Document collection
Step 1: Create the Agent
bash
# Create the Legal Discovery agent: tool/model permissions, daily limits,
# and compliance settings are all sent in one JSON payload.
curl -X POST https://api.gateflow.ai/v1/mcp/agents \
  -H "Authorization: Bearer gw_prod_admin_key" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Legal Discovery Agent",
    "description": "Document review and discovery for litigation",
    "permissions": {
      "tools": [
        "document/ocr",
        "document/process",
        "document/list",
        "retrieval/search",
        "retrieval/rerank",
        "llm/chat"
      ],
      "models": [
        "gpt-5.2",
        "text-embedding-3-large",
        "rerank-english-v3.0"
      ],
      "data_classification": [
        "confidential",
        "attorney_client_privilege"
      ]
    },
    "limits": {
      "ocr_pages_daily": 10000,
      "cost_daily": 1000.00
    },
    "compliance": {
      "audit_level": "full",
      "litigation_hold": true,
      "matter_id": "CASE-2026-001"
    }
  }'
Step 2: Set Up the Client
python
import base64
import json
import os
from datetime import datetime, timezone

from gateflow_mcp import MCPClient
client = MCPClient(
agent_id="agent_legal_discovery",
api_key="gf-agent-xyz789..."
)
# Verify setup
whoami = client.call_tool("self_inspect/whoami", {})
print(f"Agent: {whoami['name']}")
print(f"Matter ID: {whoami['compliance']['matter_id']}")Step 3: Ingest Documents
python
def ingest_document(
    file_path: str,
    metadata: dict,
    *,
    collection: str = "discovery-2026-001",
    matter_id: str = "CASE-2026-001",
):
    """Ingest a document for discovery.

    Reads the file at ``file_path``, base64-encodes its bytes, and submits
    it to the ``document/process`` tool through the module-level ``client``.

    Args:
        file_path: Path of the file to upload; only its base name is sent
            as the ``filename``.
        metadata: Mapping that may carry ``custodian``, ``date_range`` and
            ``type``. Missing keys are forwarded as ``None``.
        collection: Target collection name. Defaults to the walkthrough's
            ``discovery-2026-001`` collection.
        matter_id: Matter identifier stored in the document metadata.

    Returns:
        The ``document/process`` tool result, unchanged.
    """
    with open(file_path, "rb") as f:
        file_b64 = base64.b64encode(f.read()).decode()

    return client.call_tool(
        name="document/process",
        arguments={
            "file": file_b64,
            "filename": os.path.basename(file_path),
            "collection": collection,
            "classification": "attorney_client_privilege",
            "chunk_size": 1500,
            "chunk_overlap": 300,
            "detect_pii": True,
            "metadata": {
                "matter_id": matter_id,
                "custodian": metadata.get("custodian"),
                "date_range": metadata.get("date_range"),
                "document_type": metadata.get("type"),
            },
        },
    )
# Ingest a batch of documents
documents = [
{"path": "contracts/agreement_001.pdf", "custodian": "John Smith", "type": "contract"},
{"path": "emails/export_001.pdf", "custodian": "Jane Doe", "type": "email"},
{"path": "memos/internal_001.pdf", "custodian": "Legal Team", "type": "memo"}
]
for doc in documents:
result = ingest_document(doc["path"], doc)
print(f"Ingested: {doc['path']} -> {result['document_id']}")
print(f" Pages: {result['pages']}, Chunks: {result['chunks']}")Step 4: Search for Relevant Documents
python
def search_discovery(query: str, filters: dict = None):
    """Search the discovery collection and rerank any hits.

    Calls ``retrieval/search`` (limit 50, content included) and, when it
    returns results, sends their contents to ``retrieval/rerank`` with
    ``top_n`` 20. Each reranked hit gets a ``relevance_score`` written onto
    it, and the result list is sorted in place by that score, highest
    first; hits without a rerank score sort as 0.

    Args:
        query: Search text, used for both the search and the rerank call.
        filters: Optional filter mapping; only sent when truthy.

    Returns:
        The ``retrieval/search`` result, with its ``results`` list updated
        as described above.
    """
    arguments = {
        "query": query,
        "collection": "discovery-2026-001",
        "limit": 50,
        "include_content": True,
    }
    if filters:
        arguments["filters"] = filters

    # First pass: plain retrieval over the collection.
    search_result = client.call_tool(name="retrieval/search", arguments=arguments)

    hits = search_result["results"]
    if not hits:
        return search_result

    # Second pass: rerank the retrieved contents against the same query.
    contents = [hit["content"] for hit in hits]
    reranked = client.call_tool(
        name="retrieval/rerank",
        arguments={"query": query, "documents": contents, "top_n": 20},
    )

    # Rerank entries point back at the original hits through "index".
    for ranked in reranked["results"]:
        hits[ranked["index"]]["relevance_score"] = ranked["relevance_score"]

    hits.sort(key=lambda hit: hit.get("relevance_score", 0), reverse=True)
    return search_result
# Search examples
results = search_discovery(
"breach of contract damages",
filters={
"metadata.document_type": {"$in": ["contract", "email"]},
"metadata.date_range": {"$gte": "2025-01-01"}
}
)
print(f"Found {len(results['results'])} relevant documents:")
for r in results["results"][:10]:
print(f" - {r['title']} (score: {r.get('relevance_score', r['score']):.3f})")Step 5: Analyze Documents
python
def analyze_document_relevance(document_id: str, search_terms: list):
"""Analyze a document's relevance to search terms."""
# Get document content
doc = client.call_tool(
name="document/status",
arguments={"document_id": document_id}
)
# Analyze with LLM
analysis = client.call_tool(
name="llm/chat",
arguments={
"model": "gpt-5.2",
"messages": [
{
"role": "system",
"content": """You are a legal document analyst.
Analyze the document for relevance to the search terms.
Identify key passages and rate overall relevance.
Flag any privilege concerns."""
},
{
"role": "user",
"content": f"""Analyze this document for relevance to: {search_terms}
Document: {doc['text'][:5000]}
Provide:
1. Relevance rating (1-5)
2. Key relevant passages (with page numbers if available)
3. Privilege concerns
4. Recommended action (include/exclude/review)"""
}
],
"temperature": 0.2
}
)
return {
"document_id": document_id,
"analysis": analysis["content"],
"audit_id": analysis.get("metadata", {}).get("audit_id")
}Step 6: Privilege Review
python
def check_privilege(document_id: str):
    """Ask the LLM whether a document looks privileged.

    Loads the document through ``document/status`` and sends the first
    5000 characters of its text to ``llm/chat`` (model ``gpt-5.2``,
    temperature 0.1) with a privilege-review prompt.

    Args:
        document_id: Identifier passed to ``document/status``.

    Returns:
        A dict with ``document_id`` and ``privilege_review`` (the chat
        result's ``content``).
    """
    document = client.call_tool(
        name="document/status",
        arguments={"document_id": document_id},
    )

    reviewer_role = (
        "You are a legal privilege reviewer.\n"
        "Identify potential attorney-client privilege or work product doctrine issues.\n"
        "Flag any communications that may be privileged."
    )
    review_request = (
        "Review this document for privilege:\n"
        f"{document['text'][:5000]}\n"
        "Identify:\n"
        "1. Is this potentially privileged? (Yes/No/Maybe)\n"
        "2. Privilege type (attorney-client, work product, joint defense, other)\n"
        "3. Specific privileged sections\n"
        "4. Recommended handling"
    )

    chat_arguments = {
        "model": "gpt-5.2",
        "messages": [
            {"role": "system", "content": reviewer_role},
            {"role": "user", "content": review_request},
        ],
        "temperature": 0.1,
    }
    reply = client.call_tool(name="llm/chat", arguments=chat_arguments)

    return {"document_id": document_id, "privilege_review": reply["content"]}
# Review documents for privilege
privileged_docs = []
for doc_id in document_ids:
review = check_privilege(doc_id)
if "Yes" in review["privilege_review"][:100]:
privileged_docs.append(doc_id)
print(f"PRIVILEGED: {doc_id}")Step 7: Generate Production Report
python
def generate_production_report(search_query: str, documents: list):
"""Generate a discovery production report."""
report_data = {
"matter_id": "CASE-2026-001",
"search_query": search_query,
"date_generated": "2026-02-16",
"documents_reviewed": len(documents),
"documents_produced": 0,
"documents_withheld": 0,
"privilege_log": []
}
for doc in documents:
if doc.get("privileged"):
report_data["documents_withheld"] += 1
report_data["privilege_log"].append({
"document_id": doc["id"],
"privilege_type": doc["privilege_type"],
"description": doc["privilege_description"]
})
else:
report_data["documents_produced"] += 1
# Generate summary
summary = client.call_tool(
name="llm/chat",
arguments={
"model": "gpt-5.2",
"messages": [{
"role": "system",
"content": "Generate a professional legal discovery production summary."
}, {
"role": "user",
"content": f"Generate summary for: {json.dumps(report_data)}"
}],
"temperature": 0.3
}
)
report_data["summary"] = summary["content"]
return report_dataStep 8: Export Results
python
def export_discovery_results(search_id: str, format: str = "pdf"):
"""Export discovery results for production."""
# Get all relevant documents
search_result = client.call_tool(
name="retrieval/search",
arguments={
"query": f"search_id:{search_id}",
"collection": "discovery-2026-001",
"limit": 1000
}
)
# Generate export
export_data = {
"search_id": search_id,
"matter_id": "CASE-2026-001",
"export_date": "2026-02-16",
"document_count": len(search_result["results"]),
"documents": []
}
for doc in search_result["results"]:
export_data["documents"].append({
"bates_number": f"PROD-{doc['id'][:8].upper()}",
"original_filename": doc.get("metadata", {}).get("filename"),
"custodian": doc.get("metadata", {}).get("custodian"),
"date": doc.get("metadata", {}).get("date"),
"relevance_score": doc.get("relevance_score")
})
return export_dataComplete Workflow
python
def run_legal_discovery(matter_id: str, search_terms: list):
    """Complete legal discovery workflow.

    Searches with the space-joined terms, analyzes up to 50 hits, runs a
    privilege check on each analyzed document, and generates the
    production report. All tool calls go through the module-level
    ``client`` used by the helper functions.

    Args:
        matter_id: Matter identifier for the run.
            NOTE(review): not forwarded anywhere, because the helpers take
            no matter argument here; confirm whether it should be.
        search_terms: Terms joined into the search query and passed to the
            relevance analysis.

    Returns:
        The report produced by ``generate_production_report``.
    """
    query = " ".join(search_terms)

    # 1. Search for relevant documents
    print("Searching documents...")
    results = search_discovery(query)
    print(f"Found {len(results['results'])} potentially relevant documents")

    # 2. Analyze relevance
    print("Analyzing relevance...")
    analyzed = []
    for doc in results["results"][:50]:
        analysis = analyze_document_relevance(doc["document_id"], search_terms)
        analyzed.append({**doc, **analysis})

    # 3. Check privilege
    print("Checking privilege...")
    for doc in analyzed:
        privilege = check_privilege(doc["document_id"])
        doc["privilege_review"] = privilege
        review_text = privilege["privilege_review"]
        # Same heuristic as the Step 6 review loop: a "Yes" near the start
        # of the review marks the document as privileged.
        doc["privileged"] = "Yes" in review_text[:100]
        if doc["privileged"]:
            # Fields the production report reads for its privilege log.
            doc.setdefault("id", doc["document_id"])
            doc["privilege_type"] = "unspecified"
            doc["privilege_description"] = review_text

    # 4. Generate report
    print("Generating report...")
    return generate_production_report(query, analyzed)
# Run discovery
report = run_legal_discovery(
matter_id="CASE-2026-001",
search_terms=["breach", "contract", "damages", "liability"]
)
print(json.dumps(report, indent=2))Compliance Features
- Litigation Hold - Documents preserved automatically
- Audit Trail - Complete chain of custody
- Privilege Logging - Automatic privilege flagging
- Bates Numbering - Production numbering
- Export Controls - Controlled document production
Next Steps
- Healthcare Scribe - Medical transcription
- RAG Agent - Knowledge retrieval
- Legal Dictation Template - Legal transcription