Appearance
RAG Agent Walkthrough
Build a Retrieval-Augmented Generation agent for knowledge-based Q&A.
Overview
This walkthrough demonstrates building a RAG agent that:
- Ingests and indexes documents
- Retrieves relevant context for queries
- Generates accurate, grounded answers
- Cites sources in responses
Prerequisites
- GateFlow account
- Documents to index
- Admin API key
Step 1: Create the Agent
bash
# Register the RAG agent with scoped tool, model, and collection permissions.
# Replace gw_prod_admin_key with your own admin API key.
curl -X POST https://api.gateflow.ai/v1/mcp/agents \
  -H "Authorization: Bearer gw_prod_admin_key" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Knowledge Assistant",
    "description": "RAG-powered Q&A agent",
    "permissions": {
      "tools": [
        "llm/chat",
        "llm/embed",
        "retrieval/search",
        "retrieval/rerank",
        "retrieval/search_and_rerank",
        "document/process"
      ],
      "models": [
        "gpt-5.2",
        "text-embedding-3-large",
        "rerank-english-v3.0"
      ],
      "collections": [
        "knowledge-base"
      ]
    },
    "limits": {
      "requests_per_minute": 60,
      "cost_daily": 100.00
    }
  }'
# Step 2: Set Up the Client
python
import base64
import os
from datetime import datetime

from gateflow_mcp import MCPClient
# Authenticate as the agent created in Step 1.
client = MCPClient(
    agent_id="agent_knowledge",
    api_key="gf-agent-xyz789...",
)

# Sanity-check credentials and granted permissions before ingesting anything.
identity = client.call_tool("self_inspect/whoami", {})
print(f"Agent: {identity['name']}")
print(f"Collections: {identity['permissions']['collections']}")
# Step 3: Ingest Documents
python
def ingest_documents(file_paths: list, collection: str = "knowledge-base"):
    """Ingest local files into a GateFlow collection.

    Each file is read, base64-encoded, and sent to the ``document/process``
    tool, which chunks and indexes it.

    Args:
        file_paths: Paths of the documents to ingest.
        collection: Target collection (must be in the agent's permissions).

    Returns:
        A list of dicts with the source ``file`` path, the assigned
        ``document_id``, and the number of ``chunks`` produced.
    """
    results = []
    for path in file_paths:
        with open(path, "rb") as f:
            file_b64 = base64.b64encode(f.read()).decode()
        result = client.call_tool(
            name="document/process",
            arguments={
                "file": file_b64,
                "filename": os.path.basename(path),
                "collection": collection,
                "chunk_size": 1000,
                "chunk_overlap": 200,
                "metadata": {
                    "source": path,
                    # Recorded so stale documents can be identified later.
                    "ingested_at": datetime.now().isoformat(),
                },
            },
        )
        results.append({
            "file": path,
            "document_id": result["document_id"],
            "chunks": result["chunks"],
        })
        print(f"Ingested: {path} ({result['chunks']} chunks)")
    return results
# Index the tutorial documents; keep the returned ids for bookkeeping.
docs = ingest_documents(
    [
        "docs/product_guide.pdf",
        "docs/faq.pdf",
        "docs/troubleshooting.md",
    ]
)
# Step 4: Build the RAG Pipeline
python
class RAGAgent:
    """Minimal retrieve-then-generate pipeline over a single collection."""

    def __init__(self, client: MCPClient, collection: str = "knowledge-base"):
        self.client = client
        self.collection = collection

    def search(self, query: str, top_k: int = 10):
        """Retrieve the ``top_k`` reranked chunks for ``query``.

        Fetches a wider candidate set (30) first so the reranker has
        enough material to reorder.
        """
        result = self.client.call_tool(
            name="retrieval/search_and_rerank",
            arguments={
                "query": query,
                "collection": self.collection,
                "limit": top_k,
                "initial_limit": 30,
            },
        )
        return result["results"]

    def build_context(self, results: list, max_tokens: int = 4000):
        """Concatenate result chunks into a labeled context block.

        Chunks are added in rank order and labeled ``[Source N: title]``
        so the model's citations map back to ``results`` indices. Stops
        once the budget is exceeded (~4 characters per token heuristic).
        """
        context_parts = []
        total_length = 0
        for i, r in enumerate(results):
            chunk = f"[Source {i+1}: {r.get('title', 'Unknown')}]\n{r['content']}\n"
            if total_length + len(chunk) > max_tokens * 4:  # rough char estimate
                break
            context_parts.append(chunk)
            total_length += len(chunk)
        return "\n".join(context_parts)

    def generate_answer(self, query: str, context: str, results: list):
        """Ask the LLM to answer ``query`` grounded only in ``context``.

        Returns a dict with the answer text, a source list mirroring the
        ``[Source N]`` citation indices, and token usage.
        """
        response = self.client.call_tool(
            name="llm/chat",
            arguments={
                "model": "gpt-5.2",
                "messages": [
                    {
                        "role": "system",
                        "content": """You are a helpful assistant that answers questions based on the provided context.
Rules:
1. Only use information from the provided context
2. If the context doesn't contain the answer, say so
3. Cite sources using [Source N] notation
4. Be concise but complete"""
                    },
                    {
                        "role": "user",
                        "content": f"""Context:
{context}
Question: {query}
Answer the question using only the context above. Include citations."""
                    }
                ],
                # Low temperature keeps answers factual and close to the context.
                "temperature": 0.3,
            },
        )
        return {
            "answer": response["content"],
            "sources": [
                {
                    "index": i + 1,
                    "title": r.get("title", "Unknown"),
                    # Prefer the reranker's score when available.
                    "score": r.get("rerank_score", r.get("score")),
                    "document_id": r.get("document_id"),
                }
                for i, r in enumerate(results)
            ],
            "usage": response.get("usage", {}),
        }

    def ask(self, query: str):
        """Complete RAG pipeline: search, build context, generate answer."""
        results = self.search(query)
        if not results:
            # Fix: include "usage" so this return has the same shape as
            # generate_answer's and callers can treat both paths uniformly.
            return {
                "answer": "I couldn't find any relevant information to answer your question.",
                "sources": [],
                "usage": {},
            }
        context = self.build_context(results)
        return self.generate_answer(query, context, results)
# Step 5: Use the RAG Agent
python
# Build the pipeline against the default collection.
rag = RAGAgent(client)

# Run one query end to end.
response = rag.ask("How do I reset my password?")

print("Answer:")
print(response["answer"])
print("\nSources:")
for source in response["sources"][:3]:
    print(f" [{source['index']}] {source['title']} (score: {source['score']:.3f})")
# Example Output:
Answer:
To reset your password, follow these steps [Source 1]:
1. Go to the login page and click "Forgot Password"
2. Enter your email address
3. Check your email for the reset link
4. Click the link and enter your new password
5. Password must be at least 12 characters with a mix of letters, numbers, and symbols [Source 2]
The reset link expires after 24 hours. If you don't receive the email, check your spam folder [Source 1].
Sources:
[1] Password Reset Guide (score: 0.952)
[2] Security FAQ (score: 0.891)
[3] Account Settings Help (score: 0.834)

Step 6: Advanced Features
Conversation History
python
class ConversationalRAG(RAGAgent):
    """RAG agent that carries conversation history across turns."""

    def __init__(self, client, collection="knowledge-base"):
        # Fix: restore the parent's default for `collection` so this class
        # can be constructed the same way as RAGAgent.
        super().__init__(client, collection)
        self.history = []  # list of {"query": ..., "answer": ...} turns

    def ask(self, query: str):
        """Answer ``query``, using recent turns to sharpen retrieval."""
        # Fold the last few user queries into the search string so terse
        # follow-ups ("what about X?") still retrieve useful chunks.
        search_query = query
        if self.history:
            recent = " ".join([h["query"] for h in self.history[-3:]])
            search_query = f"{query} {recent}"
        results = self.search(search_query)
        context = self.build_context(results)

        # Give the model a truncated transcript of the last 5 turns.
        history_text = ""
        for h in self.history[-5:]:
            history_text += f"User: {h['query']}\nAssistant: {h['answer'][:200]}...\n\n"

        response = self.client.call_tool(
            name="llm/chat",
            arguments={
                "model": "gpt-5.2",
                "messages": [
                    {"role": "system", "content": "Answer based on context. Cite sources."},
                    {"role": "user", "content": f"""Previous conversation:
{history_text}
Context:
{context}
Current question: {query}"""}
                ]
            }
        )

        self.history.append({
            "query": query,
            "answer": response["content"],
        })
        # NOTE(review): unlike RAGAgent.ask, "sources" here is the raw
        # search-result list, not the structured citation list.
        return {"answer": response["content"], "sources": results}
# Hybrid Search
python
def hybrid_search(self, query: str, keywords: list = None):
    """Run a hybrid (semantic + keyword) search over the collection.

    When ``keywords`` is a non-empty list, each keyword becomes a
    substring filter, OR-ed together; otherwise no filter is applied.
    """
    keyword_filter = None
    if keywords:
        keyword_filter = {"$or": [{"content": {"$contains": kw}} for kw in keywords]}
    payload = {
        "query": query,
        "collection": self.collection,
        "search_type": "hybrid",
        # Weights favor semantic similarity over exact keyword matches.
        "semantic_weight": 0.7,
        "keyword_weight": 0.3,
        "filters": keyword_filter,
    }
    response = self.client.call_tool(name="retrieval/search", arguments=payload)
    return response["results"]
# Query Expansion
python
def expand_query(self, query: str):
    """Generate alternative phrasings of ``query`` for multi-query retrieval.

    Returns the original query followed by up to 3 LLM-generated
    rephrasings. Blank lines in the model output are discarded so a
    chatty reply cannot inject empty search queries.
    """
    expansion = self.client.call_tool(
        name="llm/chat",
        arguments={
            "model": "gpt-5-mini",
            "messages": [{
                "role": "user",
                "content": f"""Generate 3 alternative phrasings for this search query:
Query: {query}
Return only the alternatives, one per line."""
            }],
            # Higher temperature encourages varied phrasings.
            "temperature": 0.7
        }
    )
    # Robustness fix: strip each line and drop empties before slicing,
    # so blank separator lines don't count as alternatives.
    lines = expansion["content"].strip().split("\n")
    alternatives = [line.strip() for line in lines if line.strip()]
    return [query] + alternatives[:3]
# Step 7: Monitoring and Evaluation
python
def evaluate_response(query: str, response: dict, ground_truth: str = None):
    """Compute quality metrics for a RAG response.

    Args:
        query: The user question that produced ``response``.
        response: Dict with "answer" and "sources" (as returned by RAGAgent.ask).
        ground_truth: Optional expected answer; when given, an LLM judge
            rates accuracy 1-5.

    Returns:
        Dict of heuristic metrics; includes "accuracy_score" (int, or
        None when the judge's reply was not a parseable number) only if
        ``ground_truth`` is provided.
    """
    metrics = {
        # Crude length check: very short answers are usually refusals.
        "has_answer": len(response["answer"]) > 50,
        "has_citations": "[Source" in response["answer"],
        "source_count": len(response["sources"]),
        "top_source_score": response["sources"][0]["score"] if response["sources"] else 0
    }
    if ground_truth:
        # Use an LLM judge to score accuracy against the expected answer.
        eval_result = client.call_tool(
            name="llm/chat",
            arguments={
                "model": "gpt-5-mini",
                "messages": [{
                    "role": "user",
                    "content": f"""Rate this answer's accuracy from 1-5:
Question: {query}
Expected: {ground_truth}
Actual: {response['answer']}
Return only the number."""
                }]
            }
        )
        # Robustness fix: the judge may reply with extra text; record
        # None instead of crashing with ValueError.
        try:
            metrics["accuracy_score"] = int(eval_result["content"].strip())
        except ValueError:
            metrics["accuracy_score"] = None
    return metrics
# Complete Example
python
from gateflow_mcp import MCPClient


def main():
    """Interactive Q&A loop against the knowledge base."""
    client = MCPClient(
        agent_id="agent_knowledge",
        api_key="gf-agent-xyz789...",
    )
    rag = RAGAgent(client, collection="knowledge-base")

    print("Knowledge Assistant Ready! (type 'quit' to exit)")
    while True:
        query = input("\nYou: ").strip()
        if query.lower() == "quit":
            break
        result = rag.ask(query)
        print(f"\nAssistant: {result['answer']}")
        if result["sources"]:
            print("\nSources:")
            for s in result["sources"][:3]:
                print(f" • {s['title']}")


if __name__ == "__main__":
    main()
# Best Practices
- Chunk wisely - 500-1500 chars with 10-20% overlap
- Use reranking - Significantly improves relevance
- Set temperature low - 0.2-0.4 for factual answers
- Cite sources - Build trust with citations
- Handle no-results - Graceful "I don't know" responses
- Monitor quality - Track relevance metrics
Next Steps
- Voice Agent - Voice-based assistant
- Retrieval Tools - Search configuration
- Semantic Search - Search guide