Skip to main content

Knowledge Bases

Knowledge bases are accessed through client.knowledge_bases for tenant-wide operations and client.agents.knowledge_bases for agent-scoped assignment and search.

Listing and retrieving

from agentflow import AsyncAgentFlow

async with AsyncAgentFlow.from_profile("local") as client:
    kbs = await client.knowledge_bases.list()
    for kb in kbs:
        print(f"{kb.name} - {kb.file_count} files, {kb.chunk_count} chunks")

    kb = await client.knowledge_bases.retrieve(kbs[0].id)

Creating and uploading

from pathlib import Path
from agentflow import AsyncAgentFlow

async with AsyncAgentFlow.from_profile("local") as client:
    kb = await client.knowledge_bases.create_from_files(
        name="Product Docs",
        files=[Path("product-guide.pdf"), Path("release-notes.md")],
        description="Uploaded product documentation",
        chunk_size=1200,
        overlap_percentage=0.15,
    )

    url_kb = await client.knowledge_bases.create_from_url(
        name="Public Docs",
        website_url="https://docs.example.com",
        description="Crawled public documentation",
        max_crawl_depth=2,
        max_crawl_pages=20,
    )

    upload = await client.knowledge_bases.upload_documents(
        kb.id,
        files=[Path("faq.md")],
    )
create_from_files(...) maps to POST /api/v1/knowledge-bases with multipart form data. create_from_url(...) maps to POST /api/v1/knowledge-bases/create-from-url with form data.

Searching

response = await client.knowledge_bases.search(
    kb.id,
    query="How do I authenticate API requests?",
    limit=10,
    search_type="hybrid",
    vector_weight=0.7,
    keyword_weight=0.3,
    enable_mmr=True,
    mmr_lambda=0.5,
    neighboring_chunks_count=1,
)

for document in response.documents:
    metadata = document.metadata or {}
    score = getattr(document, "score", None) or metadata.get("score")
    print(document.source_file, score)
    print(document.content[:200])

Search request fields

| Field | Type | Description |
| --- | --- | --- |
| query | str | Required search query |
| limit | int | Maximum documents to return |
| search_type | str | semantic, keyword, or hybrid |
| vector_weight | `float \| None` | Hybrid vector contribution |
| keyword_weight | `float \| None` | Hybrid keyword contribution |
| enable_mmr | bool | Enable Maximal Marginal Relevance |
| mmr_lambda | float | MMR balance: 1.0 favors relevance, 0.0 favors diversity |
| neighboring_chunks_count | int | Adjacent chunks to include around each match |

Search response fields

| Field | Type | Description |
| --- | --- | --- |
| documents | list[KBChunk] | Matching chunks/documents |
| total_results | int | Total result count reported by the API |
| search_metadata | dict | Query, search type, limit, thresholds, and KBs searched |

Each document includes id, content, source_file, chunk_index, and metadata. Scores are available as document.score in SDK models that expose it, or in document.metadata["score"] for raw API-shaped documents.

Managing knowledge bases

# Update metadata
updated = await client.knowledge_bases.update(
    kb.id,
    description="Updated description",
    tags=["api", "reference"],
)

# Re-index all documents
await client.knowledge_bases.refresh(kb.id)

# Refresh one document
await client.knowledge_bases.refresh_document(kb.id, "doc_abc123")

# Get files
files = await client.knowledge_bases.list_files(kb.id)

# View the original text for one document
document = await client.knowledge_bases.view_document(
    kb.id,
    document_id=files[0].document_id,
)

# Get documents (chunks)
docs = await client.knowledge_bases.list_documents(kb.id)

# Inspect metadata fields
schema = await client.knowledge_bases.metadata_schema(kb.id, sample_size=100)

# Filter documents by metadata
filtered = await client.knowledge_bases.filter_documents(
    kb.id,
    metadata_filters={"category": "Support"},
    limit=25,
)

# Browse grouped or flattened content
content = await client.knowledge_bases.content(
    kb.id,
    page=1,
    page_size=20,
    source_file="faq.md",
    flatten=False,
)

# Get API search defaults
defaults = await client.knowledge_bases.search_defaults()

# Get summary / stats
summary = await client.knowledge_bases.summary(kb.id)

# Delete a specific document
await client.knowledge_bases.delete_document(kb.id, "doc_abc123")

# Delete the entire KB
await client.knowledge_bases.delete(kb.id)

Previewing chunking

Use preview before creating or uploading a KB when you want to inspect chunking settings.
preview = await client.knowledge_bases.preview(
    files=[Path("product-guide.pdf")],
    chunk_size=1200,
    overlap_percentage=0.15,
    max_chunks_per_file=5,
)

async for event in client.knowledge_bases.preview_stream(
    files=[Path("product-guide.pdf")],
    chunk_size=1200,
    overlap_percentage=0.15,
):
    print(event.get("type") or event.get("event"), event)

Agent-scoped knowledge bases

agent_id = {agent.name: agent.id for agent in await client.agents.list()}["SupportAgent"]

await client.agents.knowledge_bases.assign(agent_id, kb.id)
kbs = await client.agents.knowledge_bases.list(agent_id)

response = await client.agents.knowledge_bases.search(
    agent_id,
    kbs[0].id,
    query="How do I authenticate API requests?",
    limit=5,
)

await client.agents.knowledge_bases.unassign(agent_id, kb.id)
To make a KB available during an agent run, pass the typed run options:
from agentflow import RetrievalOptions, RunOptions

response = await client.agents.run(
    agent_id=agent_id,
    message="How do I authenticate API requests?",
    options=RunOptions(
        knowledge_bases=[kbs[0].id],
        retrieval_options=RetrievalOptions(search_type="hybrid", limit=5, enable_mmr=True),
    ),
)