# NOTE(review): stray chat text was pasted above the shebang, making the file a
# SyntaxError; preserved here as a comment: "so I am running the Agentic chunking
# script with o3-mini and the chunks are really terrible. they're breaking the
# words in half even. I thought Agentic chunking would find natural breakpoints."
# (The script below actually uses a Groq deepseek-r1 model, not o3-mini.)
#!/usr/bin/env python3
"""
A script to demonstrate agentic chunking of a real article from the 'articles' table using Agno's Agentic Chunking.
This script connects to the database specified in the environment variable DATABASE_URL,
fetches a real article with non-empty content from the 'articles' table, and then uses AgenticChunking from the agno.document.chunking.agentic module
to split the article into semantically coherent chunks, rather than just fixed size pieces.
Refer to the Agno documentation at [Agentic Chunking](https://docs.agno.com/chunking/agentic-chunking) for more details.
"""
import os
import sys
from dotenv import load_dotenv
from sqlalchemy import create_engine, Table, MetaData, select
from agno.document.chunking.agentic import AgenticChunking
from agno.models.groq import Groq
class SimpleDocument:
def __init__(self, content, meta_data=None, doc_id=None, name=None):
self.content = content
self.meta_data = meta_data if meta_data is not None else {}
self.id = doc_id
self.name = name
def main():
    """Fetch one article from the database and print its agentic chunks.

    Reads DATABASE_URL from the environment (optionally via a .env file),
    pulls the first article whose ``content`` is non-NULL *and* non-empty,
    runs Agno's AgenticChunking over it, and prints the original text
    followed by each chunk. Exits with status 1 on any setup failure.
    """
    # Load environment variables from a local .env file, if present.
    load_dotenv()
    db_url = os.getenv("DATABASE_URL")
    if not db_url:
        print("Error: DATABASE_URL not set in environment variables.")
        sys.exit(1)
    # Create the SQLAlchemy engine and reflect the articles table.
    engine = create_engine(db_url)
    metadata = MetaData()
    try:
        articles_table = Table("articles", metadata, autoload_with=engine)
    except Exception as e:
        print(f"Error loading articles table: {e}")
        sys.exit(1)
    # Fetch one article with genuinely non-empty content.
    # BUG FIX: the original filter (content != None) only excluded NULL rows,
    # so articles with an empty-string body still matched despite the script's
    # stated "non-empty content" intent. Filter both cases, and use the
    # idiomatic .isnot(None) instead of a bare != None comparison.
    with engine.connect() as conn:
        stmt = (
            select(articles_table)
            .where(articles_table.c.content.isnot(None))
            .where(articles_table.c.content != "")
            .limit(1)
        )
        result = conn.execute(stmt).fetchone()
    if not result:
        print("No article found with non-empty content in the 'articles' table.")
        sys.exit(1)
    # SQLAlchemy 1.4+ rows expose a dict-like view as ._mapping; fall back to
    # the row itself for older versions that behave like mappings directly.
    try:
        article_data = result._mapping
    except AttributeError:
        article_data = result
    article_text = article_data.get("content")
    if not article_text:
        print("The selected article has no content.")
        sys.exit(1)
    print("Original Article:\n")
    print(article_text)
    print("\n" + "=" * 80 + "\n")
    # Wrap the text in an object exposing the attributes Agno chunkers expect.
    document = SimpleDocument(article_text, doc_id=article_data.get("id"))
    # NOTE(review): a reasoning-tuned model (deepseek-r1-distill) may emit
    # poor or malformed breakpoint responses; if chunks split mid-word, the
    # agentic pass is likely failing and degrading to the max_chunk_size hard
    # limit. Try a plain instruct model and/or a larger max_chunk_size —
    # TODO confirm against the Agno AgenticChunking documentation.
    chunker = AgenticChunking(model=Groq(id="deepseek-r1-distill-llama-70b"),
                              max_chunk_size=1000)
    chunks = chunker.chunk(document)
    print("Chunked Article:\n")
    for idx, chunk in enumerate(chunks, start=1):
        print(f"Chunk {idx}:")
        # BUG FIX: printing the chunk object printed its repr rather than its
        # text. Agno chunkers return Document-like objects; print their
        # .content, falling back to the object itself if the attribute is
        # absent.
        print(getattr(chunk, "content", chunk))
        print("-" * 40)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()