I built a Rag agent and a knowledgebase agent. The knowledge base agent i set up to pull infor from a local pdf. It’s finding the pdf ok but its not pulling the info? Everything else is working ok.
I added some code to check this. here is my code and then below is terminal response:
import os
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.knowledge.pdf import PDFKnowledgeBase
from phi.vectordb.pgvector import PgVector, SearchType
db_url = “postgresql+psycopg://ai:ai@localhost:5532/ai”
knowledge_base = PDFKnowledgeBase(
path=“/Users/louisamayhanrahan/Desktop/base/ac2.pdf”,
vector_db=PgVector(
table_name=“pdf_documents”,
db_url=db_url,
),
)
Load the knowledge base: Comment after first run
try:
knowledge_base.load(upsert=True)
print(“Knowledge base loaded successfully.”)
# Access and print the loaded data CORRECTLY:
if hasattr(knowledge_base, 'entries'): # Check if 'entries' attribute exists
loaded_data = knowledge_base.entries
if loaded_data:
print(f"Number of entries loaded: {len(loaded_data)}")
for i, entry in enumerate(loaded_data):
print(f"\n--- Entry {i+1} ---")
print(f"Name: {entry.name}")
preview_length = 500
content_preview = entry.content[:preview_length] + "..." if len(entry.content) > preview_length else entry.content
print(f"Content Preview:\n{content_preview}")
else:
print("No entries were loaded from the PDF.")
elif hasattr(knowledge_base, '_documents'): # Check if '_documents' attribute exists (less common)
loaded_data = knowledge_base._documents
if loaded_data:
print(f"Number of documents loaded: {len(loaded_data)}")
for i, doc in enumerate(loaded_data):
print(f"\n--- Document {i+1} ---")
# Handle cases where name or page_content might not exist
doc_name = getattr(doc, 'name', 'No Name') # Use getattr for safe access
print(f"Name: {doc_name}")
content = getattr(doc, 'page_content', '') # Use getattr for safe access
preview_length = 500
content_preview = content[:preview_length] + "..." if len(content) > preview_length else content
print(f"Content Preview:\n{content_preview}")
else:
print("No documents were loaded from the PDF.")
else:
print("Neither 'entries' nor '_documents' attribute found. Please check the phi documentation for your version.")
# ... (rest of your code)
except Exception as e:
print(f"An error occurred: {e}")
exit()
except Exception as e:
print(f"An error occurred during knowledge base loading: {e}")
exit() # Exit the script if loading fails
if knowledge_base.documents: # Only create the agent if the KB loaded
agent = Agent(
model=OpenAIChat(id=“gpt-4o”, api_key=api_key), # Add API key here too
knowledge=knowledge_base,
add_context=True,
search_knowledge=False,
markdown=True,
)
agent.print_response("How many years experience does Louisamay have in making rice bowls?")
else:
print(“Knowledge base not loaded, cannot create agent or answer questions.”)
agent = Agent(
model=OpenAIChat(id=“gpt-4o”),
knowledge=knowledge_base,
# Enable RAG by adding references from AgentKnowledge to the user prompt.
add_context=True,
# Set as False because Agents default to search_knowledge=True
search_knowledge=False,
markdown=True,
# debug_mode=True,
)
agent.print_response(“How many years experience does name have in making rice bowls?”)
Terminal:
python main.py
INFO Creating collection
INFO Loading knowledge base
INFO Reading: ac2
INFO Upserted batch of 1 documents.
INFO Added 1 documents to knowledge base
Knowledge base loaded successfully.
Neither ‘entries’ nor ‘_documents’ attribute found. Please check the phi documentation for your version.
Traceback (most recent call last):
File “/Users/louisamayhanrahan/code/agent/knowledgebase/main.py”, line 61, in
if knowledge_base.documents: # Only create the agent if the KB loaded
File “/Users/louisamayhanrahan/code/agent/knowledgebase/.venv/lib/python3.9/site-packages/pydantic/main.py”, line 891, in getattr
raise AttributeError(f’{type(self).name!r} object has no attribute {item!r}')
AttributeError: ‘PDFKnowledgeBase’ object has no attribute ‘documents’