Documentation Index
Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt
Use this file to discover all available pages before exploring further.
Object tables store structured entities extracted from documents (e.g., invoices, contracts, claims).
Create an Object Table
Define your data structure using Pydantic models and create a table to store extracted objects:
from pydantic import BaseModel
from typing import Optional
# Define your Pydantic model
class Invoice(BaseModel):
    """An invoice extracted from a document"""

    # Field names, types and order define the schema of the stored objects;
    # the `json_object` payload of each written record uses these same keys.
    invoice_number: str
    date: str
    vendor: str
    total_amount: float
    currency: str
    # Untyped dicts: each line item is stored as-is, with no per-key validation.
    line_items: list[dict]
    # The only optional field; defaults to None when not supplied.
    notes: Optional[str] = None
# Register a table whose rows are validated against the Invoice model
object_table = warehouse.create_objects_table(
    table_name="invoices",
    object_class=Invoice,
    object_name="Invoice",  # Optional: custom name
    table_version_dependencies={},  # Optional: dependency on other tables
)
print(f"Created object table: {object_table.name}")
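Write structured data to the object table. Each record wraps the extracted object in `json_object` and carries metadata alongside it (`id`, `document_id`, `min_page`, `max_page`, `content`, `version`):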
# Two example records: the extracted object lives under "json_object",
# the remaining keys describe where in which document it was found.
invoice_data = [
    {
        "id": "inv-001",
        "json_object": {
            "invoice_number": "INV-2024-001",
            "date": "2024-01-15",
            "vendor": "Acme Corp",
            "total_amount": 1250.00,
            "currency": "USD",
            "line_items": [{"item": "Service", "amount": 1250.00}],
        },
        "document_id": "doc-id-123",
        "min_page": 1,
        "max_page": 2,
        "content": "Invoice text...",
        "version": "1.0",
    },
    {
        "id": "inv-002",
        "json_object": {
            "invoice_number": "INV-2024-002",
            "date": "2024-01-20",
            "vendor": "Tech Solutions",
            "total_amount": 3500.00,
            "currency": "USD",
            "line_items": [
                {"item": "Consulting", "amount": 2000.00},
                {"item": "Software License", "amount": 1500.00},
            ],
        },
        "document_id": "doc-id-124",
        "min_page": 1,
        "max_page": 3,
        "content": "Invoice text...",
        "version": "1.0",
    },
]

# Persist both records in a single call
object_table.write_data(invoice_data)
Read Object Data
Retrieve objects from the table:
# Get all objects
all_invoices = object_table.get_data()
for invoice in all_invoices:
    # Each returned record keeps the extracted fields under "json_object"
    fields = invoice["json_object"]
    print(f"Invoice: {fields['invoice_number']}")
    print(f"Vendor: {fields['vendor']}")
    print(f"Amount: {fields['total_amount']}")
    print("---")  # separator after every invoice

# Filter by document
doc_invoices = object_table.get_data(document_id="doc-id-123")