Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt

Use this file to discover all available pages before exploring further.

Object tables store structured entities extracted from documents (e.g., invoices, contracts, claims).

Create an Object Table

Define your data structure as a Pydantic model, then create a table to store the extracted objects. The example assumes a `warehouse` object has already been created:
from pydantic import BaseModel
from typing import Optional

# Define your Pydantic model.
# Fields declared without a default are required; `notes` is the only
# optional field.
class Invoice(BaseModel):
    """An invoice extracted from a document"""
    invoice_number: str
    # Kept as a plain string rather than a date type; the sample records in
    # this page use "YYYY-MM-DD" values.
    date: str
    vendor: str
    # NOTE(review): float can introduce rounding on monetary amounts; consider
    # Decimal if exact values matter.
    total_amount: float
    # The sample records use "USD"; presumably a currency code -- confirm.
    currency: str
    # Free-form dicts with no enforced schema; the sample records use
    # {"item": ..., "amount": ...} entries.
    line_items: list[dict]
    # Optional: defaults to None when not supplied.
    notes: Optional[str] = None

# Create an object table for Invoice objects.
# NOTE: `warehouse` is not defined in this snippet; it is assumed to be an
# already-initialized warehouse object created beforehand.
object_table = warehouse.create_objects_table(
    table_name="invoices",
    # Pass the model class itself, not an instance.
    object_class=Invoice,
    object_name="Invoice",  # Optional: custom name
    table_version_dependencies={}  # Optional: dependency on other tables
)

# The returned table object exposes its name through the `.name` attribute.
print(f"Created object table: {object_table.name}")

Write Extracted Objects

Write structured data to the object table:
# Write extracted objects.
# Each record pairs the extracted fields (under "json_object") with metadata
# keys alongside them.
# NOTE(review): the meaning of the metadata keys is inferred from their names
# -- confirm against the write_data reference.
invoice_data = [
    {
        "id": "inv-001",  # record identifier; distinct for each record here
        # Keys mirror the fields of the Invoice model defined earlier.
        "json_object": {
            "invoice_number": "INV-2024-001",
            "date": "2024-01-15",
            "vendor": "Acme Corp",
            "total_amount": 1250.00,
            "currency": "USD",
            "line_items": [{"item": "Service", "amount": 1250.00}]
            # "notes" is omitted; the Invoice model defaults it to None.
        },
        # The same value is used further down to filter get_data() results.
        "document_id": "doc-id-123",
        # Presumably the page range of the object in its source document
        # -- TODO confirm (1-based? inclusive?).
        "min_page": 1,
        "max_page": 2,
        # Presumably the source text the object was extracted from -- confirm.
        "content": "Invoice text...",
        # Presumably a version label for this record -- confirm.
        "version": "1.0"
    },
    {
        "id": "inv-002",
        "json_object": {
            "invoice_number": "INV-2024-002",
            "date": "2024-01-20",
            "vendor": "Tech Solutions",
            "total_amount": 3500.00,
            "currency": "USD",
            # Multiple line items; in this sample their amounts add up to
            # total_amount.
            "line_items": [
                {"item": "Consulting", "amount": 2000.00},
                {"item": "Software License", "amount": 1500.00}
            ]
        },
        "document_id": "doc-id-124",
        "min_page": 1,
        "max_page": 3,
        "content": "Invoice text...",
        "version": "1.0"
    }
]

# Write all records to the table in a single call.
object_table.write_data(invoice_data)

Read Object Data

Retrieve objects from the table:
# Fetch every object stored in the table
all_invoices = object_table.get_data()

# Print a short summary per record; the extracted fields sit under the
# "json_object" key, so look that up once per record.
for invoice in all_invoices:
    fields = invoice["json_object"]
    print(f"Invoice: {fields['invoice_number']}")
    print(f"Vendor: {fields['vendor']}")
    print(f"Amount: {fields['total_amount']}")
    print("---")

# Restrict the result to the objects of a single document
doc_invoices = object_table.get_data(document_id="doc-id-123")