Tables store structured data extracted from documents or created manually.
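
The examples on this page assume a warehouse object is already available. The sketch below shows one plausible way to obtain it; the import path, Client constructor, and get_warehouse accessor are assumptions, not confirmed API, so consult the SDK reference for the actual entry point.

# Hypothetical setup: the import path, Client constructor, and
# get_warehouse accessor are assumptions, not confirmed API.
from clarifeye import Client

client = Client(api_key="YOUR_API_KEY")
warehouse = client.get_warehouse("my-warehouse")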

List Tables

# List all tables in the warehouse
tables = warehouse.list_tables()
for table in tables:
    print(f"Table: {table.name} (ID: {table.table_id})")
    print(f"  Type: {table.object_type}")

Get a Specific Table

# Get table by name or ID
table = warehouse.get_table("chunks")  # or table ID

# Access table properties
print(f"Table name: {table.name}")
print(f"Table ID: {table.table_id}")
print(f"Object type: {table.object_type}")

Create a Custom Table

# Define table schema
columns = [
    {"name": "id", "type": "uuid"},
    {"name": "name", "type": "text"},
    {"name": "value", "type": "float"},
    {"name": "metadata", "type": "json"}
]

# Create table
table = warehouse.create_table(
    table_name="my_custom_table",
    columns=columns,
    object_type="custom",
    object_metadata={}
)

Write Data to a Table

# Prepare data
data = [
    {"id": "123", "name": "Item 1", "value": 100.5, "metadata": {"key": "value1"}},
    {"id": "456", "name": "Item 2", "value": 200.3, "metadata": {"key": "value2"}},
]

# Write data to table
response = table.write_data(
    data=data,
    override=False,  # Set to True to replace all existing data
    batch_size=1000  # Batch size for large datasets
)

Read Data from a Table

# Get all data from a table (all columns)
all_rows = table.get_data(
    page_size=10000,           # Results per page
    max_results=None           # Limit total results (None = all)
)
print(f"Retrieved {len(all_rows)} rows")

# Fetch only specific columns (NEW: columns parameter)
partial_rows = table.get_data(
    columns=["id", "name", "json_object"],  # Only these columns are returned
    page_size=500
)

# Combine with filters
filtered_rows = table.get_data(
    columns=["id", "json_object", "document_id"],
    filters={"document_id__in": "uuid1,uuid2,uuid3"}
)

# Other filtering examples (filter keys accept lookup suffixes such as
# __in and __lt, as used elsewhere on this page)
rows_by_document = table.get_data(filters={"document_id": "doc-id-123"})
rows_by_chunk = table.get_data(filters={"chunk_id": "chunk-id-456"})
rows_by_name = table.get_data(filters={
    "name": "Item 1",  # Exact match on a column value
})

Update and Delete Table Data

# Update rows matching filters
result = table.perform_data_operation(
    operation="update",
    data=[{"value": 150.0}],  # New values
    filters={"name": "Item 1"},  # Which rows to update
    table_version_id=None  # Optional: specific version
)

# Delete rows matching filters
result = table.perform_data_operation(
    operation="delete",
    data=None,
    filters={"value__lt": 50},  # Delete rows where value < 50
    table_version_id=None
)

Read with Selected Columns (Low-level)

# Low-level read with column selection using perform_data_operation
result = table.perform_data_operation(
    operation="read",
    columns=["id", "name", "created_at"],  # NEW: select columns
    limit=100,
    offset=0
)
rows = result["result"]["results"]

Note: The columns parameter is only applicable when operation="read". It is ignored for create, update, and delete.

Table Versioning

# Create a new version of the table
version = table.create_version()
print(f"Created version: {version['id']}")

# List all versions
versions = table.list_versions()
for v in versions:
    print(f"Version: {v['id']} - Deployed: {v.get('deployed', False)}")

# Set a version as deployed
table.set_deployed_version(version_id="version-id")
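
Versions pair naturally with the table_version_id parameter shown earlier. Below is a minimal sketch of staging a change in a new version before deploying it; that passing table_version_id scopes the operation to the new version, leaving the deployed version untouched, is an assumption.

# Stage a change in a new version, then deploy it once verified.
# Assumption: table_version_id scopes the operation to that version.
version = table.create_version()

table.perform_data_operation(
    operation="update",
    data=[{"value": 175.0}],
    filters={"name": "Item 1"},
    table_version_id=version["id"],
)

table.set_deployed_version(version_id=version["id"])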

Import/Export Large Datasets

The Python client supports exporting table data to temporary datasets and importing data from temporary datasets (or by uploading data directly). Operations run asynchronously; you can optionally wait for completion.

Export Table Data to a Dataset

# Export the deployed version and wait for completion
export_result = table.export_to_dataset()
print(export_result["result"]["rows_exported"])
print(export_result["output_dataset"]["file"])  # URL to download JSON

# Export a specific version without waiting
operation = table.export_to_dataset(
    table_version_id="version-uuid",
    wait_for_completion=False
)
print(operation["id"])  # Use to poll later
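
If you export without waiting, you can poll until the operation finishes. A minimal sketch follows; the get_operation accessor and the status values are assumptions, since the source only shows the returned operation id.

import time

# Polling sketch: get_operation and the "status" values are assumptions;
# the real polling interface may differ.
while True:
    op = table.get_operation(operation["id"])  # hypothetical accessor
    if op.get("status") in ("completed", "failed"):
        break
    time.sleep(5)  # wait between polls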

Import Data from a Dataset or Directly

import_result = table.import_from_dataset(
    table_version_id="version-uuid",
    data=[
        {"id": 1, "name": "Alice", "email": "alice@example.com"},
        {"id": 2, "name": "Bob", "email": "bob@example.com"},
    ],
    override=True  # Clear existing data first
)
print(import_result["result"]["rows_imported"])

# Import from a JSON file on disk
from pathlib import Path
file_import = table.import_from_dataset(data=Path("./data/users.json"))

Delete a Table

# Delete a table
warehouse.delete_table("table-name-or-id")

# Or using table object
table.delete()