Tables store structured data extracted from documents or created manually.
List Tables
# List all tables in the warehouse
tables = warehouse.list_tables()
for table in tables:
print(f"Table: {table.name} (ID: {table.table_id})")
print(f" Type: {table.object_type}")
Get a Specific Table
# Get table by name or ID
table = warehouse.get_table("chunks") # or table ID
# Access table properties
print(f"Table name: {table.name}")
print(f"Table ID: {table.table_id}")
print(f"Object type: {table.object_type}")
Create a Custom Table
# Define table schema
columns = [
{"name": "id", "type": "uuid"},
{"name": "name", "type": "text"},
{"name": "value", "type": "float"},
{"name": "metadata", "type": "json"}
]
# Create table
table = warehouse.create_table(
    table_name="my_custom_table",
    columns=columns,
    object_type="custom",
    object_metadata={}
)
Write Data to a Table
# Prepare data
data = [
{"id": "123", "name": "Item 1", "value": 100.5, "metadata": {"key": "value1"}},
{"id": "456", "name": "Item 2", "value": 200.3, "metadata": {"key": "value2"}},
]
# Write data to table
response = table.write_data(
    data=data,
    override=False,  # Set to True to replace all existing data
    batch_size=1000  # Batch size for large datasets
)
Read Data from a Table
# Get all data from a table (all columns)
all_rows = table.get_data(
    page_size=10000,   # Results per page
    max_results=None   # Limit total results (None = all)
)
print(f"Retrieved {len(all_rows)} rows")
# Fetch only specific columns (NEW: columns parameter)
partial_rows = table.get_data(
columns=["id", "name", "json_object"], # Only these columns are returned
page_size=500
)
# Combine with filters
filtered_rows = table.get_data(
columns=["id", "json_object", "document_id"],
filters={"document_id__in": "uuid1,uuid2,uuid3"}
)
# Other filtering examples
rows_by_document = table.get_data(filters={"document_id": "doc-id-123"})
rows_by_chunk = table.get_data(filters={"chunk_id": "chunk-id-456"})
rows_advanced = table.get_data(filters={
"name": "Item 1", # Filter by column value
})
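Column selection, filtering, and pagination can be combined in a single call. The sketch below only reuses parameters already shown above; the filter values are placeholders.
# Combine column selection, filtering, and pagination in one read
filtered_page = table.get_data(
    columns=["id", "name", "value"],
    filters={"document_id__in": "uuid1,uuid2"},  # suffix-style filter, as above
    page_size=500,       # results per page
    max_results=2000     # stop after 2000 rows in total
)
print(f"Fetched {len(filtered_page)} rows")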
Update Table Data
# Update rows matching filters
result = table.perform_data_operation(
operation="update",
data=[{"value": 150.0}], # New values
filters={"name": "Item 1"}, # Which rows to update
table_version_id=None # Optional: specific version
)
# Delete rows matching filters
result = table.perform_data_operation(
operation="delete",
data=None,
filters={"value__lt": 50}, # Delete rows where value < 50
table_version_id=None
)
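The update and delete calls return a result object whose exact shape is not detailed on this page, so a simple way to confirm a change landed is to read the affected rows back with get_data:
# Verify the update by re-reading the matching rows
# (the shape of the update/delete result itself is not documented here)
updated_rows = table.get_data(filters={"name": "Item 1"})
print(f"Rows matching the filter after update: {len(updated_rows)}")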
Read with Selected Columns (Low-level)
# Low-level read with column selection using perform_data_operation
result = table.perform_data_operation(
operation="read",
columns=["id", "name", "created_at"], # NEW: select columns
limit=100,
offset=0
)
rows = result["result"]["results"]
Note: The columns parameter is only applicable when operation="read". It is ignored for create, update, and delete.
Table Versioning
# Create a new version of the table
version = table.create_version()
print(f"Created version: {version['id']}")
# List all versions
versions = table.list_versions()
for v in versions:
print(f"Version: {v['id']} - Deployed: {v.get('deployed', False)}")
# Set a version as deployed
table.set_deployed_version(version_id="version-id")
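Versioning combines with the data operations above: perform_data_operation accepts a table_version_id, so a typical flow, sketched here from the calls already shown on this page, is to create a version, modify it, and then deploy it.
# Sketch of a versioned update flow using only calls shown on this page
version = table.create_version()

# Apply changes against the new version rather than the deployed one
table.perform_data_operation(
    operation="update",
    data=[{"value": 175.0}],
    filters={"name": "Item 1"},
    table_version_id=version["id"]
)

# Once the changes look good, make the new version the deployed one
table.set_deployed_version(version_id=version["id"])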
Import/Export Large Datasets
The Python client supports exporting table data to a temporary dataset and importing data from one (or by uploading data directly). These operations run asynchronously; you can optionally wait for completion.
Export Table Data to a Dataset
# Export the deployed version and wait for completion
export_result = table.export_to_dataset()
print(export_result["result"]["rows_exported"])
print(export_result["output_dataset"]["file"]) # URL to download JSON
# Export a specific version without waiting
operation = table.export_to_dataset(
    table_version_id="version-uuid",
    wait_for_completion=False
)
print(operation["id"]) # Use to poll later
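The export result includes a download URL for the generated JSON file (output_dataset.file above). Assuming that URL is reachable with your credentials, it can be fetched with any HTTP client; the sketch below uses requests and assumes the file is a JSON array of row objects.
import requests  # any HTTP client works; requests is used here for brevity

export_result = table.export_to_dataset()
download_url = export_result["output_dataset"]["file"]

# Whether the URL needs extra auth headers depends on your deployment;
# this sketch assumes it is directly downloadable.
response = requests.get(download_url, timeout=60)
response.raise_for_status()
exported_rows = response.json()  # assumed to be a JSON array of row objects
print(f"Downloaded {len(exported_rows)} rows")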
Import Data from a Dataset or Directly
import_result = table.import_from_dataset(
    table_version_id="version-uuid",
    data=[
        {"id": 1, "name": "Alice", "email": "alice@example.com"},
        {"id": 2, "name": "Bob", "email": "bob@example.com"},
    ],
    override=True  # Clear existing data first
)
print(import_result["result"]["rows_imported"])
# Import from a JSON file on disk
from pathlib import Path
file_import = table.import_from_dataset(data=Path("./data/users.json"))
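When importing from a file path, the file contents presumably mirror the shape of the inline data argument above, i.e. a JSON array of row objects; that on-disk format is an assumption here, not something this page confirms. One way to produce such a file:
import json
from pathlib import Path

# Write rows in the same shape as the inline `data` argument above;
# a plain JSON array as the on-disk format is an assumption
rows = [
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "email": "bob@example.com"},
]
Path("./data").mkdir(parents=True, exist_ok=True)
Path("./data/users.json").write_text(json.dumps(rows))

file_import = table.import_from_dataset(data=Path("./data/users.json"))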
Delete a Table
# Delete a table
warehouse.delete_table("table-name-or-id")
# Or using table object
table.delete()