Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt

Use this file to discover all available pages before exploring further.

Tag tables store categorical labels for chunks of text, allowing you to classify and organize content at a granular level.

Create a Tag Table

Define a hierarchical tagging structure and create a table:
# Tagging tree: a "Document Type" root with one child node per category,
# each category listing its leaf tags as {"name": ...} nodes.
tagging_tree = {
    "name": "Document Type",
    "children": [
        {
            "name": category,
            "children": [{"name": leaf} for leaf in leaves],
        }
        for category, leaves in (
            ("Financial", ("Invoice", "Receipt", "Statement")),
            ("Legal", ("Contract", "Agreement", "NDA")),
        )
    ],
}

# Create tag table
# Calls create_tag_table on the warehouse with the table name
# "document_types", the tagging tree defined above, and an empty dict of
# table version dependencies.
# NOTE(review): `warehouse` is not defined in this snippet — presumably a
# client/warehouse object obtained in an earlier setup step; confirm.
tag_table = warehouse.create_tag_table(
    table_name="document_types",
    tagging_tree=tagging_tree,
    table_version_dependencies={}
)

# The returned object exposes a `name` attribute, printed here as confirmation.
print(f"Created tag table: {tag_table.name}")

Write Tag Data

Apply tags to chunks:
# Tag rows to write: each row pairs a chunk ID with a tag ID and stores a
# confidence score under "metadata".
tag_data = [
    {
        "chunk_id": chunk_id,
        "id": tag_id,
        "metadata": {"confidence": confidence},
    }
    for chunk_id, tag_id, confidence in (
        ("chunk-id-123", "Financial.Invoice", 0.95),
        ("chunk-id-124", "Legal.Contract", 0.87),
        ("chunk-id-125", "Financial.Receipt", 0.92),
    )
]

# Pass the whole list of tag rows to the table's write_data method in one call.
tag_table.write_data(tag_data)

Read Tag Data

Retrieve tags from the table:
# Get all tags
# get_data is called with no arguments; the result is iterated below.
all_tags = tag_table.get_data()

for tag in all_tags:
    # Each entry is indexed by "chunk_id", "id" and a nested
    # "metadata" -> "confidence" lookup.
    # NOTE(review): presumably rows come back as dicts with the same keys that
    # were written; a row lacking "metadata" or "confidence" would fail on the
    # lookup below — confirm against the SDK reference.
    print(f"Chunk: {tag['chunk_id']}")
    print(f"Tag: {tag['id']}")
    print(f"Confidence: {tag['metadata']['confidence']}")
    print("---")

# Filter by specific chunk
# The same method accepts a chunk_id keyword argument to restrict the result.
chunk_tags = tag_table.get_data(chunk_id="chunk-id-123")