Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt

Use this file to discover all available pages before exploring further.

Tag extractors classify chunks of text into categories defined by a tagging tree.

List Tag Extractors

# Fetch the tag extractors, then print one summary line (name and ID) for each
extractors = warehouse.list_tag_extractors()
for extractor in extractors:
    summary_line = f"Tag Extractor: {extractor['name']} (ID: {extractor['id']})"
    print(summary_line)

Create Tag Extractor

# Tagging tree: a "Document Type" root, two category nodes, and three leaf
# tags under each category. Every node is a dict with a "name" key; nodes
# that have sub-nodes also carry a "children" list.
tagging_tree = {
    "name": "Document Type",
    "children": [
        {
            "name": category,
            "children": [{"name": leaf} for leaf in leaves],
        }
        for category, leaves in (
            ("Financial", ("Invoice", "Receipt", "Statement")),
            ("Legal", ("Contract", "Agreement", "NDA")),
        )
    ],
}

# Classification settings, grouped so they can be read (and reused) together
classifier_settings = {
    "extraction_prompt": "Classify the following text...",
    "llm_model": "gpt-4o-mini",
    "compute_alerts": True,
    "enforce_single_tag": False,  # Allow multiple tags per chunk
}

# Create the tag extractor from the tagging tree plus the settings above
extractor = warehouse.create_tag_extractor(
    name="Document Classifier",
    brief="Classify document chunks by type",
    tagging_tree=tagging_tree,
    **classifier_settings,
)

# The returned record is indexed by 'id'; later snippets reuse that ID
creation_message = f"Created tag extractor: {extractor['id']}"
print(creation_message)

Update Tag Extractor

# Fields to change on the existing tag extractor
changed_fields = {
    "name": "Updated Document Classifier",
    "extraction_prompt": "Updated classification prompt...",
    "enforce_single_tag": True,
}

# Send the update for the extractor identified by its ID
updated = warehouse.update_tag_extractor(
    tag_extractor_id=extractor['id'],
    **changed_fields,
)

Run Tag Extractor

# Run tagging on all documents: only the extractor ID is passed, no ID filters
task = warehouse.run_tag_extractor(tag_extractor_id=extractor['id'])
result = task.wait_for_completion()

# Run on specific documents or chunks by passing explicit ID lists.
# NOTE(review): how document_ids and chunk_ids combine when both are given
# is not shown here; confirm against the SDK reference.
id_filters = {
    "document_ids": ["doc-id-1"],
    "chunk_ids": ["chunk-id-1", "chunk-id-2"],
}
task = warehouse.run_tag_extractor(
    tag_extractor_id=extractor['id'],
    **id_filters,
)

Run Tag Extraction Task (Alternative)

# Options for the extraction task: mode, alerting, model, and ID lists.
# NOTE(review): both ID lists are empty here, presumably meaning "no
# restriction"; confirm against the SDK reference.
task_options = {
    "mode": "recreate-all",
    "compute_alerts": True,
    "llm_model": "gpt-4o-mini",
    "document_ids": [],
    "chunk_ids": [],
}

# Run with more options than run_tag_extractor is shown taking
task = warehouse.run_tag_extraction_task(
    tag_extractor_id=extractor['id'],
    **task_options,
)

# Call wait_for_completion() on the returned task and keep what it returns
result = task.wait_for_completion()

Delete Tag Extractor

# Delete a tag extractor, identified by the ID of its record
extractor_id = extractor['id']
warehouse.delete_tag_extractor(tag_extractor_id=extractor_id)

Document Tags Extraction

Run document-level tag extraction:
# Options for the document tags extraction: target table, mode, model,
# the documents to process, and the push-to-graph flag
doc_tag_options = {
    "table_id": "doc-tags-table-id",
    "mode": "recreate-all",
    "llm_model": "gpt-4o-mini",
    "document_ids": ["doc-id-1", "doc-id-2"],
    "should_push_to_graph": True,
}

# Run document tags extraction
task = warehouse.run_document_tags_extraction(**doc_tag_options)

# Call wait_for_completion() on the returned task and keep what it returns
result = task.wait_for_completion()