Documentation Index
Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt
Use this file to discover all available pages before exploring further.
Tag extractors classify chunks of text into categories defined by a tagging tree.
# List all tag extractors
extractors = warehouse.list_tag_extractors()
for extractor in extractors:
    # Each returned entry is read through its 'name' and 'id' keys.
    print(f"Tag Extractor: {extractor['name']} (ID: {extractor['id']})")
# Define tagging tree structure.
# The root node ("Document Type") has two branches, "Financial" and "Legal";
# each branch lists three leaf categories. Every node is a dict with a
# "name" key, and non-leaf nodes also carry a "children" list.
tagging_tree = {
    "name": "Document Type",
    "children": [
        {
            "name": "Financial",
            "children": [
                {"name": "Invoice"},
                {"name": "Receipt"},
                {"name": "Statement"},
            ],
        },
        {
            "name": "Legal",
            "children": [
                {"name": "Contract"},
                {"name": "Agreement"},
                {"name": "NDA"},
            ],
        },
    ],
}
# Create tag extractor from the tagging tree defined earlier.
extractor = warehouse.create_tag_extractor(
    name="Document Classifier",
    brief="Classify document chunks by type",
    tagging_tree=tagging_tree,
    extraction_prompt="Classify the following text...",
    llm_model="gpt-4o-mini",
    compute_alerts=True,
    enforce_single_tag=False,  # Allow multiple tags per chunk
)
# The returned value exposes the new extractor's identifier under 'id'.
print(f"Created tag extractor: {extractor['id']}")
# Update an existing tag extractor, identified by its ID.
# This example changes the name and the prompt, and sets
# enforce_single_tag to True.
updated = warehouse.update_tag_extractor(
    tag_extractor_id=extractor["id"],
    name="Updated Document Classifier",
    extraction_prompt="Updated classification prompt...",
    enforce_single_tag=True,
)
# Run tagging on all documents (no document or chunk filter is passed).
task = warehouse.run_tag_extractor(tag_extractor_id=extractor["id"])
# The call returns a task object; wait on it and keep what it returns.
result = task.wait_for_completion()
# Run on specific documents or chunks by passing explicit ID lists.
task = warehouse.run_tag_extractor(
    tag_extractor_id=extractor["id"],
    document_ids=["doc-id-1"],
    chunk_ids=["chunk-id-1", "chunk-id-2"],
)
# Run with more options: mode, alert computation and model are set explicitly.
# NOTE(review): this calls run_tag_extraction_task, whereas the simpler
# examples call run_tag_extractor — confirm the method name against the SDK.
# NOTE(review): the empty ID lists presumably mean "no filter" — confirm.
task = warehouse.run_tag_extraction_task(
    tag_extractor_id=extractor["id"],
    mode="recreate-all",
    compute_alerts=True,
    llm_model="gpt-4o-mini",
    document_ids=[],
    chunk_ids=[],
)
result = task.wait_for_completion()
# Delete a tag extractor, identified by its ID.
warehouse.delete_tag_extractor(
    tag_extractor_id=extractor["id"],
)
Run document-level tag extraction:
# Run document tags extraction against a document-tags table,
# restricted here to two document IDs.
task = warehouse.run_document_tags_extraction(
    table_id="doc-tags-table-id",
    mode="recreate-all",
    llm_model="gpt-4o-mini",
    document_ids=["doc-id-1", "doc-id-2"],
    should_push_to_graph=True,
)
# Wait on the returned task and keep what it returns.
result = task.wait_for_completion()