Documentation Index
Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt
Use this file to discover all available pages before exploring further.
Chunk extractors control how documents are split into searchable chunks.
# List all chunk extractors
# Each entry returned by the listing is read through its 'name' and 'id' keys.
extractors = warehouse.list_chunk_extractors()
for extractor in extractors:
    # The loop body must be indented under the `for` header to be valid Python.
    print(f"Chunk Extractor: {extractor['name']} (ID: {extractor['id']})")
# Create a chunk extractor with custom settings
# All options are passed as keyword arguments; the result is kept in
# `extractor` so that its 'id' can be reused by the calls that follow.
extractor = warehouse.create_chunk_extractor(
    name="Custom Chunker",
    document_ids=["doc-id-1", "doc-id-2"],
    maximum_chunk_size=15000,
    minimum_chunk_size=200,
    page_as_separator=False,
    title_section_separator_mode="both",  # "title", "section", "both", or "none"
    excluded_block_types=["header", "footer"],
)
print(f"Created chunk extractor: {extractor['id']}")
# Update chunk extractor settings
# The extractor to modify is identified by the 'id' of the record created
# earlier; the remaining keyword arguments carry the new settings.
updated = warehouse.update_chunk_extractor(
    chunk_extractor_id=extractor["id"],
    maximum_chunk_size=20000,
    minimum_chunk_size=500,
    page_as_separator=True,
    excluded_block_types=["header", "footer", "page_number"],
)
# Run chunking on all documents
# No document_ids are passed here; the call returns a task object.
task = warehouse.run_chunk_extractor(chunk_extractor_id=extractor["id"])
# Wait on the task and keep whatever it returns on completion.
result = task.wait_for_completion()
# Run on specific documents
# Same call as the run-all example, with an explicit list of document IDs.
# The returned task is not waited on here; task.wait_for_completion() can be
# called on it exactly as in the run-all example.
task = warehouse.run_chunk_extractor(
    chunk_extractor_id=extractor["id"],
    document_ids=["doc-id-1", "doc-id-2"],
)
# Delete a chunk extractor
# The extractor is identified by the 'id' of the record created earlier.
warehouse.delete_chunk_extractor(
    chunk_extractor_id=extractor["id"],
)