Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.clarifeye.ai/llms.txt

Use this file to discover all available pages before exploring further.

Chunk extractors control how documents are split into searchable chunks.

List Chunk Extractors

# Retrieve every chunk extractor, then print one summary line (name and ID) for each
extractors = warehouse.list_chunk_extractors()
for extractor in extractors:
    summary = f"Chunk Extractor: {extractor['name']} (ID: {extractor['id']})"
    print(summary)

Create Chunk Extractor

# Collect the custom chunking settings in one place, then create the
# chunk extractor by passing them as keyword arguments
chunker_settings = {
    "name": "Custom Chunker",
    "document_ids": ["doc-id-1", "doc-id-2"],
    "maximum_chunk_size": 15000,
    "minimum_chunk_size": 200,
    "page_as_separator": False,
    # Accepted values: "title", "section", "both", or "none"
    "title_section_separator_mode": "both",
    "excluded_block_types": ["header", "footer"],
}
extractor = warehouse.create_chunk_extractor(**chunker_settings)

# The returned extractor exposes its ID under the 'id' key
print(f"Created chunk extractor: {extractor['id']}")

Update Chunk Extractor

# Gather the settings to change, then apply them to the existing
# chunk extractor (identified by its ID)
new_settings = {
    "maximum_chunk_size": 20000,
    "minimum_chunk_size": 500,
    "page_as_separator": True,
    "excluded_block_types": ["header", "footer", "page_number"],
}
updated = warehouse.update_chunk_extractor(
    chunk_extractor_id=extractor['id'],
    **new_settings,
)

Run Chunk Extractor

# Run chunking on all documents, then wait for the task to complete
task = warehouse.run_chunk_extractor(
    chunk_extractor_id=extractor['id']
)
result = task.wait_for_completion()

# Run on specific documents only by passing their IDs
task = warehouse.run_chunk_extractor(
    chunk_extractor_id=extractor['id'],
    document_ids=["doc-id-1", "doc-id-2"]
)
# Wait for this run as well, so that later steps (such as deleting the
# extractor) do not happen while the task is still in progress
result = task.wait_for_completion()

Delete Chunk Extractor

# Look up the extractor's ID, then delete the chunk extractor it identifies
extractor_id = extractor['id']
warehouse.delete_chunk_extractor(chunk_extractor_id=extractor_id)