diff options
Diffstat (limited to 'src/server/chunker/pdf_chunker.py')
-rw-r--r-- | src/server/chunker/pdf_chunker.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index 04d9f51a4..914594f1e 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -307,7 +307,7 @@ class PDFChunker: page_texts = await self.extract_text_from_masked_pages(pages, job_id) # Extract text from masked pages update_progress(job_id, "Processing text...", 0) - text_chunks = self.chunk_text_with_metadata(page_texts, max_words=1000, job_id=job_id) # Chunk text into smaller parts + text_chunks = self.chunk_text_with_metadata(page_texts, max_words=2000, job_id=job_id) # Chunk text into smaller parts # Combine text and visual elements into a unified structure (chunks) chunks = self.combine_chunks(text_chunks, [elem for page in pages for elem in page.elements], file_name, |