aboutsummaryrefslogtreecommitdiff
path: root/src/server/chunker
diff options
context:
space:
mode:
Diffstat (limited to 'src/server/chunker')
-rw-r--r--src/server/chunker/requirements.txt37
1 files changed, 29 insertions, 8 deletions
diff --git a/src/server/chunker/requirements.txt b/src/server/chunker/requirements.txt
index 20bd486e5..586bbe505 100644
--- a/src/server/chunker/requirements.txt
+++ b/src/server/chunker/requirements.txt
@@ -1,15 +1,36 @@
+# Prefer official CPU wheels from the PyTorch index
+--extra-index-url https://download.pytorch.org/whl/cpu
+
+###############################################################################
+# Stable env for pdf_chunker.py #
+###############################################################################
+
+# ─── LLM clients ─────────────────────────────────────────────────────────────
+openai==1.40.6
+httpx==0.27.2 # <0.28 → avoids "proxies=" crash
anthropic==0.34.0
cohere==5.8.0
-python-dotenv==1.0.1
+
+# ─── Torch stack (CPU) ───────────────────────────────────────────────────────
+torch<=2.7.1
+torchvision<=0.22.1 # matches torch 2.7.x
+torchaudio<=2.7.1
+
+# ─── Vision / OCR / PDF processing ───────────────────────────────────────────
+ultralyticsplus==0.0.28
+easyocr==1.7.0
pymupdf==1.22.2
-lxml==5.3.0
+PyPDF2==3.0.1
+pytesseract==0.3.10
+Pillow==10.4.0
layoutparser==0.3.4
+lxml==5.3.0
+
+# ─── ML / maths ──────────────────────────────────────────────────────────────
numpy==1.26.4
-openai==1.40.6
-Pillow==10.4.0
-pytesseract==0.3.10
-PyPDF2==3.0.1
scikit-learn==1.5.1
+
+# ─── Utilities ──────────────────────────────────────────────────────────────
tqdm==4.66.5
-ultralyticsplus==0.0.28
-easyocr==1.7.0
\ No newline at end of file
+python-dotenv==1.0.1
+packaging==24.0
\ No newline at end of file