Diffstat (limited to 'src/server')
-rw-r--r--   src/server/ApiManagers/AssistantManager.ts |  4 ++--
-rw-r--r--   src/server/chunker/pdf_chunker.py          | 20 ++++++++++----------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index c41f697db..4719541b9 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -538,7 +538,7 @@ export default class AssistantManager extends ApiManager {
// Spawn the Python process and track its progress/output
// eslint-disable-next-line no-use-before-define
- spawnPythonProcess(jobId, file_name, public_path);
+ spawnPythonProcess(jobId, public_path);
// Send the job ID back to the client for tracking
res.send({ jobId });
@@ -695,7 +695,7 @@ export default class AssistantManager extends ApiManager {
* @param file_name The name of the file to process.
* @param file_path The filepath of the file to process.
*/
-function spawnPythonProcess(jobId: string, file_name: string, file_path: string) {
+function spawnPythonProcess(jobId: string, file_path: string) {
const venvPath = path.join(__dirname, '../chunker/venv');
const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
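Note: with this change only the job id and the resolved file path are forwarded to the Python chunker; the separate file name argument is gone. As a rough illustration (not taken from pdf_chunker.py itself, whose actual CLI handling is not shown in this diff), the script side would then only need to read two positional arguments:

import sys

def main() -> None:
    # Hypothetical argument handling after this change: only a job id and a
    # file path are expected. Argument names and order are assumptions.
    job_id, file_path = sys.argv[1], sys.argv[2]
    print(f"chunking job {job_id}: {file_path}")

if __name__ == "__main__":
    main()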
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index a9dbcbb0c..697550f2e 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -21,7 +21,7 @@ import json
import os
import uuid # For generating unique IDs
from enum import Enum # Enums for types like document type and purpose
-import cohere # Embedding client
+import openai
import numpy as np
from PyPDF2 import PdfReader # PDF text extraction
from openai import OpenAI # OpenAI client for text completion
@@ -35,8 +35,8 @@ warnings.filterwarnings('ignore', message="torch.load")
dotenv.load_dotenv() # Load environment variables
# Fix for newer versions of PIL
-if parse(PIL.__version__) >= parse('10.0.0'):
- Image.LINEAR = Image.BILINEAR
+# if parse(PIL.__version__) >= parse('10.0.0'):
+# Image.LINEAR = Image.BILINEAR
# Global dictionary to track progress of document processing jobs
current_progress = {}
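The commented-out block above was a Pillow compatibility shim: Pillow 10 dropped the deprecated Image.LINEAR alias, and some layout-analysis dependencies still reference it. A guarded sketch of the same idea, shown only for context (this is not part of the commit):

from PIL import Image

# Restore the alias only if the installed Pillow no longer defines it.
if not hasattr(Image, "LINEAR"):
    Image.LINEAR = Image.BILINEAR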
@@ -727,19 +727,19 @@ class Document:
"""
Embed the text chunks using the Cohere API.
"""
- co = cohere.Client(os.getenv("COHERE_API_KEY")) # Initialize Cohere client with API key
+ openai = OpenAI() # Initialize OpenAI client (API key read from OPENAI_API_KEY)
batch_size = 90 # Batch size for embedding
chunks_len = len(self.chunks) # Total number of chunks to embed
for i in tqdm(range(0, chunks_len, batch_size), desc="Embedding Chunks"):
batch = self.chunks[i: min(i + batch_size, chunks_len)] # Get batch of chunks
texts = [chunk['metadata']['text'] for chunk in batch] # Extract text from each chunk
- chunk_embs_batch = co.embed(
- texts=texts,
- model="embed-english-v3.0", # Use Cohere's embedding model
- input_type="search_document" # Specify input type
+ chunk_embs_batch = openai.embeddings.create(
+ model="text-embedding-3-large",
+ input=texts,
+ encoding_format="float"
)
- for j, emb in enumerate(chunk_embs_batch.embeddings):
- self.chunks[i + j]['values'] = emb # Store the embeddings in the corresponding chunks
+ for j, data_val in enumerate(chunk_embs_batch.data):
+ self.chunks[i + j]['values'] = data_val.embedding # Store the embeddings in the corresponding chunks
def _generate_summary(self) -> str:
"""