7 files changed, 701 insertions, 16 deletions
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 91a7adf24..470f94a8d 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -164,7 +164,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             });
 
             // Process the document first to ensure it has a valid ID
-            this.docManager.processDocument(newLinkedDoc);
+            await this.docManager.processDocument(newLinkedDoc);
 
             // Add the document to the vectorstore which will also register chunks
             await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
@@ -648,7 +648,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             const { foundChunk, doc, dataDoc } = this.docManager.getSimplifiedChunkById(chunkId);
             console.log('doc: ', doc);
             console.log('dataDoc: ', dataDoc);
-            if (!foundChunk) {
+            if (!foundChunk || !doc) {
                 if (doc) {
                     console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
                     DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
@@ -1102,8 +1102,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
         // If there are stored doc IDs in our list of docs to add, process them
         if (this._linked_docs_to_add.size > 0) {
-            this._linked_docs_to_add.forEach(doc => {
-                this.docManager.processDocument(doc);
+            this._linked_docs_to_add.forEach(async doc => {
+                await this.docManager.processDocument(doc);
             });
         }
     }
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index 90b803d21..af44de520 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -12,6 +12,12 @@ const ragToolParams = [
         description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
         required: true,
     },
+    {
+        name: 'doc_ids',
+        type: 'string[]',
+        description: 'An optional array of document IDs to retrieve chunks from. If you want to retrieve chunks from all documents, leave this as an empty array: [] (DO NOT LEAVE THIS EMPTY).',
+        required: false,
+    },
 ] as const;
 
 type RAGToolParamsType = typeof ragToolParams;
@@ -69,7 +75,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> {
     }
 
     async execute(args: ParametersType<RAGToolParamsType>): Promise<Observation[]> {
-        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
+        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk, undefined, args.doc_ids ?? undefined);
         const formattedChunks = await this.getFormattedChunks(relevantChunks);
         return formattedChunks;
     }
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 43f14ea83..8e6edce8c 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -22,7 +22,7 @@ type SearchToolParamsType = typeof searchToolParams;
 
 const searchToolInfo: ToolInfo<SearchToolParamsType> = {
     name: 'searchTool',
-    citationRules: 'No citation needed. Cannot cite search results for a response. Use web scraping tools to cite specific information.',
+    citationRules: 'Always cite the search results for a response, if the search results are relevant to the response. Use the chunk_id to cite the search results. If the search results are not relevant to the response, do not cite them.   ',
     parameterRules: searchToolParams,
     description: 'Search the web to find a wide range of websites related to a query or multiple queries. Returns a list of websites and their overviews based on the search queries.',
 };
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 495a985cb..727d35e2c 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -22,6 +22,7 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = {
     name: 'websiteInfoScraper',
     description: 'Scrape detailed information from specific websites relevant to the user query. Returns the text content of the webpages for further analysis and grounding.',
     citationRules: `
+      !IMPORTANT! THESE CHUNKS REPLACE THE CHUNKS THAT ARE RETURNED FROM THE SEARCHTOOL.
       Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
 
       1. Grounded Text Tag Structure:
@@ -88,6 +89,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
         console.log(url);
         console.log(chunkDoc);
         console.log(chunkDoc.data);
+        const id = chunkDoc.id;
         // Validate URL format
         try {
             new URL(url); // This will throw if URL is invalid
@@ -130,7 +132,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                     if (retryCount === maxRetries) {
                         return {
                             type: 'text',
-                            text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
+                            text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
                         } as Observation;
                     }
 
@@ -142,7 +144,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                 // Process and return content if it looks good
                 return {
                     type: 'text',
-                    text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+                    text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
                 } as Observation;
             } catch (error) {
                 lastError = error instanceof Error ? error.message : 'Unknown error';
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 33eec5972..3c8b49f33 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -153,9 +153,9 @@ export class AgentDocumentManager {
                 console.log(`Found ${linkedDocs.length} linked documents via LinkManager`);
 
                 // Process the linked documents
-                linkedDocs.forEach((doc: Doc | undefined) => {
+                linkedDocs.forEach(async (doc: Doc | undefined) => {
                     if (doc) {
-                        this.processDocument(doc);
+                        await this.processDocument(doc);
                         console.log('Processed linked document:', doc[Id], doc.title, doc.type);
                     }
                 });
@@ -170,7 +170,7 @@ export class AgentDocumentManager {
      * @param doc The document to process
      */
     @action
-    public processDocument(doc: Doc): string {
+    public async processDocument(doc: Doc): Promise<string> {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
         if (doc.chunk_simplified) {
@@ -900,7 +900,7 @@ export class AgentDocumentManager {
                     }
                 });
 
-                const id = this.processDocument(doc);
+                const id = await this.processDocument(doc);
                 return id;
             } else {
                 throw new Error(`Error creating document. Created document not found.`);
@@ -1081,6 +1081,18 @@ export class AgentDocumentManager {
         return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId), dataDoc: this.getDataDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
+    public getChunkIdsFromDocIds(docIds: string[]): string[] {
+        // At most one result per requested document: the chunkId of the first simplified chunk
+        // (in map iteration order) whose doc_id matches. Documents without a match are skipped.
+        const registeredChunks = Array.from(this.simplifiedChunks.values());
+        const chunkIds: string[] = [];
+        for (const docId of docIds) {
+            const firstMatch = registeredChunks.find(chunk => chunk.doc_id === docId);
+            if (firstMatch !== undefined && firstMatch.chunkId !== undefined) chunkIds.push(firstMatch.chunkId);
+        }
+        return chunkIds;
+    }
+
     /**
      * Gets the original segments from a media document
      * @param doc The document containing original media segments
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 252672dfc..5c2d0e5ea 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -27,11 +27,16 @@ dotenv.config();
 export class Vectorstore {
     private pinecone!: Pinecone; // Pinecone client for managing the vector index.
     private index!: Index; // The specific Pinecone index used for document chunks.
+    private summaryIndex!: Index; // The Pinecone index used for file summaries.
     private openai!: OpenAI; // OpenAI client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
+    private summaryIndexName: string = 'file-summaries'; // Name for the summaries index.
     private _id!: string; // Unique ID for the Vectorstore instance.
     private docManager!: AgentDocumentManager; // Document manager for handling documents
+    private summaryCacheCount: number = 0; // Cache for the number of summaries
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
+    private debug: boolean = true; // Enable debugging
+    private initialized: boolean = false;
 
     /**
      * Initializes the Pinecone and OpenAI clients, sets up the document ID list,
@@ -40,6 +45,7 @@ export class Vectorstore {
      * @param docManager An instance of AgentDocumentManager to handle document management.
      */
     constructor(id: string, docManager: AgentDocumentManager) {
+        if (this.debug) console.log(`[DEBUG] Initializing Vectorstore with ID: ${id}`);
         const pineconeApiKey = 'pcsk_3txLxJ_9fxdmAph4csnq4yxoDF5De5A8bJvjWaXXigBgshy4eoXggrXcxATJiH8vzXbrKm';
         if (!pineconeApiKey) {
             console.log('PINECONE_API_KEY is not defined - Vectorstore will be unavailable');
@@ -51,7 +57,32 @@ export class Vectorstore {
         this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
         this._id = id;
         this.docManager = docManager;
-        this.initializeIndex();
+
+        // Proper async initialization sequence
+        this.initializeAsync(id);
+    }
+
+    /**
+     * Runs the asynchronous part of construction: sets up the document index, then the summary index,
+     * then marks the store initialized. A failure in either setup step is logged and leaves `initialized` false.
+     */
+    private async initializeAsync(id: string) {
+        try {
+            if (this.debug) console.log(`[DEBUG] Starting async initialization sequence for Vectorstore ID: ${id}`);
+
+            // The main document index is set up first, then the summary index
+            await this.initializeIndex();
+            await this.initializeSummaryIndex();
+            this.initialized = true;
+
+            // Diagnostics only: the self-test issues an embedding request, sleeps for two seconds and
+            // looks up file content, so it is skipped entirely when debugging is turned off.
+            if (!this.debug) return;
+            console.log(`[DEBUG] ✅ Vectorstore initialization complete, running test query...`);
+            await this.runSingleTestQuery();
+        } catch (error) {
+            console.error('[ERROR] Failed to initialize Vectorstore:', error);
+        }
     }
 
     /**
@@ -59,10 +90,13 @@ export class Vectorstore {
      * Sets the index to use cosine similarity for vector similarity calculations.
      */
     private async initializeIndex() {
+        if (this.debug) console.log(`[DEBUG] Initializing main document index: ${this.indexName}`);
         const indexList: IndexList = await this.pinecone.listIndexes();
+        if (this.debug) console.log(`[DEBUG] Available Pinecone indexes: ${indexList.indexes?.map(i => i.name).join(', ') || 'none'}`);
 
         // Check if the index already exists, otherwise create it.
         if (!indexList.indexes?.some(index => index.name === this.indexName)) {
+            if (this.debug) console.log(`[DEBUG] Creating new index: ${this.indexName}`);
             await this.pinecone.createIndex({
                 name: this.indexName,
                 dimension: 3072,
@@ -74,6 +108,9 @@ export class Vectorstore {
                     },
                 },
             });
+            if (this.debug) console.log(`[DEBUG] ✅ Index ${this.indexName} created successfully`);
+        } else {
+            if (this.debug) console.log(`[DEBUG] ✅ Using existing index: ${this.indexName}`);
         }
 
         // Set the index for future use.
@@ -81,6 +118,453 @@ export class Vectorstore {
     }
 
     /**
+     * Ensures the Pinecone index for file summaries exists (creating it when absent), binds it to
+     * `summaryIndex`, and then populates it via `processFileSummaries`.
+     */
+    private async initializeSummaryIndex() {
+        if (this.debug) console.log(`[DEBUG] Initializing file summaries index: ${this.summaryIndexName}`);
+        const indexList: IndexList = await this.pinecone.listIndexes();
+
+        // Check if the index already exists, otherwise create it.
+        if (!indexList.indexes?.some(index => index.name === this.summaryIndexName)) {
+            if (this.debug) console.log(`[DEBUG] Creating new summary index: ${this.summaryIndexName}`);
+            await this.pinecone.createIndex({
+                name: this.summaryIndexName,
+                dimension: 3072,
+                metric: 'cosine',
+                spec: {
+                    serverless: {
+                        cloud: 'aws',
+                        region: 'us-east-1',
+                    },
+                },
+                waitUntilReady: true, // stats are read and records upserted right below, so block until the index is usable
+                suppressConflicts: true, // another Vectorstore instance may have created it since listIndexes ran
+            });
+            if (this.debug) console.log(`[DEBUG] ✅ Summary index ${this.summaryIndexName} created successfully`);
+        } else {
+            if (this.debug) console.log(`[DEBUG] ✅ Using existing summary index: ${this.summaryIndexName}`);
+        }
+
+        // Bind the handle first: processFileSummaries reads this.summaryIndex.
+        this.summaryIndex = this.pinecone.Index(this.summaryIndexName);
+        await this.processFileSummaries();
+    }
+
+    /**
+     * Fetches the filepath-to-summary map from the server and rebuilds the summary index when its
+     * record count differs from the number of summaries. Errors are logged rather than thrown.
+     */
+    private async processFileSummaries() {
+        if (this.debug) console.log(`[DEBUG] Starting file summaries processing`);
+        try {
+            // Get file summaries from the server
+            if (this.debug) console.log(`[DEBUG] Fetching file summaries from server...`);
+            const response = await Networking.FetchFromServer('/getFileSummaries');
+            if (!response) {
+                console.error('[ERROR] Failed to fetch file summaries');
+                return;
+            }
+            if (this.debug) console.log(`[DEBUG] File summaries response received (${response.length} bytes)`);
+
+            const summaries: Record<string, string> = JSON.parse(response);
+            const filepaths = Object.keys(summaries);
+            const summaryCount = filepaths.length;
+            this.summaryCacheCount = summaryCount;
+            if (this.debug) console.log(`[DEBUG] File summaries parsed: ${summaryCount} files`);
+
+            // Nothing to embed; returning here also keeps the sample log below from dereferencing filepaths[0].
+            if (summaryCount === 0) return;
+            if (this.debug) {
+                console.log(`[DEBUG] Sample filepaths: ${filepaths.slice(0, 3).join(', ')}...`);
+                console.log(`[DEBUG] Sample summary: "${String(summaries[filepaths[0]]).substring(0, 100)}..."`);
+            }
+
+            // Check if index already has the correct number of summaries
+            try {
+                if (this.debug) console.log(`[DEBUG] Checking summary index stats...`);
+                const indexStats = await this.summaryIndex.describeIndexStats();
+                const vectorCount = indexStats.totalRecordCount ?? 0;
+                if (this.debug) console.log(`[DEBUG] Summary index has ${vectorCount} records, expecting ${summaryCount}`);
+                if (vectorCount === summaryCount) {
+                    if (this.debug) console.log(`[DEBUG] ✅ Summary index already contains ${vectorCount} entries, skipping embedding.`);
+                    return;
+                }
+                if (this.debug) console.log(`[DEBUG] ⚠️ Summary index contains ${vectorCount} entries, but there are ${summaryCount} summaries. Re-indexing.`);
+                // Records get fresh random IDs on every upsert, so clear stale ones first or duplicates pile up and the counts never match.
+                if (vectorCount > 0) await this.summaryIndex.deleteAll();
+            } catch (error) {
+                console.error('[ERROR] Error checking summary index stats:', error);
+            }
+
+            // If we get here, we need to embed the summaries
+            await this.embedAndIndexFileSummaries(summaries);
+        } catch (error) {
+            console.error('[ERROR] Error processing file summaries:', error);
+        }
+    }
+
+    /**
+     * Embeds the summaries with OpenAI in batches and upserts one record per file (vector plus filepath/summary metadata) into the summary index; per-batch errors are logged, not thrown.
+     * @param summaries Object mapping filepaths to summaries
+     */
+    private async embedAndIndexFileSummaries(summaries: Record<string, string>) {
+        if (this.debug) console.log(`[DEBUG] Starting embedding and indexing of file summaries...`);
+
+        const filepaths = Object.keys(summaries);
+        const summaryTexts = Object.values(summaries);
+
+        // Work in slices of 100 entries (presumably to stay under embedding/upsert request limits - TODO confirm)
+        const batchSize = 100;
+        const totalBatches = Math.ceil(filepaths.length / batchSize);
+
+        if (this.debug) console.log(`[DEBUG] Processing ${filepaths.length} files in ${totalBatches} batches of size ${batchSize}`);
+
+        for (let i = 0; i < filepaths.length; i += batchSize) {
+            const batchFilepaths = filepaths.slice(i, i + batchSize);
+            const batchTexts = summaryTexts.slice(i, i + batchSize);
+
+            if (this.debug) {
+                console.log(`[DEBUG] Processing batch ${Math.floor(i / batchSize) + 1}/${totalBatches}`);
+                console.log(`[DEBUG] First file in batch: ${batchFilepaths[0]}`);
+                console.log(`[DEBUG] First summary in batch: "${batchTexts[0].substring(0, 50)}..."`);
+            }
+
+            try {
+                // One embeddings request covers the whole batch; the request duration is logged in debug mode
+                if (this.debug) console.log(`[DEBUG] Generating embeddings for batch of ${batchTexts.length} summaries...`);
+                const startTime = Date.now();
+                const embeddingResponse = await this.openai.embeddings.create({
+                    model: 'text-embedding-3-large',
+                    input: batchTexts,
+                    encoding_format: 'float',
+                });
+                const duration = Date.now() - startTime;
+                if (this.debug) console.log(`[DEBUG] ✅ Embeddings generated in ${duration}ms`);
+
+                // Pair each text with the embedding at the same position in the response
+                if (this.debug) console.log(`[DEBUG] Preparing Pinecone records...`);
+                const pineconeRecords: PineconeRecord[] = batchTexts.map((text, index) => {
+                    const embedding = (embeddingResponse.data as Embedding[])[index].embedding;
+                    if (this.debug && index === 0) console.log(`[DEBUG] Sample embedding dimensions: ${embedding.length}, first few values: [${embedding.slice(0, 5).join(', ')}...]`);
+
+                    return {
+                        id: uuidv4(), // Random ID per record. NOTE(review): a re-run therefore adds new records instead of overwriting - confirm intended
+                        values: embedding,
+                        metadata: {
+                            filepath: batchFilepaths[index],
+                            summary: text,
+                        } as RecordMetadata,
+                    };
+                });
+
+                // Upsert the batch; a failure here is handled by the inner catch, not the outer one
+                if (this.debug) console.log(`[DEBUG] Upserting ${pineconeRecords.length} records to Pinecone...`);
+                const upsertStart = Date.now();
+                try {
+                    await this.summaryIndex.upsert(pineconeRecords);
+                    const upsertDuration = Date.now() - upsertStart;
+                    if (this.debug) console.log(`[DEBUG] ✅ Batch ${Math.floor(i / batchSize) + 1}/${totalBatches} indexed in ${upsertDuration}ms`);
+                } catch (upsertError) {
+                    console.error(`[ERROR] Failed to upsert batch ${Math.floor(i / batchSize) + 1}/${totalBatches} to Pinecone:`, upsertError);
+                    // Retry only when the failed batch held more than 20 records; smaller failed batches are dropped after the log above
+                    if (batchTexts.length > 20) {
+                        console.log(`[DEBUG] 🔄 Retrying with smaller batch size...`);
+                        // Split the batch in half; each half is retried exactly once (no further splitting)
+                        const midpoint = Math.floor(batchTexts.length / 2);
+                        const firstHalf = {
+                            filepaths: batchFilepaths.slice(0, midpoint),
+                            texts: batchTexts.slice(0, midpoint),
+                        };
+                        const secondHalf = {
+                            filepaths: batchFilepaths.slice(midpoint),
+                            texts: batchTexts.slice(midpoint),
+                        };
+
+                        // Helper that embeds and upserts one half. NOTE(review): embeddings are requested again rather than reused
+                        const retryBatch = async (paths: string[], texts: string[], batchNum: string) => {
+                            try {
+                                if (this.debug) console.log(`[DEBUG] Generating embeddings for sub-batch ${batchNum}...`);
+                                const embRes = await this.openai.embeddings.create({
+                                    model: 'text-embedding-3-large',
+                                    input: texts,
+                                    encoding_format: 'float',
+                                });
+
+                                const records = texts.map((t, idx) => ({
+                                    id: uuidv4(),
+                                    values: (embRes.data as Embedding[])[idx].embedding,
+                                    metadata: {
+                                        filepath: paths[idx],
+                                        summary: t,
+                                    } as RecordMetadata,
+                                }));
+
+                                if (this.debug) console.log(`[DEBUG] Upserting sub-batch ${batchNum} (${records.length} records)...`);
+                                await this.summaryIndex.upsert(records);
+                                if (this.debug) console.log(`[DEBUG] ✅ Sub-batch ${batchNum} upserted successfully`);
+                            } catch (retryError) {
+                                console.error(`[ERROR] Failed to upsert sub-batch ${batchNum}:`, retryError);
+                            }
+                        };
+
+                        await retryBatch(firstHalf.filepaths, firstHalf.texts, `${Math.floor(i / batchSize) + 1}.1`);
+                        await retryBatch(secondHalf.filepaths, secondHalf.texts, `${Math.floor(i / batchSize) + 1}.2`);
+                    }
+                }
+            } catch (error) {
+                console.error('[ERROR] Error processing batch:', error);
+            }
+        }
+
+        if (this.debug) console.log(`[DEBUG] ✅ File summary indexing complete for all ${filepaths.length} files`);
+
+        // Log the resulting record count; nothing is retried or thrown when it differs from the expected total
+        try {
+            const indexStats = await this.summaryIndex.describeIndexStats();
+            const vectorCount = indexStats.totalRecordCount;
+            if (this.debug) console.log(`[DEBUG] 🔍 Final index verification: ${vectorCount} records in Pinecone index (expected ${filepaths.length})`);
+        } catch (error) {
+            console.error('[ERROR] Failed to verify index stats:', error);
+        }
+    }
+
+    /**
+     * Searches for file summaries similar to the given query.
+     * @param query The search query
+     * @param topK Number of results to return (default: 5)
+     * @returns Array of filepath and summary pairs with relevance scores (empty when uninitialized, on errors, or with no matches)
+     */
+    async searchFileSummaries(query: string, topK: number = 5): Promise<Array<{ filepath: string; summary: string; score?: number }>> {
+        if (!this.initialized) {
+            console.error('[ERROR] Cannot search - Vectorstore not fully initialized');
+            return [];
+        }
+
+        if (this.debug) console.log(`[DEBUG] Searching file summaries for query: "${query}" (topK=${topK})`);
+        try {
+            // Generate embedding for the query
+            if (this.debug) console.log(`[DEBUG] Generating embedding for query...`);
+            const startTime = Date.now();
+            const queryEmbeddingResponse = await this.openai.embeddings.create({
+                model: 'text-embedding-3-large',
+                input: query,
+                encoding_format: 'float',
+            });
+            const duration = Date.now() - startTime;
+
+            const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
+            if (this.debug) {
+                console.log(`[DEBUG] ✅ Query embedding generated in ${duration}ms`);
+                console.log(`[DEBUG] Query embedding dimensions: ${queryEmbedding.length}`);
+            }
+
+            // Check if summary index is ready (the only describeIndexStats call made per search)
+            try {
+                const indexStats = await this.summaryIndex.describeIndexStats();
+                const vectorCount = indexStats.totalRecordCount;
+                if (this.debug) console.log(`[DEBUG] Summary index contains ${vectorCount} records`);
+                if (this.debug) console.log(`[DEBUG] Index stats:`, indexStats);
+
+                if (vectorCount === 0) {
+                    console.error('[ERROR] Summary index is empty, cannot perform search');
+                    return [];
+                }
+            } catch (statsError) {
+                // Best effort: a failed stats call is logged and the query below is still attempted.
+                console.error('[ERROR] Failed to check summary index stats:', statsError);
+                console.error('[ERROR] Stats error details:', JSON.stringify(statsError));
+            }
+
+            // Connectivity diagnostics cost an extra Pinecone round trip, so they only run in debug mode.
+            if (this.debug) {
+                console.log(`[DEBUG] Testing Pinecone connection...`);
+                try {
+                    const indexes = await this.pinecone.listIndexes();
+                    console.log(`[DEBUG] Available Pinecone indexes: ${indexes.indexes?.map(idx => idx.name).join(', ')}`);
+                } catch (connectionError) {
+                    console.error('[ERROR] Could not connect to Pinecone:', connectionError);
+                }
+            }
+
+            // Query the summaries index
+            if (this.debug) console.log(`[DEBUG] Querying Pinecone summary index (${this.summaryIndexName})...`);
+            const queryStart = Date.now();
+
+            let queryResponse;
+            try {
+                queryResponse = await this.summaryIndex.query({
+                    vector: queryEmbedding,
+                    topK,
+                    includeMetadata: true,
+                });
+
+                const queryDuration = Date.now() - queryStart;
+
+                if (this.debug) {
+                    console.log(`[DEBUG] ✅ Pinecone query completed in ${queryDuration}ms`);
+                    console.log(`[DEBUG] Raw Pinecone response:`, JSON.stringify(queryResponse, null, 2));
+                    if (queryResponse.matches) {
+                        console.log(`[DEBUG] Found ${queryResponse.matches.length} matching summaries`);
+                        console.log(`[DEBUG] Match scores: ${queryResponse.matches.map(m => m.score?.toFixed(4)).join(', ')}`);
+                    } else {
+                        console.log(`[DEBUG] No matches in response`);
+                    }
+                }
+            } catch (queryError) {
+                console.error('[ERROR] Pinecone query failed:', queryError);
+                if (typeof queryError === 'object' && queryError !== null) {
+                    console.error('[ERROR] Query error details:', JSON.stringify(queryError, null, 2));
+                }
+                return [];
+            }
+
+            if (!queryResponse || !queryResponse.matches || queryResponse.matches.length === 0) {
+                console.log('[DEBUG] ⚠️ No matches found in Pinecone for query');
+                return [];
+            }
+
+            // Format results
+            const results = queryResponse.matches.map(match => {
+                if (!match.metadata) {
+                    console.error('[ERROR] Match is missing metadata:', match);
+                    return { filepath: 'unknown', summary: 'No summary available' };
+                }
+
+                return {
+                    filepath: (match.metadata as { filepath: string }).filepath || 'unknown',
+                    summary: (match.metadata as { summary: string }).summary || 'No summary available',
+                    score: match.score,
+                };
+            });
+
+            if (this.debug) {
+                if (results.length > 0) {
+                    console.log(`[DEBUG] Top result filepath: ${results[0]?.filepath}`);
+                    console.log(`[DEBUG] Top result score: ${results[0]?.score}`);
+                    console.log(`[DEBUG] Top result summary excerpt: "${results[0]?.summary?.substring(0, 100)}..."`);
+                } else {
+                    console.log(`[DEBUG] No results returned after processing`);
+                }
+            }
+
+            return results;
+        } catch (error) {
+            console.error('[ERROR] Error searching file summaries:', error);
+            if (typeof error === 'object' && error !== null) {
+                console.error('[ERROR] Full error details:', JSON.stringify(error, null, 2));
+            }
+            return [];
+        }
+    }
+
+    /**
+     * Smoke test run after setup: logs summary-index stats, issues one fixed query, and fetches the top hit's content.
+     */
+    private async runSingleTestQuery() {
+        console.log(`\n[TEST] Running single test query to validate file summary search functionality...`);
+
+        // Give up immediately when the summary index cannot be described
+        try {
+            const stats = await this.summaryIndex.describeIndexStats();
+            console.log(`[TEST] Pinecone index stats:`, JSON.stringify(stats, null, 2));
+            console.log(`[TEST] Summary index contains ${stats.totalRecordCount} indexed summaries`);
+        } catch (error) {
+            console.error('[TEST] ❌ Failed to access Pinecone index:', error);
+            return;
+        }
+
+        // Fixed two-second pause before querying
+        console.log('[TEST] Waiting 2 seconds for Pinecone indexing to complete...');
+        await new Promise(resolve => setTimeout(resolve, 2000));
+
+        // The probe query is hard-coded
+        const query = 'React components for the UI';
+        console.log(`\n[TEST] Executing query: "${query}"`);
+
+        try {
+            const hits = await this.searchFileSummaries(query);
+            console.log(`[TEST] Search returned ${hits.length} results:`);
+
+            for (const [rank, hit] of hits.entries()) {
+                console.log(`\n[TEST] Result ${rank + 1}:`);
+                console.log(`[TEST] File: ${hit.filepath}`);
+                console.log(`[TEST] Score: ${hit.score}`);
+                console.log(`[TEST] Summary: "${hit.summary?.substring(0, 150)}..."`);
+            }
+
+            // Only the best-ranked hit has its content fetched
+            if (hits.length === 0) {
+                console.log(`\n[TEST] ⚠️ No results to fetch content for`);
+            } else {
+                const topFilepath = hits[0].filepath;
+                console.log(`\n[TEST] Fetching full content for top result: ${topFilepath}`);
+                const content = await this.getFileContent(topFilepath);
+
+                if (!content) {
+                    console.log(`[TEST] ❌ Failed to retrieve content for ${topFilepath}`);
+                } else {
+                    console.log(`[TEST] ✅ Content retrieved successfully (${content.length} chars)`);
+                    console.log(`[TEST] Content excerpt:\n---\n${content.substring(0, 300)}...\n---`);
+                }
+            }
+
+            console.log(`\n[TEST] ✅ Test query completed`);
+        } catch (testError) {
+            console.error(`[TEST] ❌ Test query failed:`, testError);
+            if (testError !== null && typeof testError === 'object') {
+                console.error('[TEST] Full error details:', JSON.stringify(testError, null, 2));
+            }
+        }
+    }
+
+    /**
+     * Gets the full content of a file by its filepath.
+     * @param filepath The filepath to look up
+     * @returns The file content, or null if the server reports an error or the request fails
+     */
+    async getFileContent(filepath: string): Promise<string | null> {
+        if (this.debug) console.log(`[DEBUG] Getting file content for: ${filepath}`);
+        try {
+            const requestStartedAt = Date.now();
+
+            // Call the endpoint with fetch directly so that the response body can be
+            // consumed as raw text rather than being parsed as JSON
+            const requestInit = {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ filepath }),
+            };
+            const response = await fetch('/getRawFileContent', requestInit);
+
+            // An unsuccessful status is logged with the server's message and reported as null
+            if (!response.ok) {
+                const serverMessage = await response.text();
+                console.error(`[ERROR] Server returned error ${response.status}: ${serverMessage}`);
+                return null;
+            }
+
+            // The response body is the file text itself; read it verbatim
+            const fileText = await response.text();
+            const elapsedMs = Date.now() - requestStartedAt;
+
+            if (this.debug) {
+                console.log(`[DEBUG] ✅ File content retrieved in ${elapsedMs}ms`);
+                console.log(`[DEBUG] Content length: ${fileText.length} chars`);
+                console.log(`[DEBUG] Content excerpt: "${fileText.substring(0, 100)}..."`);
+            }
+
+            return fileText;
+        } catch (error) {
+            console.error('[ERROR] Error getting file content:', error);
+            if (error !== null && typeof error === 'object') {
+                console.error('[ERROR] Full error details:', JSON.stringify(error, null, 2));
+            }
+            return null;
+        }
+    }
+
+    /**
      * Adds an AI document to the vectorstore. Handles media file processing for audio/video,
      * and text embedding for all document types. Updates document metadata during processing.
      * @param doc The document to add.
@@ -303,7 +787,7 @@ export class Vectorstore {
      * @param topK The number of top results to return (default is 10).
      * @returns A list of document chunks that match the query.
      */
-    async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
+    async retrieve(query: string, topK: number = 10, docIds?: string[]): Promise<RAGChunk[]> {
         console.log(`Retrieving chunks for query: ${query}`);
         try {
             // Generate an embedding for the query using OpenAI.
@@ -314,15 +798,16 @@ export class Vectorstore {
             });
 
             const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
+            const _docIds = docIds?.length === 0 || !docIds ? this.docManager.docIds : docIds;
 
-            console.log('Using document IDs for retrieval:', this.docManager.docIds);
+            console.log('Using document IDs for retrieval:', _docIds);
 
             // Query the Pinecone index using the embedding and filter by document IDs.
             // We'll query based on document IDs that are registered in the document manager
             const queryResponse: QueryResponse = await this.index.query({
                 vector: queryEmbedding,
                 filter: {
-                    doc_id: { $in: this.docManager.docIds },
+                    doc_id: { $in: _docIds },
                 },
                 topK,
                 includeValues: true,
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index b7ce4f663..9d0427b52 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -39,6 +39,7 @@ export enum Directory {
     csv = 'csv',
     chunk_images = 'chunk_images',
     scrape_images = 'scrape_images',
+    vectorstore = 'vectorstore',
 }
 
 // In-memory job tracking
@@ -92,6 +93,119 @@ export default class AssistantManager extends ApiManager {
         const customsearch = google.customsearch('v1');
         const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY });
 
+        // Register an endpoint that serves the raw text of file_summaries.json from the vectorstore directory
+        register({
+            method: Method.GET,
+            subscription: '/getFileSummaries',
+            secureHandler: async ({ res }) => {
+                const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_summaries.json');
+                try {
+                    // Read asynchronously so a large summaries file does not block the event loop;
+                    // the body is sent as the raw file text, exactly as stored on disk
+                    const data = await fs.promises.readFile(filePath, 'utf8');
+                    res.send(data);
+                } catch (error) {
+                    // A missing file is an expected condition (404), not a server fault
+                    if ((error as NodeJS.ErrnoException)?.code === 'ENOENT') {
+                        res.status(404).send({ error: 'File summaries not found' });
+                        return;
+                    }
+                    console.error('Error retrieving file summaries:', error);
+                    res.status(500).send({
+                        error: 'Failed to retrieve file summaries',
+                    });
+                }
+            },
+        });
+
+        // Register an endpoint that returns a single file's content, looked up by filepath in file_content.json
+        register({
+            method: Method.POST,
+            subscription: '/getFileContent',
+            secureHandler: async ({ req, res }) => {
+                const { filepath } = req.body ?? {};
+
+                if (typeof filepath !== 'string' || !filepath) {
+                    res.status(400).send({ error: 'Filepath is required' });
+                    return;
+                }
+
+                try {
+                    // file_content.json is parsed below as a map from filepath to that file's content
+                    const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+                    if (!fs.existsSync(filePath)) {
+                        res.status(404).send({ error: 'File content database not found' });
+                        return;
+                    }
+
+                    console.log(`[DEBUG] Retrieving content for: ${filepath}`);
+
+                    // Accumulate the streamed text; the complete document is needed before it can be parsed
+                    const readStream = fs.createReadStream(filePath, { encoding: 'utf8' });
+                    let jsonData = '';
+
+                    readStream.on('data', chunk => {
+                        jsonData += chunk;
+                    });
+
+                    readStream.on('end', () => {
+                        try {
+                            const contentMap = JSON.parse(jsonData);
+                            // Accept only the map's own string entries: an inherited key such as "constructor"
+                            // is not file content, while an empty string is valid content for an empty file
+                            const hasEntry = Object.prototype.hasOwnProperty.call(Object(contentMap), filepath);
+                            if (!hasEntry || typeof contentMap[filepath] !== 'string') {
+                                console.log(`[DEBUG] Content not found for: ${filepath}`);
+                                res.status(404).send({ error: `Content not found for filepath: ${filepath}` });
+                                return;
+                            }
+
+                            // Return the file content as is, not as JSON
+                            console.log(`[DEBUG] Found content for: ${filepath} (${contentMap[filepath].length} chars)`);
+                            res.send(contentMap[filepath]);
+                        } catch (parseError) {
+                            console.error('Error parsing file_content.json:', parseError);
+                            res.status(500).send({
+                                error: 'Failed to parse file content database',
+                            });
+                        }
+                    });
+
+                    readStream.on('error', streamError => {
+                        console.error('Error reading file_content.json:', streamError);
+                        res.status(500).send({
+                            error: 'Failed to read file content database',
+                        });
+                    });
+                } catch (error) {
+                    console.error('Error retrieving file content:', error);
+                    res.status(500).send({
+                        error: 'Failed to retrieve file content',
+                    });
+                }
+            },
+        });
+
+        // Register a placeholder endpoint for file summary search; it validates the query but performs no search
+        register({
+            method: Method.POST,
+            subscription: '/searchFileSummaries',
+            secureHandler: async ({ req, res }) => {
+                const { query } = req.body ?? {};
+
+                if (!query) {
+                    res.status(400).send({ error: 'Search query is required' });
+                    return;
+                }
+
+                // No search runs on the server: the handler only answers with a notice
+                // directing callers to the Vectorstore class, which implements the search
+
+                res.send({ message: 'This endpoint should be called through the Vectorstore class' });
+            },
+        });
+
         // Register Wikipedia summary API route
         register({
             method: Method.POST,
@@ -848,6 +962,72 @@ export default class AssistantManager extends ApiManager {
                 }
             },
         });
+
+        // Register an endpoint that returns a single file's content from file_content.json as plain text
+        register({
+            method: Method.POST,
+            subscription: '/getRawFileContent',
+            secureHandler: async ({ req, res }) => {
+                const { filepath } = req.body ?? {};
+
+                if (typeof filepath !== 'string' || !filepath) {
+                    res.status(400).send('Filepath is required');
+                    return;
+                }
+
+                try {
+                    // file_content.json is parsed below as a map from filepath to that file's content
+                    const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+                    if (!fs.existsSync(filePath)) {
+                        res.status(404).send('File content database not found');
+                        return;
+                    }
+
+                    console.log(`[DEBUG] Retrieving raw content for: ${filepath}`);
+
+                    // Accumulate the streamed text; the complete document is needed before it can be parsed
+                    const readStream = fs.createReadStream(filePath, { encoding: 'utf8' });
+                    let jsonData = '';
+
+                    readStream.on('data', chunk => {
+                        jsonData += chunk;
+                    });
+
+                    readStream.on('end', () => {
+                        try {
+                            const contentMap = JSON.parse(jsonData);
+                            // Respond as plain text in every case below: the not-found message echoes the
+                            // requested filepath, and the content itself must reach the client unparsed
+                            res.setHeader('Content-Type', 'text/plain');
+                            // Accept only the map's own string entries: an inherited key such as "constructor"
+                            // is not file content, while an empty string is valid content for an empty file
+                            const hasEntry = Object.prototype.hasOwnProperty.call(Object(contentMap), filepath);
+                            if (!hasEntry || typeof contentMap[filepath] !== 'string') {
+                                console.log(`[DEBUG] Content not found for: ${filepath}`);
+                                res.status(404).send(`Content not found for filepath: ${filepath}`);
+                                return;
+                            }
+
+                            // Return the file content as plain text
+                            console.log(`[DEBUG] Found content for: ${filepath} (${contentMap[filepath].length} chars)`);
+                            res.send(contentMap[filepath]);
+                        } catch (parseError) {
+                            console.error('Error parsing file_content.json:', parseError);
+                            res.status(500).send('Failed to parse file content database');
+                        }
+                    });
+
+                    readStream.on('error', streamError => {
+                        console.error('Error reading file_content.json:', streamError);
+                        res.status(500).send('Failed to read file content database');
+                    });
+                } catch (error) {
+                    console.error('Error retrieving file content:', error);
+                    res.status(500).send('Failed to retrieve file content');
+                }
+            },
+        });
     }
 }