diff options
Diffstat (limited to 'src')
32 files changed, 3016 insertions, 819 deletions
diff --git a/src/client/documents/DocumentTypes.ts b/src/client/documents/DocumentTypes.ts index 56d505681..bd0ae40b8 100644 --- a/src/client/documents/DocumentTypes.ts +++ b/src/client/documents/DocumentTypes.ts @@ -31,6 +31,7 @@ export enum DocumentType { DATAVIZ = 'dataviz', LOADING = 'loading', SIMULATION = 'simulation', // physics simulation + MESSAGE = 'message', // chat message // special purpose wrappers that either take no data or are compositions of lower level types LINK = 'link', diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index d5a7b0465..b7ea11bce 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -810,6 +810,11 @@ export namespace Docs { export function RTFDocument(field: RichTextField, options: DocumentOptions = {}, fieldKey: string = 'text') { return InstanceFromProto(Prototypes.get(DocumentType.RTF), field, options, undefined, fieldKey); } + + export function MessageDocument(field: string, options: DocumentOptions = {}, fieldKey: string = 'data') { + return InstanceFromProto(Prototypes.get(DocumentType.MESSAGE), field, options, undefined, fieldKey); + } + export function TextDocument(text: string, options: DocumentOptions = {}, fieldKey: string = 'text') { const rtf = { doc: { diff --git a/src/client/util/CurrentUserUtils.ts b/src/client/util/CurrentUserUtils.ts index f042f33ce..6aca86923 100644 --- a/src/client/util/CurrentUserUtils.ts +++ b/src/client/util/CurrentUserUtils.ts @@ -365,7 +365,7 @@ pie title Minerals in my tap water {key: "Button", creator: Docs.Create.ButtonDocument, opts: { _width: 150, _height: 50, _xPadding: 10, _yPadding: 10, title_custom: true, waitForDoubleClickToClick: 'never'}, scripts: {onClick: FollowLinkScript()?.script.originalScript ?? 
""}}, {key: "Script", creator: opts => Docs.Create.ScriptingDocument(null, opts), opts: { _width: 200, _height: 250, }}, {key: "DataViz", creator: opts => Docs.Create.DataVizDocument("/users/rz/Downloads/addresses.csv", opts), opts: { _width: 300, _height: 300 }}, - {key: "Chat", creator: Docs.Create.ChatDocument, opts: { _width: 300, _height: 300, }}, + {key: "Chat", creator: Docs.Create.ChatDocument, opts: { _width: 500, _height: 500, }}, {key: "Header", creator: headerTemplate, opts: { _width: 300, _height: 120, _header_pointerEvents: "all", _header_height: 50, _header_fontSize: 9,_layout_autoHeightMargins: 50, _layout_autoHeight: true, treeView_HideUnrendered: true}}, {key: "ViewSlide", creator: slideView, opts: { _width: 400, _height: 300, _xMargin: 3, _yMargin: 3,}}, {key: "Trail", creator: Docs.Create.PresDocument, opts: { _width: 400, _height: 30, _type_collection: CollectionViewType.Stacking, dropAction: dropActionType.embed, treeView_HideTitle: true, _layout_fitWidth:true, layout_boxShadow: "0 0" }}, diff --git a/src/client/views/collections/CollectionNoteTakingViewColumn.tsx b/src/client/views/collections/CollectionNoteTakingViewColumn.tsx index 8c6a6b551..fc5f5cb71 100644 --- a/src/client/views/collections/CollectionNoteTakingViewColumn.tsx +++ b/src/client/views/collections/CollectionNoteTakingViewColumn.tsx @@ -1,5 +1,5 @@ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; -import { action, computed, makeObservable, observable } from 'mobx'; +import { action, computed, makeObservable, observable, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import * as React from 'react'; import { lightOrDark, returnEmptyString } from '../../../ClientUtils'; @@ -87,12 +87,16 @@ export class CollectionNoteTakingViewColumn extends ObservableReactComponent<CSV }; componentDidMount(): void { - this._ele && this.props.refList.push(this._ele); + runInAction(() => { + this._ele && this.props.refList.push(this._ele); + }); } 
import OpenAI from 'openai';
import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from './types';
import { getReactPrompt } from './prompts';
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
import { Vectorstore } from './vectorstore/Vectorstore';
import { ChatCompletionMessageParam } from 'openai/resources';
import dotenv from 'dotenv';
import { CalculateTool } from './tools/CalculateTool';
import { RAGTool } from './tools/RAGTool';
import { DataAnalysisTool } from './tools/DataAnalysisTool';
import { WebsiteInfoScraperTool } from './tools/WebsiteInfoScraperTool';
import { SearchTool } from './tools/SearchTool';
import { NoTool } from './tools/NoTool';
import { on } from 'events';
import { v4 as uuidv4 } from 'uuid';
import { AnswerParser } from './AnswerParser';
import { StreamedAnswerParser } from './StreamedAnswerParser';
import { CreateCSVTool } from './tools/CreateCSVTool';

dotenv.config();

/**
 * Drives a multi-turn, XML "stage"-based conversation with an OpenAI chat model.
 *
 * Each turn the model's reply is parsed as a `<stage>` element. Depending on the
 * child tags found, the agent either sends the chosen tool's action rules back to
 * the model (`<action>`), runs the tool and sends its observation back
 * (`<action_input>`), or parses and returns the final answer (`<answer>`).
 */
export class Agent {
    private client: OpenAI;
    // Tools keyed by the action name the model must emit inside <action>.
    private tools: Record<string, Tool<any>>;
    // Plain user questions, one entry per askAgent call.
    private messages: AgentMessage[] = [];
    // Full transcript (system prompt, stages, observations) sent to the model; rebuilt for every query.
    private interMessages: AgentMessage[] = [];
    private vectorstore: Vectorstore;
    private _history: () => string;
    private _summaries: () => string;
    private _csvData: () => { filename: string; id: string; text: string }[];
    private actionNumber: number = 0;
    private thoughtNumber: number = 0;
    // Index of the processing entry (thought / action description) currently being streamed.
    private processingNumber: number = 0;
    private processingInfo: ProcessingInfo[] = [];
    private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser();

    /**
     * @param _vectorstore vector store handed to the RAG tool
     * @param summaries callback forwarded to getReactPrompt when the system prompt is built
     * @param history callback returning the formatted chat history; called once per query
     * @param csvData callback handed to the data-analysis tool
     * @param addLinkedUrlDoc callback handed to the website-scraper and search tools
     * @param createCSVInDash callback handed to the create-CSV tool
     */
    constructor(
        _vectorstore: Vectorstore,
        summaries: () => string,
        history: () => string,
        csvData: () => { filename: string; id: string; text: string }[],
        addLinkedUrlDoc: (url: string, id: string) => void,
        createCSVInDash: (url: string, title: string, id: string, data: string) => void
    ) {
        this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
        this.vectorstore = _vectorstore;
        this._history = history;
        this._summaries = summaries;
        this._csvData = csvData;
        this.tools = {
            calculate: new CalculateTool(),
            rag: new RAGTool(this.vectorstore),
            dataAnalysis: new DataAnalysisTool(csvData),
            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
            searchTool: new SearchTool(addLinkedUrlDoc),
            createCSV: new CreateCSVTool(createCSVInDash),
            no_tool: new NoTool(),
        };
    }

    /**
     * Runs one user query through the stage loop until the model produces an `<answer>`.
     *
     * @param question the user's question
     * @param onProcessingUpdate called with the accumulated processing entries while thoughts / action descriptions stream in
     * @param onAnswerUpdate called with the streamed-answer parser's output for every character of the answer
     * @param maxTurns upper bound for the stage counter; the counter starts at 2 and advances by 2 per model reply
     * @returns the parsed final answer
     * @throws Error when a reply cannot be parsed, contains no `<stage>`, supplies an action input without a
     *         preceding action, a tool fails, or the stage counter reaches `maxTurns` without an answer
     */
    async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise<AssistantMessage> {
        console.log(`Starting query: ${question}`);
        this.messages.push({ role: 'user', content: question });
        const chatHistory = this._history();
        const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory);
        this.interMessages = [{ role: 'system', content: systemPrompt }];
        this.interMessages.push({ role: 'user', content: `<stage number="1" role="user"><query>${question}</query></stage>` });
        const parser = new XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: '@_',
            textNodeName: '_text',
            // Always parse <query> and <url> tags as arrays, even when only one is present.
            isArray: name => ['query', 'url'].includes(name),
        });
        const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' });

        let currentAction: string | undefined;

        this.processingInfo = [];
        // A previous query that failed part-way never reached the reset on the answer path below,
        // so clear any leftover streaming state before starting a new one.
        this.streamedAnswerParser.reset();

        for (let i = 2; i < maxTurns; i += 2) {
            console.log(this.interMessages);
            console.log(`Turn ${i}/${maxTurns}`);

            const result = await this.execute(onProcessingUpdate, onAnswerUpdate);
            this.interMessages.push({ role: 'assistant', content: result });

            let parsedResult;
            try {
                parsedResult = parser.parse(result);
            } catch (error) {
                throw new Error(`Error parsing response: ${error}`);
            }

            const stage = parsedResult.stage;

            if (!stage) {
                throw new Error(`Error: No stage found in response`);
            }

            // Handle the stage's child tags in document order; the first action / action_input / answer
            // ends handling of this reply.
            for (const key in stage) {
                if (key === 'thought') {
                    console.log(`Thought: ${stage[key]}`);
                    this.processingNumber++;
                } else if (key === 'action') {
                    currentAction = stage[key] as string;
                    console.log(`Action: ${currentAction}`);
                    if (this.hasTool(currentAction)) {
                        const nextPrompt = [
                            {
                                type: 'text',
                                text: `<stage number="${i + 1}" role="user">` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + `</stage>`,
                            },
                        ];
                        this.interMessages.push({ role: 'user', content: nextPrompt });
                        break;
                    } else {
                        console.log('Error: No valid action');
                        this.interMessages.push({ role: 'user', content: `<stage number="${i + 1}" role="system-error-reporter">No valid action, try again.</stage>` });
                        break;
                    }
                } else if (key === 'action_input') {
                    const actionInput = stage[key];
                    console.log(`Action input:`, actionInput.inputs);
                    if (currentAction) {
                        try {
                            const observation = await this.processAction(currentAction, actionInput.inputs);
                            const nextPrompt = [{ type: 'text', text: `<stage number="${i + 1}" role="user"> <observation>` }, ...observation, { type: 'text', text: '</observation></stage>' }];
                            console.log(observation);
                            this.interMessages.push({ role: 'user', content: nextPrompt });
                            this.processingNumber++;
                            break;
                        } catch (error) {
                            throw new Error(`Error processing action: ${error}`);
                        }
                    } else {
                        throw new Error('Error: Action input without a valid action');
                    }
                } else if (key === 'answer') {
                    console.log('Answer found. Ending query.');
                    this.streamedAnswerParser.reset();
                    const parsedAnswer = AnswerParser.parse(result, this.processingInfo);
                    return parsedAnswer;
                }
            }
        }
        throw new Error('Reached maximum turns. Ending query.');
    }

    /**
     * Sends the current transcript to the model and streams the reply.
     *
     * A small character-level scanner tracks the most recently opened tag. Text inside `<thought>` or
     * `<action_input_description>` is accumulated into the processing entry for the current
     * `processingNumber`; once an `<answer>` tag has been opened, every following character is
     * forwarded to the streamed-answer parser.
     *
     * @returns the complete, unmodified reply text
     */
    private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise<string> {
        const stream = await this.client.chat.completions.create({
            model: 'gpt-4o',
            messages: this.interMessages as ChatCompletionMessageParam[],
            temperature: 0,
            stream: true,
        });

        let fullResponse = '';
        let currentTag = '';
        let currentContent = '';
        let isInsideTag = false;

        for await (const chunk of stream) {
            const content = chunk.choices[0]?.delta?.content ?? '';
            fullResponse += content;

            for (const char of content) {
                if (currentTag === 'answer') {
                    // Everything after <answer> (including nested tags) goes to the answer parser.
                    currentContent += char;
                    const streamedAnswer = this.streamedAnswerParser.parse(char);
                    onAnswerUpdate(streamedAnswer);
                    continue;
                } else if (char === '<') {
                    isInsideTag = true;
                    currentTag = '';
                    currentContent = '';
                } else if (char === '>') {
                    isInsideTag = false;
                    // A closing tag ends the current element, so no tag is active any more.
                    if (currentTag.startsWith('/')) {
                        currentTag = '';
                    }
                } else if (isInsideTag) {
                    currentTag += char;
                } else if (currentTag === 'thought' || currentTag === 'action_input_description') {
                    currentContent += char;
                    const current_info = this.processingInfo.find(info => info.index === this.processingNumber);
                    if (current_info) {
                        current_info.content = currentContent.trim();
                        onProcessingUpdate(this.processingInfo);
                    } else {
                        this.processingInfo.push({ index: this.processingNumber, type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION, content: currentContent.trim() });
                        onProcessingUpdate(this.processingInfo);
                    }
                }
            }
        }

        return fullResponse;
    }

    /**
     * Reports whether `name` is one of the tools registered in the constructor.
     * Uses an own-property check so that inherited keys such as "constructor" or "toString"
     * are not mistaken for tools.
     */
    private hasTool(name: string): boolean {
        return Object.prototype.hasOwnProperty.call(this.tools, name);
    }

    /**
     * Executes the named tool with the inputs parsed from the model's `<action_input>`.
     *
     * @param action tool name as emitted by the model
     * @param actionInput the parsed `inputs` value, passed to the tool unchanged
     * @returns whatever the tool's `execute` resolves to
     * @throws Error when `action` is not a registered tool
     */
    private async processAction(action: string, actionInput: any): Promise<any> {
        if (!this.hasTool(action)) {
            throw new Error(`Unknown action: ${action}`);
        }

        return await this.tools[action].execute(actionInput);
    }

    /**
     * Flattens parsed XML inputs to their `_text` values (arrays element by element).
     * Not called anywhere in this class.
     */
    private parseActionInputs(inputs: any): Record<string, string | string[]> {
        const parsedInputs: Record<string, string | string[]> = {};
        for (const key in inputs) {
            if (Array.isArray(inputs[key])) {
                parsedInputs[key] = inputs[key].map((item: any) => item._text);
            } else {
                parsedInputs[key] = inputs[key]._text;
            }
        }
        return parsedInputs;
    }
}
import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from './types';
import { v4 as uuid } from 'uuid';

/**
 * Turns the raw XML-like text of the model's final reply into a structured AssistantMessage.
 */
export class AnswerParser {
    /**
     * Parses the `<answer>` element found in `xml`.
     *
     * The answer body is split into ordered content segments: text wrapped in `<grounded_text>` becomes a
     * GROUNDED segment carrying the ids of the citations it references, and everything else becomes
     * NORMAL segments (`<normal_text>` wrappers are stripped). `<citations>`, `<follow_up_questions>` and
     * `<loop_summary>` are removed from the body and returned in their own fields.
     *
     * @param xml the complete model reply
     * @param processingInfo processing entries to attach to the message unchanged
     * @returns the assembled assistant message
     * @throws Error when `xml` contains no `<answer>...</answer>` element
     */
    static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage {
        const answerRegex = /<answer>([\s\S]*?)<\/answer>/;
        const citationsRegex = /<citations>([\s\S]*?)<\/citations>/;
        const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g;
        const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/;
        // [\s\S] rather than `.` so that a question spanning several lines is still matched.
        const questionRegex = /<question>([\s\S]*?)<\/question>/g;
        const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;
        const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g;
        const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/;

        const answerMatch = answerRegex.exec(xml);
        const citationsMatch = citationsRegex.exec(xml);
        const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml);
        const loopSummaryMatch = loopSummaryRegex.exec(xml);

        if (!answerMatch) {
            throw new Error('Invalid XML: Missing <answer> tag.');
        }

        let rawTextContent = answerMatch[1].trim();
        const content: AssistantMessage['content'] = [];
        const citations: Citation[] = [];
        let contentIndex = 0;

        // Strip the citations, follow-up questions and loop summary out of the answer body
        // so only the displayable text remains.
        for (const sectionMatch of [citationsMatch, followUpQuestionsMatch, loopSummaryMatch]) {
            if (sectionMatch) {
                rawTextContent = rawTextContent.replace(sectionMatch[0], '').trim();
            }
        }

        // Parse citations; each gets a fresh id, remembered under its (trimmed) index attribute.
        const citationMap = new Map<string, string>();
        if (citationsMatch) {
            const citationsContent = citationsMatch[1];
            let citationMatch: RegExpExecArray | null;
            while ((citationMatch = citationRegex.exec(citationsContent)) !== null) {
                const [, index, chunk_id, type, direct_text] = citationMatch;
                const citation_id = uuid();
                citationMap.set(index.trim(), citation_id);
                citations.push({
                    direct_text: direct_text.trim(),
                    type: getChunkType(type),
                    chunk_id,
                    citation_id,
                });
            }
        }

        // Unwrap <normal_text> so it is treated like any other un-grounded text.
        rawTextContent = rawTextContent.replace(normalTextRegex, '$1');

        // Walk the grounded spans in order, emitting the normal text between them.
        let lastIndex = 0;
        let match: RegExpExecArray | null;

        while ((match = groundedTextRegex.exec(rawTextContent)) !== null) {
            const [fullMatch, citationIndex, groundedText] = match;

            // Normal text that precedes this grounded span.
            if (match.index > lastIndex) {
                const normalText = rawTextContent.slice(lastIndex, match.index).trim();
                if (normalText) {
                    content.push({
                        index: contentIndex++,
                        type: TEXT_TYPE.NORMAL,
                        text: normalText,
                        citation_ids: null,
                    });
                }
            }

            // citation_index may list several indices ("1, 2"); tolerate whitespace and stray commas.
            // An index with no matching <citation> still maps to '' as before.
            const citation_ids = citationIndex
                .split(',')
                .map(index => index.trim())
                .filter(index => index !== '')
                .map(index => citationMap.get(index) ?? '');
            content.push({
                index: contentIndex++,
                type: TEXT_TYPE.GROUNDED,
                text: groundedText.trim(),
                citation_ids,
            });

            lastIndex = match.index + fullMatch.length;
        }

        // Any remaining normal text after the last grounded span.
        if (lastIndex < rawTextContent.length) {
            const remainingText = rawTextContent.slice(lastIndex).trim();
            if (remainingText) {
                content.push({
                    index: contentIndex++,
                    type: TEXT_TYPE.NORMAL,
                    text: remainingText,
                    citation_ids: null,
                });
            }
        }

        const followUpQuestions: string[] = [];
        if (followUpQuestionsMatch) {
            const questionsText = followUpQuestionsMatch[1];
            let questionMatch: RegExpExecArray | null;
            while ((questionMatch = questionRegex.exec(questionsText)) !== null) {
                followUpQuestions.push(questionMatch[1].trim());
            }
        }

        const assistantResponse: AssistantMessage = {
            role: ASSISTANT_ROLE.ASSISTANT,
            content,
            follow_up_questions: followUpQuestions,
            citations,
            processing_info: processingInfo,
            loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined,
        };

        return assistantResponse;
    }
}
8px; + width: 6px; } + &::-webkit-scrollbar-thumb { - background-color: darken($background-color, 10%); - border-radius: $border-radius; + background-color: $border-color; + border-radius: 3px; } - + } - .chat-content { - display: flex; - flex-direction: column; + .chat-input { + display: flex; + padding: 20px; + border-top: 1px solid $border-color; + background-color: #fff; + + input { + flex-grow: 1; + padding: 12px 15px; + border: 1px solid $border-color; + border-radius: 24px; + font-size: 15px; + transition: $transition; + + &:focus { + outline: none; + border-color: $primary-color; + box-shadow: 0 0 0 2px rgba($primary-color, 0.2); + } } - .messages { + .submit-button { + background-color: $primary-color; + color: white; + border: none; + border-radius: 50%; + width: 48px; + height: 48px; + margin-left: 10px; + cursor: pointer; + transition: $transition; display: flex; - flex-direction: column; - .message { - padding: 10px; - margin-bottom: 10px; - border-radius: $border-radius; - background-color: lighten($background-color, 5%); - box-shadow: 0 2px 5px $shadow-color; - //display: flex; - align-items: center; - max-width: 70%; - word-break: break-word; - .message-footer { // Assuming this is the container for the toggle button - //max-width: 70%; - - - .toggle-logs-button { - margin-top: 10px; // Padding on sides to align with the text above - width: 95%; - //display: block; // Ensures the button extends the full width of its container - text-align: center; // Centers the text inside the button - //padding: 8px 0; // Adequate padding for touch targets - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - //transition: background-color 0.3s; - //margin-top: 10px; // Adds space above the button - box-shadow: 0 2px 4px $shadow-color; // Consistent shadow with other elements - &:hover { - background-color: $button-hover-color; - } - } - .tool-logs { - width: 100%; - background-color: 
$input-background; - color: $text-color; - margin-top: 5px; - //padding: 10px; - //border-radius: $border-radius; - //box-shadow: inset 0 2px 4px $shadow-color; - //transition: opacity 1s ease-in-out; - font-family: monospace; - overflow-x: auto; - max-height: 150px; // Ensuring it does not grow too large - overflow-y: auto; - } - - } - - .custom-link { - color: lightblue; - text-decoration: underline; - cursor: pointer; - } - &.user { - align-self: flex-end; - background-color: $button-color; - color: #fff; - } - - &.chatbot { - align-self: flex-start; - background-color: $input-background; - color: $text-color; - } - - span { - flex-grow: 1; - padding-right: 10px; - } - - img { - max-width: 50px; - max-height: 50px; + align-items: center; + justify-content: center; + + &:hover { + background-color: darken($primary-color, 10%); + } + + &:disabled { + background-color: $light-text-color; + cursor: not-allowed; + } + + .spinner { + height: 24px; + width: 24px; + border: 3px solid rgba(255, 255, 255, 0.3); + border-top: 3px solid #fff; border-radius: 50%; - } + animation: spin 1s linear infinite; } } - padding-bottom: 0; } +} - .chat-form { - display: flex; - flex-grow: 1; - //height: 50px; - bottom: 0; +.message { + max-width: 80%; + margin-bottom: 20px; + padding: 16px 20px; + border-radius: 18px; + font-size: 15px; + line-height: 1.5; + box-shadow: 0 2px 4px $shadow-color; + + &.user { + align-self: flex-end; + background-color: $primary-color; + color: white; + border-bottom-right-radius: 4px; + } + + &.chatbot { + align-self: flex-start; + background-color: $secondary-color; + color: $text-color; + border-bottom-left-radius: 4px; + } + + .toggle-info { + background-color: transparent; + color: $primary-color; + border: 1px solid $primary-color; width: 100%; - padding: 10px; - background-color: $input-background; - box-shadow: inset 0 -1px 2px $shadow-color; - - input[type="text"] { - flex-grow: 1; - border: 1px solid darken($input-background, 10%); - 
border-radius: $border-radius; - padding: 8px 12px; - margin-right: 10px; + height: fit-content; + border-radius: 8px; + padding: 10px 16px; + font-size: 14px; + cursor: pointer; + transition: $transition; + margin-top: 10px; + + &:hover { + background-color: rgba($primary-color, 0.1); } + } +} + +.follow-up-questions { + margin-top: 15px; + + h4 { + font-size: 15px; + font-weight: 600; + margin-bottom: 10px; + } - button { - padding: 8px 16px; - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - transition: background-color 0.3s; - - &:hover { - background-color: $button-hover-color; - } + .questions-list { + display: flex; + flex-direction: column; + gap: 10px; + } + + .follow-up-button { + background-color: #fff; + color: $primary-color; + border: 1px solid $primary-color; + border-radius: 8px; + padding: 10px 16px; + font-size: 14px; + cursor: pointer; + transition: $transition; + text-align: left; + white-space: normal; + word-wrap: break-word; + width: 100%; + height: fit-content; + + &:hover { + background-color: $primary-color; + color: #fff; } - margin-bottom: 0; } } -.initializing-overlay { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 100%; - background-color: rgba($background-color, 0.95); - display: flex; - justify-content: center; +.citation-button { + display: inline-flex; align-items: center; - font-size: 1.5em; + justify-content: center; + width: 20px; + height: 20px; + border-radius: 50%; + background-color: rgba(0, 0, 0, 0.1); color: $text-color; - z-index: 10; // Ensure it's above all other content (may be better solution) - - &::before { - content: 'Initializing...'; - font-weight: bold; + font-size: 12px; + font-weight: bold; + margin-left: 5px; + cursor: pointer; + transition: $transition; + vertical-align: middle; + + &:hover { + background-color: rgba(0, 0, 0, 0.2); } } - -.modal { - position: fixed; +.uploading-overlay { + position: absolute; top: 0; 
left: 0; - width: 100%; - height: 100%; + right: 0; + bottom: 0; + background-color: rgba(255, 255, 255, 0.8); display: flex; justify-content: center; align-items: center; - background-color: rgba(0, 0, 0, 0.4); - - .modal-content { - background-color: $input-background; - color: $text-color; - padding: 20px; - border-radius: $border-radius; - box-shadow: 0 2px 10px $shadow-color; - display: flex; - flex-direction: column; - align-items: center; - width: auto; - min-width: 300px; - - h4 { - margin-bottom: 15px; - } + z-index: 1000; +} - p { - margin-bottom: 20px; - } +@keyframes spin { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} - button { - padding: 10px 20px; - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - margin: 5px; - transition: background-color 0.3s; +@media (max-width: 768px) { + .chat-box { + border-radius: 0; + } - &:hover { - background-color: $button-hover-color; - } - } + .message { + max-width: 90%; } } diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 880c332ac..383be0bb7 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -1,106 +1,157 @@ -import { MathJaxContext } from 'better-react-mathjax'; -import { action, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; +import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx'; import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; -import { ImageFile, Message } from 'openai/resources/beta/threads/messages'; -import { RunStep } from 'openai/resources/beta/threads/runs/steps'; import * as React from 'react'; -import { Doc } from '../../../../fields/Doc'; -import { Id } from '../../../../fields/FieldSymbols'; -import { CsvCast, DocCast, PDFCast, StrCast } from 
'../../../../fields/Types'; -import { CsvField } from '../../../../fields/URLField'; -import { Networking } from '../../../Network'; -import { DocUtils } from '../../../documents/DocUtils'; +import { Doc, DocListCast } from '../../../../fields/Doc'; +import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../fields/Types'; import { DocumentType } from '../../../documents/DocumentTypes'; import { Docs } from '../../../documents/Documents'; -import { DocumentManager } from '../../../util/DocumentManager'; import { LinkManager } from '../../../util/LinkManager'; import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; -import MessageComponent from './MessageComponent'; -import { ANNOTATION_LINK_TYPE, ASSISTANT_ROLE, AssistantMessage, DOWNLOAD_TYPE } from './types'; +import MessageComponentBox from './MessageComponent'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo, MessageContent } from './types'; +import { Vectorstore } from './vectorstore/Vectorstore'; +import { Agent } from './Agent'; +import dotenv from 'dotenv'; +import { DocData, DocViews } from '../../../../fields/DocSymbols'; +import { AnswerParser } from './AnswerParser'; +import { DocumentManager } from '../../../util/DocumentManager'; +import { v4 as uuidv4 } from 'uuid'; +import { chunk } from 'lodash'; +import { DocUtils } from '../../../documents/DocUtils'; +import { createRef } from 'react'; +import { ClientUtils } from '../../../../ClientUtils'; +import { ProgressBar } from './ProgressBar'; + +dotenv.config(); @observer export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { - @observable modalStatus = false; - @observable currentFile = { url: '' }; @observable history: AssistantMessage[] = []; @observable.deep current_message: AssistantMessage | undefined = undefined; 
@observable isLoading: boolean = false; - @observable isInitializing: boolean = true; - @observable expandedLogIndex: number | null = null; - @observable linked_docs_to_add: Doc[] = []; - + @observable uploadProgress: number = 0; // Track progress percentage + @observable currentStep: string = ''; // Track current step name + @observable expandedScratchpadIndex: number | null = null; + @observable inputValue: string = ''; + @observable private linked_docs_to_add: ObservableSet<Doc> = observable.set(); + @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; + @observable private isUploadingDocs: boolean = false; private openai: OpenAI; - private interim_history: string = ''; - private assistantID: string = ''; - private threadID: string = ''; - private _oldWheel: any; - private vectorStoreID: string = ''; - private mathJaxConfig: any; - private linkedCsvIDs: string[] = []; + private vectorstore_id: string; + private vectorstore: Vectorstore; + private agent: Agent; // Add the ChatBot instance + private _oldWheel: HTMLDivElement | null = null; + private messagesRef: React.RefObject<HTMLDivElement>; public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); } + constructor(props: FieldViewProps) { super(props); makeObservable(this); this.openai = this.initializeOpenAI(); - this.history = []; - this.threadID = StrCast(this.dataDoc.thread_id); - this.assistantID = StrCast(this.dataDoc.assistant_id); - this.vectorStoreID = StrCast(this.dataDoc.vector_store_id); - this.openai = this.initializeOpenAI(); - if (this.assistantID === '' || this.threadID === '' || this.vectorStoreID === '') { - this.createAssistant(); + if (StrCast(this.dataDoc.vectorstore_id) == '') { + console.log('new_id'); + this.vectorstore_id = uuidv4(); + this.dataDoc.vectorstore_id = this.vectorstore_id; } else { - this.retrieveCsvUrls(); - this.isInitializing = false; + this.vectorstore_id = 
StrCast(this.dataDoc.vectorstore_id); } - this.mathJaxConfig = { - loader: { load: ['input/asciimath'] }, - tex: { - inlineMath: [ - ['$', '$'], - ['\\(', '\\)'], - ], - displayMath: [ - ['$$', '$$'], - ['[', ']'], - ], - }, - }; + this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createCSVInDash); + this.messagesRef = React.createRef<HTMLDivElement>(); + reaction( - () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, image: msg.image, tool_logs: msg.tool_logs, links: msg.links })), + () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, content: msg.content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { this.dataDoc.data = JSON.stringify(serializableHistory); } ); } - toggleToolLogs = (index: number) => { - this.expandedLogIndex = this.expandedLogIndex === index ? 
null : index; + @action + addDocToVectorstore = async (newLinkedDoc: Doc) => { + this.uploadProgress = 0; + this.currentStep = 'Initializing...'; + this.isUploadingDocs = true; + + try { + await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); + } catch (error) { + console.error('Error uploading document:', error); + this.currentStep = 'Error during upload'; + } finally { + this.isUploadingDocs = false; + this.uploadProgress = 0; + this.currentStep = ''; + } }; - retrieveCsvUrls() { - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); + @action + updateProgress = (progress: number, step: string) => { + console.log('Progress:', progress, step); + this.uploadProgress = progress; + this.currentStep = step; + }; - linkedDocs.forEach(doc => { - const aiFieldId = StrCast(doc[this.Document[Id] + '_ai_field_id']); - if (CsvCast(doc.data)) { - this.linkedCsvIDs.push(StrCast(aiFieldId)); - console.log(this.linkedCsvIDs); - } - }); - } + @action + addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => { + console.log('adding csv file for analysis'); + if (!newLinkedDoc.chunk_simpl) { + const csvData: string = StrCast(newLinkedDoc.text); + console.log('CSV Data:', csvData); + const completion = await this.openai.chat.completions.create({ + messages: [ + { + role: 'system', + content: + 'You are an AI assistant tasked with summarizing the content of a CSV file. You will be provided with the data from the CSV file and your goal is to generate a concise summary that captures the main themes, trends, and key points represented in the data.', + }, + { + role: 'user', + content: `Please provide a comprehensive summary of the CSV file based on the provided data. Ensure the summary highlights the most important information, patterns, and insights. Your response should be in paragraph form and be concise. 
+ + CSV Data: + + ${csvData} + + ********** + Summary:`, + }, + ], + model: 'gpt-3.5-turbo', + }); + console.log('CSV Data:', csvData); + const csvId = id ?? uuidv4(); + + this.linked_csv_files.push({ + filename: CsvCast(newLinkedDoc.data).url.pathname, + id: csvId, + text: csvData, + }); + + console.log(this.linked_csv_files); + const chunkToAdd = { + chunkId: csvId, + chunkType: CHUNK_TYPE.CSV, + }; + newLinkedDoc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); + newLinkedDoc.summary = completion.choices[0].message.content!; + } + }; + + @action + toggleToolLogs = (index: number) => { + this.expandedScratchpadIndex = this.expandedScratchpadIndex === index ? null : index; + }; initializeOpenAI() { + console.log(process.env.OPENAI_KEY); const configuration: ClientOptions = { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true, @@ -108,396 +159,203 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { return new OpenAI(configuration); } - onPassiveWheel = (e: WheelEvent) => { - if (this._props.isContentActive()) { - e.stopPropagation(); + addScrollListener = () => { + if (this.messagesRef.current) { + this.messagesRef.current.addEventListener('wheel', this.onPassiveWheel, { passive: false }); } }; - createLink = (linkInfo: string, startIndex: number, endIndex: number, linkType: ANNOTATION_LINK_TYPE, annotationIndex: number = 0) => { - const text = this.interim_history; - const subString = this.current_message?.text.substring(startIndex, endIndex) ?? ''; - if (!text) return; - const textToDisplay = `${annotationIndex}`; - let fileInfo = linkInfo; - const fileName = subString.split('/')[subString.split('/').length - 1]; - if (linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE) { - fileInfo = linkInfo + '!!!' 
+ fileName; + removeScrollListener = () => { + if (this.messagesRef.current) { + this.messagesRef.current.removeEventListener('wheel', this.onPassiveWheel); } - - const formattedLink = `[${textToDisplay}](${fileInfo}~~~${linkType})`; - console.log(formattedLink); - const newText = text.replace(subString, formattedLink); - runInAction(() => { - this.interim_history = newText; - console.log(newText); - this.current_message?.links?.push({ - start: startIndex, - end: endIndex, - url: linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE ? fileName : linkInfo, - id: linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE ? linkInfo : undefined, - link_type: linkType, - }); - }); }; - @action - createAssistant = async () => { - this.isInitializing = true; - try { - const vectorStore = await this.openai.beta.vectorStores.create({ - name: 'Vector Store for Assistant', - }); - const assistant = await this.openai.beta.assistants.create({ - name: 'Document Analyser Assistant', - instructions: ` - You will analyse documents with which you are provided. You will answer questions and provide insights based on the information in the documents. - For writing math formulas: - You have a MathJax render environment. 
- - Write all in-line equations within a single dollar sign, $, to render them as TeX (this means any time you want to use a dollar sign to represent a dollar sign itself, you must escape it with a backslash: "$"); - - Use a double dollar sign, $$, to render equations on a new line; - Example: $$x^2 + 3x$$ is output for "x² + 3x" to appear as TeX.`, - model: 'gpt-4-turbo', - tools: [{ type: 'file_search' }, { type: 'code_interpreter' }], - tool_resources: { - file_search: { - vector_store_ids: [vectorStore.id], - }, - code_interpreter: { - file_ids: this.linkedCsvIDs, - }, - }, - }); - const thread = await this.openai.beta.threads.create(); + scrollToBottom = () => { + if (this.messagesRef.current) { + this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight; + } + }; - runInAction(() => { - this.dataDoc.assistant_id = assistant.id; - this.dataDoc.thread_id = thread.id; - this.dataDoc.vector_store_id = vectorStore.id; - this.assistantID = assistant.id; - this.threadID = thread.id; - this.vectorStoreID = vectorStore.id; - this.isInitializing = false; - }); - } catch (error) { - console.error('Initialization failed:', error); - this.isInitializing = false; + onPassiveWheel = (e: WheelEvent) => { + if (this._props.isContentActive()) { + e.stopPropagation(); } }; @action - runAssistant = async (inputText: string) => { - // Ensure an assistant and thread are created - if (!this.assistantID || !this.threadID || !this.vectorStoreID) { - await this.createAssistant(); - console.log('Assistant and thread created:', this.assistantID, this.threadID); - } - let currentText: string = ''; - let currentToolCallMessage: string = ''; + askGPT = async (event: React.FormEvent<HTMLFormElement>): Promise<void> => { + event.preventDefault(); + this.inputValue = ''; - // Send the user's input to the assistant - await this.openai.beta.threads.messages.create(this.threadID, { - role: 'user', - content: inputText, - }); + const textInput = 
event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement; + const trimmedText = textInput.value.trim(); - // Listen to the streaming responses - const stream = this.openai.beta.threads.runs - .stream(this.threadID, { - assistant_id: this.assistantID, - }) - .on('runStepCreated', (runStep: RunStep) => { - currentText = ''; - runInAction(() => { - this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, text: currentText, tool_logs: '', links: [] }; - }); + if (trimmedText) { + try { + textInput.value = ''; + this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }], processing_info: [] }); this.isLoading = true; - }) - .on('toolCallDelta', (toolCallDelta, snapshot) => { - this.isLoading = false; - if (toolCallDelta.type === 'code_interpreter') { - if (toolCallDelta.code_interpreter?.input) { - currentToolCallMessage += toolCallDelta.code_interpreter.input; - runInAction(() => { - if (this.current_message) { - this.current_message.tool_logs = currentToolCallMessage; - } - }); - } - if (toolCallDelta.code_interpreter?.outputs) { - currentToolCallMessage += '\n Code interpreter output:'; - toolCallDelta.code_interpreter.outputs.forEach(output => { - if (output.type === 'logs') { - runInAction(() => { - if (this.current_message) { - this.current_message.tool_logs += '\n|' + output.logs; - } - }); - } - }); - } - } - }) - .on('textDelta', (textDelta, snapshot) => { - this.isLoading = false; - currentText += textDelta.value; - runInAction(() => { - if (this.current_message) { - // this.current_message = {...this.current_message, text: current_text}; - this.current_message.text = currentText; - } - }); - }) - .on('messageDone', async event => { - console.log(event); - const textItem = event.content.find(item => item.type === 'text'); - if (textItem && textItem.type === 'text') { - const { text } = textItem; - console.log(text.value); - try { - runInAction(() => { - 
this.interim_history = text.value; - }); - } catch (e) { - console.error('Error parsing JSON response:', e); - } + this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], citations: [], processing_info: [] }; - const { annotations } = text; - console.log('Annotations: ' + annotations); - let index = 0; - annotations.forEach(async annotation => { - console.log(' ' + annotation); - console.log(' ' + annotation.text); - if (annotation.type === 'file_path') { - const { file_path: filePath } = annotation; - const fileToDownload = filePath.file_id; - console.log(fileToDownload); - if (filePath) { - console.log(filePath); - console.log(fileToDownload); - this.createLink(fileToDownload, annotation.start_index, annotation.end_index, ANNOTATION_LINK_TYPE.DOWNLOAD_FILE); - } - } else { - const { file_citation: fileCitation } = annotation; - if (fileCitation) { - const citedFile = await this.openai.files.retrieve(fileCitation.file_id); - const citationUrl = citedFile.filename; - this.createLink(citationUrl, annotation.start_index, annotation.end_index, ANNOTATION_LINK_TYPE.DASH_DOC, index); - index++; - } + const onProcessingUpdate = (processingUpdate: ProcessingInfo[]) => { + runInAction(() => { + if (this.current_message) { + this.current_message = { ...this.current_message, processing_info: processingUpdate }; } }); + this.scrollToBottom(); + }; + + const onAnswerUpdate = (answerUpdate: string) => { runInAction(() => { if (this.current_message) { - console.log('current message: ' + this.current_message.text); - this.current_message.text = this.interim_history; - this.history.push({ ...this.current_message }); - this.current_message = undefined; + this.current_message = { ...this.current_message, content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }] }; } }); - } - }) - .on('toolCallDone', toolCall => { + }; + + const finalMessage = await this.agent.askAgent(trimmedText, onProcessingUpdate, onAnswerUpdate); + runInAction(() => { - 
if (this.current_message && currentToolCallMessage) { - this.current_message.tool_logs = currentToolCallMessage; + if (this.current_message) { + this.history.push({ ...finalMessage }); + this.current_message = undefined; + this.dataDoc.data = JSON.stringify(this.history); } }); - }) - .on('imageFileDone', (content: ImageFile, snapshot: Message) => { - console.log('Image file done:', content); - }) - .on('end', () => { - console.log('Streaming done'); - }); + } catch (err) { + console.error('Error:', err); + this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }], processing_info: [] }); + } finally { + this.isLoading = false; + this.scrollToBottom(); + } + } + this.scrollToBottom(); }; @action - goToLinkedDoc = async (link: string) => { - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - - const linkedDoc = linkedDocs.find(doc => { - const docUrl = CsvCast(doc.data, PDFCast(doc.data)).url.pathname.replace('/files/pdfs/', '').replace('/files/csvs/', ''); - console.log('URL: ' + docUrl + ' Citation URL: ' + link); - return link === docUrl; - }); - - if (linkedDoc) { - await DocumentManager.Instance.showDocument(DocCast(linkedDoc), { willZoomCentered: true }, () => {}); + updateMessageCitations = (index: number, citations: Citation[]) => { + if (this.history[index]) { + this.history[index].citations = citations; } }; @action - askGPT = async (event: React.FormEvent<HTMLFormElement>): Promise<void> => { - event.preventDefault(); + addLinkedUrlDoc = async (url: string, id: string) => { + const doc = Docs.Create.WebDocument(url); - const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement; - const trimmedText = textInput.value.trim(); + const linkDoc = 
Docs.Create.LinkDocument(this.Document, doc); + LinkManager.Instance.addLink(linkDoc); - if (!this.assistantID || !this.threadID) { - try { - await this.createAssistant(); - } catch (err) { - console.error('Error:', err); - } - } + const chunkToAdd = { + chunkId: id, + chunkType: CHUNK_TYPE.URL, + }; - if (trimmedText) { - try { - textInput.value = ''; - runInAction(() => { - this.history.push({ role: ASSISTANT_ROLE.USER, text: trimmedText }); - }); - await this.runAssistant(trimmedText); - this.dataDoc.data = this.history.toString(); - } catch (err) { - console.error('Error:', err); - } - } + doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); }; - @action - uploadLinks = async (linkedDocs: Doc[]) => { - if (this.isInitializing) { - console.log('Initialization in progress, upload aborted.'); - return; - } - const urls = linkedDocs.map(doc => CsvCast(doc.data, PDFCast(doc.data)).url.pathname); - const csvUrls = urls.filter(url => url.endsWith('.csv')); - console.log(this.assistantID, this.threadID, urls); + @computed + get userName() { + return ClientUtils.CurrentUserEmail; + } - const { openai_file_ids: openaiFileIds } = await Networking.PostToServer('/uploadPDFToVectorStore', { urls, threadID: this.threadID, assistantID: this.assistantID, vector_store_id: this.vectorStoreID }); + @action + createCSVInDash = async (url: string, title: string, id: string, data: string) => { + console.log('Creating CSV in Dash:', url, title); + const doc = DocCast(await DocUtils.DocumentFromType('csv', url, { title: title, text: RTFCast(data) })); - linkedDocs.forEach((doc, i) => { - doc[this.Document[Id] + '_ai_field_id'] = openaiFileIds[i]; - console.log('AI Field ID: ' + openaiFileIds[i]); - }); + const linkDoc = Docs.Create.LinkDocument(this.Document, doc); + LinkManager.Instance.addLink(linkDoc); - if (csvUrls.length > 0) { - for (let i = 0; i < csvUrls.length; i++) { - this.linkedCsvIDs.push(openaiFileIds[urls.indexOf(csvUrls[i])]); - } - console.log('linked csvs:' + 
this.linkedCsvIDs); - await this.openai.beta.assistants.update(this.assistantID, { - tools: [{ type: 'file_search' }, { type: 'code_interpreter' }], - tool_resources: { - file_search: { - vector_store_ids: [this.vectorStoreID], - }, - code_interpreter: { - file_ids: this.linkedCsvIDs, - }, - }, - }); - } - }; + doc && this._props.addDocument?.(doc); + await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - downloadToComputer = (url: string, fileName: string) => { - fetch(url, { method: 'get', mode: 'no-cors', referrerPolicy: 'no-referrer' }) - .then(res => res.blob()) - .then(res => { - const aElement = document.createElement('a'); - aElement.setAttribute('download', fileName); - const href = URL.createObjectURL(res); - aElement.href = href; - aElement.setAttribute('target', '_blank'); - aElement.click(); - URL.revokeObjectURL(href); - }); + this.addCSVForAnalysis(doc, id); }; - createDocumentInDash = async (url: string) => { - const fileSuffix = url.substring(url.lastIndexOf('.') + 1); - console.log(fileSuffix); - let doc: Doc | null = null; - switch (fileSuffix) { - case 'pdf': - doc = DocCast(await DocUtils.DocumentFromType('pdf', url, {})); - break; - case 'csv': - doc = DocCast(await DocUtils.DocumentFromType('csv', url, {})); - break; - case 'png': - case 'jpg': - case 'jpeg': - doc = DocCast(await DocUtils.DocumentFromType('image', url, {})); - break; - default: - console.error('Unsupported file type:', fileSuffix); - break; - } - if (doc) { - doc && this._props.addDocument?.(doc); - await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } - }; + @action + handleCitationClick = (citation: Citation) => { + console.log('Citation clicked:', citation); + const currentLinkedDocs: Doc[] = this.linkedDocs; + + const chunkId = citation.chunk_id; + + for (let doc of currentLinkedDocs) { + if (doc.chunk_simpl) { + const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: 
SimplifiedChunk[] }; + console.log(docChunkSimpl); + const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); + console.log(foundChunk); + if (foundChunk) { + console.log(getChunkType(foundChunk.chunkType)); + switch (foundChunk.chunkType) { + case CHUNK_TYPE.IMAGE: + case CHUNK_TYPE.TABLE: + const values = foundChunk.location?.replace(/[\[\]]/g, '').split(','); + + if (values?.length !== 4) { + console.error('Location string must contain exactly 4 numbers'); + return; + } - downloadFile = async (fileInfo: string, downloadType: DOWNLOAD_TYPE) => { - try { - console.log(fileInfo); - const [fileId, fileName] = fileInfo.split(/!!!/); - const { file_path: filePath } = await Networking.PostToServer('/downloadFileFromOpenAI', { file_id: fileId, file_name: fileName }); - const fileLink = CsvCast(new CsvField(filePath)).url.href; - if (downloadType === DOWNLOAD_TYPE.DASH) { - this.createDocumentInDash(fileLink); - } else { - this.downloadToComputer(fileLink, fileName); + const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); + const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); + const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); + const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); + + const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; + + const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); + const highlightDoc = existingDoc ?? 
this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); + + DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); + break; + case CHUNK_TYPE.TEXT: + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + const firstView = Array.from(doc[DocViews])[0]; + firstView.ComponentView?.search?.(citation.direct_text); + }); + break; + case CHUNK_TYPE.URL: + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + const firstView = Array.from(doc[DocViews])[0]; + }); + break; + case CHUNK_TYPE.CSV: + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + const firstView = Array.from(doc[DocViews])[0]; + }); + break; + default: + console.log('Chunk type not supported', foundChunk.chunkType); + break; + } + } } - } catch (error) { - console.error('Error downloading file:', error); } }; - handleDownloadToDevice = () => { - this.downloadFile(this.currentFile.url, DOWNLOAD_TYPE.DEVICE); - this.modalStatus = false; // Close the modal after the action - this.currentFile = { url: '' }; // Reset the current file - }; - - handleAddToDash = () => { - // Assuming `downloadFile` is a method that handles adding to Dash - this.downloadFile(this.currentFile.url, DOWNLOAD_TYPE.DASH); - this.modalStatus = false; // Close the modal after the action - this.currentFile = { url: '' }; // Reset the current file - }; - - renderModal = () => { - if (!this.modalStatus) return null; - - return ( - <div className="modal"> - <div className="modal-content"> - <h4>File Actions</h4> - <p>Choose an action for the file:</p> - <button type="button" onClick={this.handleDownloadToDevice}> - Download to Device - </button> - <button type="button" onClick={this.handleAddToDash}> - Add to Dash - </button> - <button - type="button" - onClick={() => { - this.modalStatus = false; - }}> - Cancel - </button> - </div> - </div> - ); - }; - @action - showModal = () => { - 
this.modalStatus = true; + createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => { + const highlight_doc = Docs.Create.FreeformDocument([], { + x: x1, + y: y1, + _width: x2 - x1, + _height: y2 - y1, + backgroundColor: 'rgba(255, 255, 0, 0.5)', + }); + highlight_doc[DocData].citation_id = citation.citation_id; + Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight_doc); + highlight_doc.annotationOn = pdfDoc; + Doc.SetContainer(highlight_doc, pdfDoc); + return highlight_doc; }; - @action - setCurrentFile = (file: { url: string }) => { - this.currentFile = file; - }; + componentDidUpdate() { + this.scrollToBottom(); + } componentDidMount() { this._props.setContentViewBox?.(this); @@ -505,17 +363,26 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { try { const storedHistory = JSON.parse(StrCast(this.dataDoc.data)); runInAction(() => { - this.history = storedHistory.map((msg: AssistantMessage) => ({ - role: msg.role, - text: msg.text, - quote: msg.quote, - tool_logs: msg.tool_logs, - image: msg.image, - })); + this.history.push( + ...storedHistory.map((msg: AssistantMessage) => ({ + role: msg.role, + content: msg.content, + follow_up_questions: msg.follow_up_questions, + citations: msg.citations, + })) + ); }); } catch (e) { console.error('Failed to parse history from dataDoc:', e); } + } else { + runInAction(() => { + this.history.push({ + role: ASSISTANT_ROLE.ASSISTANT, + content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: `Hey, ${this.userName()} Welcome to the Your Friendly Assistant! 
Link a document or ask questions about anything to get started.`, citation_ids: null }], + processing_info: [], + }); + }); } reaction( () => { @@ -526,79 +393,150 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { return linkedDocs; }, - linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) + linked => linked.forEach(doc => this.linked_docs_to_add.add(doc)) ); - observe( - // right now this skips during initialization which is necessary because it would be blank - // However, it will upload the same link twice when it is - this.linked_docs_to_add, - change => { - // observe pushes/splices on a user link DB 'data' field (should only happen for local changes) - switch (change.type as any) { - case 'splice': - if ((change as any).addedCount > 0) { - // maybe check here if its already in the urls datadoc array so doesn't add twice - console.log((change as any).added as Doc[]); - this.uploadLinks((change as any).added as Doc[]); - } - // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); - break; - case 'update': // let oldValue = change.oldValue; - default: + observe(this.linked_docs_to_add, change => { + if (change.type === 'add') { + if (PDFCast(change.newValue.data)) { + this.addDocToVectorstore(change.newValue); + } else if (CsvCast(change.newValue.data)) { + this.addCSVForAnalysis(change.newValue); } - }, - true + } else if (change.type === 'delete') { + console.log('Deleted docs: ', change.oldValue); + } + }); + this.addScrollListener(); + } + + componentWillUnmount() { + this.removeScrollListener(); + } + + @computed + get linkedDocs() { + return LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d); + } + + @computed + get docIds() { + return LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => 
DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d) + .filter(d => d.ai_doc_id) + .map(d => StrCast(d.ai_doc_id)); + } + + @computed + get summaries(): string { + return ( + LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d) + .filter(d => d.summary) + .map((doc, index) => { + if (PDFCast(doc.data)) { + return `<summary file_name="${PDFCast(doc.data).url.pathname}" applicable_tools=["rag"]>${doc.summary}</summary>`; + } else if (CsvCast(doc.data)) { + return `<summary file_name="${CsvCast(doc.data).url.pathname}" applicable_tools=["dataAnalysis"]>${doc.summary}</summary>`; + } else { + return `${index + 1}) ${doc.summary}`; + } + }) + .join('\n') + '\n' ); } + @computed + get linkedCSVs(): { filename: string; id: string; text: string }[] { + return this.linked_csv_files; + } + + @computed + get formattedHistory(): string { + let history = '<chat_history>\n'; + for (const message of this.history) { + history += `<${message.role}>${message.content.map(content => content.text).join(' ')}`; + if (message.loop_summary) { + history += `<loop_summary>${message.loop_summary}</loop_summary>`; + } + history += `</${message.role}>\n`; + } + history += '</chat_history>'; + return history; + } + + retrieveSummaries = () => { + return this.summaries; + }; + + retrieveCSVData = () => { + return this.linkedCSVs; + }; + + retrieveFormattedHistory = () => { + return this.formattedHistory; + }; + + retrieveDocIds = () => { + return this.docIds; + }; + + @action + handleFollowUpClick = (question: string) => { + console.log('Follow-up question clicked:', question); + this.inputValue = question; + }; + render() { return ( - <MathJaxContext config={this.mathJaxConfig}> - <div className="chatBox"> - {this.isInitializing && <div className="initializing-overlay">Initializing...</div>} - 
{this.renderModal()} - <div - className="scroll-box chat-content" - ref={r => { - this._oldWheel?.removeEventListener('wheel', this.onPassiveWheel); - this._oldWheel = r; - r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); - }}> - <div className="messages"> - {this.history.map((message, index) => ( - <MessageComponent - key={index} - message={message} - toggleToolLogs={this.toggleToolLogs} - expandedLogIndex={this.expandedLogIndex} - index={index} - showModal={this.showModal} - goToLinkedDoc={this.goToLinkedDoc} - setCurrentFile={this.setCurrentFile} - /> - ))} - {!this.current_message ? null : ( - <MessageComponent - key={this.history.length} - message={this.current_message} - toggleToolLogs={this.toggleToolLogs} - expandedLogIndex={this.expandedLogIndex} - index={this.history.length} - showModal={this.showModal} - goToLinkedDoc={this.goToLinkedDoc} - setCurrentFile={this.setCurrentFile} - isCurrent - /> - )} + <div className="chat-box"> + {this.isUploadingDocs && ( + <div className="uploading-overlay"> + <div className="progress-container"> + <ProgressBar /> + <div className="step-name">{this.currentStep}</div> </div> </div> - <form onSubmit={this.askGPT} className="chat-form"> - <input type="text" name="messageInput" autoComplete="off" placeholder="Type a message..." 
/> - <button type="submit">Send</button> - </form> + )} + <div className="chat-header"> + <h2>{this.userName()}'s AI Assistant</h2> </div> - </MathJaxContext> + <div className="chat-messages" ref={this.messagesRef}> + {this.history.map((message, index) => ( + <MessageComponentBox key={index} message={message} index={index} onFollowUpClick={this.handleFollowUpClick} onCitationClick={this.handleCitationClick} updateMessageCitations={this.updateMessageCitations} /> + ))} + {this.current_message && ( + <MessageComponentBox + key={this.history.length} + message={this.current_message} + index={this.history.length} + onFollowUpClick={this.handleFollowUpClick} + onCitationClick={this.handleCitationClick} + updateMessageCitations={this.updateMessageCitations} + /> + )} + </div> + <form onSubmit={this.askGPT} className="chat-input"> + <input type="text" name="messageInput" autoComplete="off" placeholder="Type your message here..." value={this.inputValue} onChange={e => (this.inputValue = e.target.value)} /> + <button className="submit-button" type="submit" disabled={this.isLoading}> + {this.isLoading ? 
( + <div className="spinner"></div> + ) : ( + <svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" strokeWidth="2" fill="none" strokeLinecap="round" strokeLinejoin="round"> + <line x1="22" y1="2" x2="11" y2="13"></line> + <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon> + </svg> + )} + </button> + </form> + </div> ); } } diff --git a/src/client/views/nodes/ChatBox/ChunkManager.ts b/src/client/views/nodes/ChatBox/ChunkManager.ts new file mode 100644 index 000000000..64c073640 --- /dev/null +++ b/src/client/views/nodes/ChatBox/ChunkManager.ts @@ -0,0 +1,24 @@ +import { SimplifiedChunk } from './types'; + +class ChunkManager { + private chunks: SimplifiedChunk[]; + + constructor() { + this.chunks = []; + } + + addChunk(chunk: SimplifiedChunk) { + this.chunks.push(chunk); + } + + removeChunk(chunk: SimplifiedChunk) { + const index = this.chunks.indexOf(chunk); + if (index !== -1) { + this.chunks.splice(index, 1); + } + } + + getChunks() { + return this.chunks; + } +} diff --git a/src/client/views/nodes/ChatBox/MessageComponent.scss b/src/client/views/nodes/ChatBox/MessageComponent.scss deleted file mode 100644 index 6fcc0e5e7..000000000 --- a/src/client/views/nodes/ChatBox/MessageComponent.scss +++ /dev/null @@ -1,10 +0,0 @@ -MessageComponent-citation { - color: lightblue; - vertical-align: super; - font-size: smaller; -} -MessageComponent-file_path { - color: lightblue; - vertical-align: baseline; - font-size: inherit; -} diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index f27a18891..812e52ee0 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,82 +1,105 @@ -/* eslint-disable jsx-a11y/control-has-associated-label */ -/* eslint-disable react/require-default-props */ -import { MathJax, MathJaxContext } from 'better-react-mathjax'; +import React, { useState } from 'react'; import { observer } from 
/**
 * Props accepted by `MessageComponentBox`.
 */
interface MessageComponentProps {
    /** The chat message to render (role, content fragments, citations, follow-ups, processing info). */
    message: AssistantMessage;
    /** Position of the message in the chat history; accepted for callers but not read when rendering. */
    index: number;
    /** Invoked with the question text when a follow-up question button is clicked. */
    onFollowUpClick: (question: string) => void;
    /** Invoked with the resolved citation when a citation button is clicked. */
    onCitationClick: (citation: Citation) => void;
    /** Accepted for callers; not invoked by this component. */
    updateMessageCitations: (index: number, citations: Citation[]) => void;
}

/**
 * Renders one chat message: an optional collapsible list of the agent's
 * thoughts/actions, the message content fragments (grounded text carries
 * citation buttons), and optional follow-up question buttons.
 *
 * Every button is given an explicit `type="button"`: a button without a type
 * defaults to `submit` and would submit any form it is ever nested in.
 */
const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message, onFollowUpClick, onCitationClick }) {
    const [dropdownOpen, setDropdownOpen] = useState(false);

    // Read once so the emptiness check and the render below share one null-safe value.
    const processingInfo = message.processing_info ?? [];
    const hasProcessingInfo = processingInfo.length > 0;

    /** Renders a single content fragment according to its type. */
    const renderContent = (item: MessageContent) => {
        const i = item.index;
        if (item.type === TEXT_TYPE.GROUNDED) {
            const citation_ids = item.citation_ids || [];
            return (
                <span key={i} className="grounded-text">
                    <ReactMarkdown>{item.text}</ReactMarkdown>
                    {citation_ids.map((id, idx) => {
                        // Citation ids that do not resolve against the message's citations render nothing.
                        const citation = message.citations?.find(c => c.citation_id === id);
                        if (!citation) return null;
                        return (
                            <button type="button" key={i + idx} className="citation-button" onClick={() => onCitationClick(citation)}>
                                {/* NOTE(review): the label is the fragment index + 1, so every citation of one fragment shows the same number; confirm this is intended. */}
                                {i + 1}
                            </button>
                        );
                    })}
                </span>
            );
        }
        if (item.type === TEXT_TYPE.NORMAL) {
            return (
                <span key={i} className="normal-text">
                    <ReactMarkdown>{item.text}</ReactMarkdown>
                </span>
            );
        }
        if ('query' in item) {
            return (
                <span key={i} className="query-text">
                    <ReactMarkdown>{JSON.stringify(item.query)}</ReactMarkdown>
                </span>
            );
        }
        // Unknown fragment shape: show its raw JSON rather than dropping it.
        return (
            <span key={i}>
                <ReactMarkdown>{JSON.stringify(item)}</ReactMarkdown>
            </span>
        );
    };

    /** Renders one thought/action entry; other processing types render nothing. */
    const renderProcessingInfo = (info: ProcessingInfo) => {
        if (info.type === PROCESSING_TYPE.THOUGHT) {
            return (
                <div key={info.index} className="dropdown-item">
                    <strong>Thought:</strong> {info.content}
                </div>
            );
        }
        if (info.type === PROCESSING_TYPE.ACTION) {
            return (
                <div key={info.index} className="dropdown-item">
                    <strong>Action:</strong> {info.content}
                </div>
            );
        }
        return null;
    };

    return (
        <div className={`message ${message.role}`}>
            {hasProcessingInfo && (
                <div className="processing-info">
                    {/* Functional update so rapid clicks always toggle from the latest state. */}
                    <button type="button" className="toggle-info" onClick={() => setDropdownOpen(open => !open)}>
                        {dropdownOpen ? 'Hide Agent Thoughts/Actions' : 'Show Agent Thoughts/Actions'}
                    </button>
                    {dropdownOpen && <div className="info-content">{processingInfo.map(renderProcessingInfo)}</div>}
                    <br />
                </div>
            )}
            <div className="message-content">{message.content && message.content.map(messageFragment => <React.Fragment key={messageFragment.index}>{renderContent(messageFragment)}</React.Fragment>)}</div>
            {message.follow_up_questions && message.follow_up_questions.length > 0 && (
                <div className="follow-up-questions">
                    <h4>Follow-up Questions:</h4>
                    <div className="questions-list">
                        {message.follow_up_questions.map((question, idx) => (
                            <button type="button" key={idx} className="follow-up-button" onClick={() => onFollowUpClick(question)}>
                                {question}
                            </button>
                        ))}
                    </div>
                </div>
            )}
        </div>
    );
};

export default observer(MessageComponentBox);
/** Where the scanner currently is relative to the answer's text tags. */
enum ParserState {
    Outside,
    InGroundedText,
    InNormalText,
}

/**
 * Incremental parser that extracts the displayable text of a streamed answer.
 *
 * Characters are fed one at a time through `parse`. Only text found between
 * `<grounded_text ...>` / `<normal_text ...>` and their closing tags is
 * accumulated; everything outside those sections is discarded.
 */
export class StreamedAnswerParser {
    /** Closing tags (without the final `>`) that end a text section. */
    private static readonly CLOSING_TAG_PREFIXES: readonly string[] = ['</grounded_text', '</normal_text'];

    private state: ParserState = ParserState.Outside;
    /** Outside: raw input since the last `<` or `>`. In a text section: a pending, possible closing tag. */
    private buffer = '';
    private result = '';
    private isStartOfLine = true;

    /**
     * Consumes the next character of the stream.
     * @param char the next character; intended to be called with one character at a time
     * @returns all text extracted so far, with surrounding whitespace trimmed
     */
    public parse(char: string): string {
        switch (this.state) {
            case ParserState.Outside:
                this.consumeOutside(char);
                break;

            case ParserState.InGroundedText:
            case ParserState.InNormalText:
                this.consumeText(char);
                break;
        }

        return this.result.trim();
    }

    /** Clears all accumulated text and returns the parser to its initial state. */
    public reset(): void {
        this.state = ParserState.Outside;
        this.buffer = '';
        this.result = '';
        this.isStartOfLine = true;
    }

    /** Outside a text section: watch for an opening text tag; all other input is ignored. */
    private consumeOutside(char: string): void {
        if (char === '<') {
            this.buffer = '<';
        } else if (char === '>') {
            // startsWith (not equality) because opening tags may carry attributes.
            if (this.buffer.startsWith('<grounded_text')) {
                this.state = ParserState.InGroundedText;
            } else if (this.buffer.startsWith('<normal_text')) {
                this.state = ParserState.InNormalText;
            }
            this.buffer = '';
        } else {
            this.buffer += char;
        }
    }

    /**
     * Inside a text section: emit characters, holding back only input that can
     * still turn out to be a closing tag.
     *
     * A `<` that does not begin a closing tag (e.g. `a < b` or inline markup such
     * as `<b>`) is flushed to the output as literal text instead of being
     * swallowed together with everything that follows it.
     */
    private consumeText(char: string): void {
        if (this.buffer === '') {
            if (char === '<') {
                this.buffer = '<';
            } else {
                this.processChar(char);
            }
            return;
        }

        const prefixes = StreamedAnswerParser.CLOSING_TAG_PREFIXES;

        // The buffer already spells a complete closing tag name: wait for its '>'.
        if (prefixes.some(tag => this.buffer.startsWith(tag))) {
            if (char === '>') {
                this.state = ParserState.Outside;
                this.buffer = '';
            } else if (char === '<') {
                this.buffer = '<';
            } else {
                this.buffer += char;
            }
            return;
        }

        // Still a proper prefix of a closing tag: keep holding it back.
        const candidate = this.buffer + char;
        if (prefixes.some(tag => tag.startsWith(candidate))) {
            this.buffer = candidate;
            return;
        }

        // Not a closing tag after all: release the held-back characters as text,
        // then handle the current character from a clean (empty-buffer) state.
        const literal = this.buffer;
        this.buffer = '';
        for (const held of literal) {
            this.processChar(held);
        }
        this.consumeText(char);
    }

    /** Appends a character to the result, dropping spaces that lead a line. */
    private processChar(char: string): void {
        if (this.isStartOfLine && char === ' ') {
            // Skip leading spaces
            return;
        }
        this.result += char;
        this.isStartOfLine = char === '\n';
    }
}
+ </task> + + <critical_points> + <point>**MOST IMPORTANT**: Always output responses within stage number tags, using the stage number and the system role as the root tag (e.g., <stage number="2" role="system">, <stage number="4" role="system">, etc.). This is crucial and should never be overlooked.</point> + <point>**STOP after every stage and wait for the system to provide the next input (e.g., action rules or observations).</point> + <point>Only output **ONE stage at a time** in your responses. Do not skip stages or provide multiple stages at once. Thus, you should only output even stage number root tags.</point> + <point>Always structure your responses using valid, well-formed XML with properly nested tags.</point> + <point>If a tool is needed, ALWAYS select the most appropriate tool based on the user's query.</point> + <point>If the query could relate to user documents or require external information (e.g., RAG, search + website scraping, data analysis), USE the appropriate tool to gather that information.</point> + <point>If there are no user docs or the user docs have not yielded helpful information, use the search tool to find websites followed by the website scraper tool to get useful infromation from one of those websites. You can use the website scraper (or even the search tool), multiple times to find information from multiple websites either from the same search or different searches.</point> + <point>Ensure at the end of every final answer, you provide exactly three follow-up questions from the user's perspective—from the perspective that they are asking the question.</point> + <point>Always follow the response structure provided in the instructions.</point> + <point>If a tool doesn't work—or yield helpful results—after two tries, EITHER use another tool or proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. 
DO NOT CONTINUE WITH THE SAME TOOL 3 TIMES.</point> + </critical_points> + + <response_structure> + <instruction> + When providing your final response, use the following structure: + </instruction> + <answer> + <tag><grounded_text> - Wrap text that is derived from tool-based or chunk-based information within these tags, ensuring proper citation.</tag> + <tag><normal_text> - Wrap text that is not derived from tool-based or chunk-based information within these tags.</tag> + <citations> + <tag><citation> - Provide citations for each grounded text, referencing the tool or chunk used.</tag> + </citations> + <follow_up_questions> + <tag><question> - Include exactly three follow-up questions from the user's perspective within these tags.</tag> + </follow_up_questions> + <loop_summary> + <tag><loop_summary> - Provide a summary of the actions and tools used by the assistant throughout the interaction within these tags.</tag> + </loop_summary> + </answer> + </response_structure> + + <grounded_text_guidelines> + <step>Wrap all information derived from tools (e.g., RAG, search + website scraping, data analysis)—which will be provided in chunks—in <grounded_text> tags.</step> + <step>DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM TOOLS OR CHUNKS IN <grounded_text> TAGS.</step> + <step>Use a single <grounded_text> tag for sequential and closely related information that references the same citation.</step> + <step>If other citations are used sequentially, create new <grounded_text> tags.</step> + <step>Ensure each <grounded_text> tag has corresponding citations (up to three, and one is fine). 
Separate multiple citation indices with commas.</step> + <step>Grounded text can be as short as a few words or as long as several sentences.</step> + <step>Avoid overlapping or nesting <grounded_text> tags; use sequential tags instead.</step> + <step>Grounded text tags should always have a citation_index attribute that references a citation index number that the text is grounded in.</step> + <step>Content within the <grounded_text> tags should be in Markdown format.</step> + </grounded_text_guidelines> + + <normal_text_guidelines> + <step>Wrap all text that is not derived from tools or chunks in <normal_text> tags (any text outputted in the answer that is not in a <grounded_text> tag should be within a normal text tag).</step> + <step>Ensure that these tags are used for your reasoning, background knowledge, or general information that does not require a citation.</step> + <step>Do not use <normal_text> tags for information that needs grounding or citation.</step> + <step>Anything that is in any user docs should be grounded text and cited, not normal text, even if it is background or general information.</step> + <step>Content within the <normal_text> tags should be in Markdown format.</step> + </normal_text_guidelines> + + <citation_guidelines> + <step>Create a unique citation for each distinct piece of information from tools or chunks that is used to support <grounded_text>.</step> + <step>Ensure each citation has a unique index number.</step> + <step>Specify the correct type: "text", "image", "table", "csv", or "url".</step> + <step>For text-based information, include only the relevant subset of the original information that the <grounded_text> is based on.</step> + <step>For image, table, csv, or url citation types, leave the citation content empty.</step> + <step>ALL CITATIONS MUST use the chunk_id field to reference the source, whether it's from RAG, search + website scraping, data analysis, or any other tool.</step> + <step>One citation can be used for 
multiple <grounded_text> tags if they are based on the same tool or chunk information.</step> + <step>!!!DO NOT OVERCITE - only include citations for information that is directly relevant to the <grounded_text>.</step> + </citation_guidelines> + + <operational_process> + <step>Analyze the user's query carefully.</step> + <step>Determine whether a tool is required to answer the query accurately.</step> + <step>If a tool is necessary:</step> + <substeps> + <substep>Select the most appropriate tool.</substep> + <substep>Use the <action> tag to specify the tool.</substep> + <substep>End your response after the <action> tag and wait for action rules to be provided.</substep> + <substep>Based on the action rules, provide the necessary tool parameters within the <inputs>. The <inputs> tag should be within the <action_input> tag, and should follow an <action_description> tag that contains a brief description of what you're doing with the action.</substep> + <substep>For each input, you may provide as many different iterations of the same tag (i.e. 
to provide multiple inputs to the tool) as is specified in the input's max_input's field.</substep> + <substep>End your response again and wait for the observation from the tool.</substep> + </substeps> + <step>If no tool is needed, use the 'no_tool' action but still follow the same response structure.</step> + <step>If the query might relate to user documents or requires external information, **ALWAYS** use the appropriate tool to retrieve the information (either rag or dataAnalysis).</step> + <step>Once all observations are collected, or if no tool was needed, provide your comprehensive answer within the <answer> tag, using the <grounded_text> and <normal_text> tags as required.</step> + </operational_process> + + <final_answer_requirements> + <requirement>Your final <answer> tag must contain:</requirement> + <elements> + <element>The complete answer to the user's query, with grounded information wrapped in <grounded_text> tags and general information wrapped in <normal_text> tags.</element> + <element>Exactly three follow-up questions written from the user's perspective, enclosed within <follow_up_questions> tags.</element> + <element>A concise <loop_summary> that describes the actions and tools used throughout the interaction.</element> + </elements> + </final_answer_requirements> + + <tools> + ${toolDescriptions} + <note>If no external tool is required to answer the question, use the 'no_tool' action. However, if the query might relate to user documents or require external information, do not use 'no_tool'—instead, use the appropriate tool (RAG, search + website scraping, data analysis), even if unsure.</note> + </tools> + + <user_information> + <note>ENSURE THAT YOU ONLY USE TOOLS THAT ANALYZE OR OTHERWISE USE USER DOCS IF THE QUERY APPLIES TO ONE OF THESE USER SUMMARIES (AT LEAST SOMEWHAT). 
IF THERE ARE NO SUMMARIES, THERE ARE NO USER DOCUMENTS.</note> + <summaries> + ${summaries()} + </summaries> + </user_information> + + <example_interactions> + <note>These examples are not, by any means, exhaustive in terms of how tools can be used in conjunction with one another. They simply are to provide you with examples of how to structure your outputs and use some of the tools in some contexts.</note> + <reminder>YOU ONLY OUTPUT THE ASSISTANT STAGES:</reminder> + <interaction description="rag and data analysis tool example"> + <system_message> + ***SYSTEM MESSAGE ELIDED*** + </system_message> + <stage number="1" role="user"> + <query>Could you provide a detailed overview of the 2010 Vancouver Winter Olympics's impact, including the overall summary of the games, key moments from official statements, and how the medal count compared across countries?</query> + </stage> + + <stage number="2" role="assistant"> + <thought> + Since the user has Olympics related docs, I will use the RAG tool to find relevant information from the user's documents, specifically focusing on key moments and statements from an official press release. + </thought> + <action>rag</action> + </stage> + + <stage number="3" role="user"> + <action_rules>***Action rules elided***</action_rules> + </stage> + + <stage number="4" role="assistant"> + <action_input> + <action_input_description>Searching user documents for official statements and key moments of the 2010 Vancouver Winter Olympics.</action_input_description> + <inputs> + <hypothetical_document_chunk> + The user is asking for key moments and statements from official sources regarding the 2010 Vancouver Winter Olympics. Search the provided documents for any press releases or official statements that highlight significant events, achievements, or noteworthy aspects of the games. 
+ </hypothetical_document_chunk> + </inputs> + </action_input> + </stage> + + <stage number="5" role="user"> + <chunk chunk_id="987f6543-e21b-43c9-a987-654321fedcba" chunk_type="text"> + PRESS RELEASE: 2010 VANCOUVER WINTER OLYMPICS + + Vancouver, BC - + + The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. This edition of the Winter Olympics was notable for being the first hosted by Canada since the 1988 Winter Olympics in Calgary. + + Overall, the 2010 Winter Olympics have been hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The Opening Ceremony, held on February 12, 2010, was a spectacular celebration of the nation's history and diversity... + + Key Moments: + - The Canadian men's and women's ice hockey teams both claimed gold medals, an achievement that brought immense pride to the host nation. + - Tragedy struck early in the games with the death of Georgian luger Nodar Kumaritashvili during a training run. This incident led to increased scrutiny of safety measures across all events... + + ***More chunk content elided for brevity*** + + The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined the 2010 Vancouver Winter Olympics. + </chunk> + </stage> + + <stage number="6" role="assistant"> + <thought> + With information on key moments from the press release, I will now analyze the medal count data and economic impact data to provide a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. 
+ </thought> + <action>dataAnalysis</action> + </stage> + + <stage number="7" role="user"> + <action_rules>***Action rules elided***</action_rules> + </stage> + + <stage number="8" role="assistant"> + <action_input> + <action_input_description>Analyzing the medal count data and economic impact data for the 2010 Vancouver Winter Olympics to compare country performances and assess the games' overall impact.</action_input_description> + <inputs> + <csv_file_name>2010_Vancouver_Olympics_Medal_Count.csv</csv_file_name> + <csv_file_name>2010_Vancouver_Olympics_Economic_Impact.csv</csv_file_name> + </inputs> + </action_input> + </stage> + + <stage number="9" role="user"> + <chunk chunk_id="789a1234-e567-890f-gh12-34567ijklmn8" chunk_type="csv"> + Country,Gold,Silver,Bronze,Total + USA,9,15,13,37 + Germany,10,13,7,30 + Canada,14,7,5,26 + Norway,9,8,6,23 + Russia,3,5,7,15 + South Korea,6,6,2,14 + China,5,2,4,11 + Sweden,5,2,4,11 + France,2,3,6,11 + Austria,4,6,6,16 + </chunk> + <chunk chunk_id="bcde5678-f901-2345-gh67-89ijklmnopq0" chunk_type="csv"> + Category,Value + Total Cost (CAD),7.7 billion + Revenue Generated (CAD),2.5 billion + Jobs Created,45000 + Tourism Increase (%),35 + Infrastructure Investment (CAD),1.9 billion + Post-Olympic Annual Tourism Revenue Increase (CAD),220 million + Venues Repurposed,8 + Carbon Footprint Offset (%),118 + </chunk> + </stage> + + <stage number="10" role="assistant"> + <thought> + Now that I have gathered comprehensive information from the official press release, medal count data, and economic impact data, I will provide a detailed answer to the user's query, incorporating all aspects of the 2010 Vancouver Winter Olympics's impact. + </thought> + <answer> + <grounded_text citation_index="1"> + The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. 
It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. + </grounded_text> + + <grounded_text citation_index="2"> + The Opening Ceremony was one of the most spectacular in Olympic history, celebrating Canadian culture and heritage. A significant highlight for the host nation was the victory of the Canadian men's and women's ice hockey teams, both securing gold medals, which was a source of immense national pride. + </grounded_text> + + <grounded_text citation_index="3"> + In terms of medal performance, the United States led the overall tally with 37 medals (9 gold, 15 silver, 13 bronze), followed by Germany with 30 medals (10 gold, 13 silver, 7 bronze). Canada, while placing third in total medals with 26, achieved a record number of 14 gold medals, the highest of any country in the 2010 games. Other notable performances came from Norway (23 medals), Russia (15 medals), and South Korea (14 medals). + </grounded_text> + + <grounded_text citation_index="4"> + The economic impact of the 2010 Vancouver Winter Olympics was significant. The total cost of hosting the games was approximately 7.7 billion CAD, while generating revenue of 2.5 billion CAD. The games created about 45,000 jobs and led to a 35% increase in tourism during the event. The long-term benefits include an annual increase of 220 million CAD in tourism revenue and 1.9 billion CAD invested in infrastructure. + </grounded_text> + + <grounded_text citation_index="5"> + From an environmental perspective, the Vancouver Olympics made efforts to be sustainable, with 118% of the carbon footprint offset and 8 venues repurposed after the games, demonstrating a commitment to leaving a positive legacy. 
+ </grounded_text> + + <grounded_text citation_index="6"> + However, the games were also marked by tragedy with the unfortunate death of Georgian luger Nodar Kumaritashvili during a training run, which deeply affected the Olympic community and led to increased scrutiny of safety measures across all events. + </grounded_text> + + <grounded_text citation_index="7"> + Despite this tragedy, the 2010 Vancouver Winter Olympics were generally hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined these Olympics. + </grounded_text> + + <citations> + <citation index="1" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> + <citation index="2" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> + <citation index="3" chunk_id="789a1234-e567-890f-gh12-34567ijklmn8" type="csv"></citation> + <citation index="4" chunk_id="bcde5678-f901-2345-gh67-89ijklmnopq0" type="csv"></citation> + <citation index="5" chunk_id="bcde5678-f901-2345-gh67-89ijklmnopq0" type="csv"></citation> + <citation index="6" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> + <citation index="7" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> + </citations> + + <follow_up_questions> + <question>How did the 2010 Vancouver Winter Olympics compare to previous Winter Olympics in terms of economic impact and sustainability efforts?</question> + <question>What specific infrastructure improvements were made in Vancouver as a result of hosting the 2010 Winter Olympics?</question> + <question>How did the performance of Canadian athletes in the 2010 Vancouver Winter Olympics compare to their performance in previous Winter Olympics?</question> + </follow_up_questions> + <loop_summary> + The assistant used the RAG tool to gather 
information from the user's documents, focusing on key moments and official statements. It then used the dataAnalysis tool to analyze the medal count and economic impact data, providing a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. + </loop_summary> + </answer> + </stage> + </interaction> + <interaction description="multistep web-scrape example"> + <system_message> + ***SYSTEM MESSAGE ELIDED*** + </system_message> + <stage number="1" role="user"> + <query>What are the best hiking trails in Yosemite National Park? And which are the safest?</query> + </stage> + <stage number="2" role="assistant"> + <thought> + To provide comprehensive information about the best hiking trails in Yosemite National Park, I'll use the search tool to find relevant websites with hiking recommendations, safety information, and general park information. + </thought> + <action>searchTool</action> + </stage> + <stage number="3" role="user"> + <action_rules>***Action rules elided***</action_rules> + </stage> + <stage number="4" role="assistant"> + <action_input> + <action_input_description>Searching the web for information about the best and safest hiking trails in Yosemite National Park.</action_input_description> + <inputs> + <query>Best hiking trails Yosemite National Park</query> + <query>Safest hiking trails Yosemite National Park</query> + <query>Information about hiking in Yosemite National Park</query> + </inputs> + </action_input> + </stage> + <stage number="5" role="user"> + <observation> + <chunk chunk_id="a1b2c3d4-e5f6-7g8h-9i10-j11k12l13m14" chunk_type="url"> + <url>https://www.nps.gov/yose/planyourvisit/hiking.htm</url> + <overview>Official National Park Service website with hiking information for Yosemite</overview> + </chunk> + <chunk chunk_id="n15o16p17-q18r-19s20-t21u-v22w23x24y25" chunk_type="url"> + <url>https://www.alltrails.com/parks/us/california/yosemite-national-park</url> + <overview>AllTrails page featuring user-rated hiking trails in 
Yosemite</overview> + </chunk> + <chunk chunk_id="z26a27b28-c29d-30e31-f32g-h33i34j35k36" chunk_type="url"> + <url>https://www.yosemitehikes.com/</url> + <overview>Comprehensive guide to Yosemite hikes with detailed trail information</overview> + </chunk> + </observation> + </stage> + <stage number="6" role="assistant"> + <thought> + I've found several relevant websites about hiking in Yosemite National Park. I'll start by scraping content from the official National Park Service website for the most reliable information, then move on to AllTrails for user ratings and reviews, and finally to YosemiteHikes.com for more detailed trail descriptions. + </thought> + <action>websiteInfoScraper</action> + </stage> + <stage number="7" role="user"> + <action_rules>***Action rules elided***</action_rules> + </stage> + <stage number="8" role="assistant"> + <action_input> + <action_input_description>Scraping information from three websites about hiking trails in Yosemite National Park.</action_input_description> + <inputs> + <url>https://www.nps.gov/yose/planyourvisit/hiking.htm</url> + <url>https://www.alltrails.com/parks/us/california/yosemite-national-park</url> + <url>https://www.yosemitehikes.com/</url> + </inputs> + </action_input> + </stage> + <stage number="9" role="user"> + <observation> + <chunk chunk_id="l37m38n39-o40p-41q42-r43s-t44u45v46w47" chunk_type="url"> + ***NPS general planning website image elided*** + </chunk> + <chunk chunk_id="x48y49z50-a51b-52c53-d54e-f55g56h57i58" chunk_type="url"> + ***AllTrails website image elided*** + </chunk> + <chunk chunk_id="j59k60l61-m62n-63o64-p65q-r66s67t68u69" chunk_type="url"> + ***YosemiteHikes.com website image elided*** + </chunk> + </observation> + </stage> + <stage number="10" role="assistant"> + <thought> + The information gathered from the three websites provides a good overview of the best hiking trails and safety considerations in Yosemite National Park. 
However, I noticed that there's limited information about seasonal trail conditions and closures, which could be crucial for planning a safe hike. To address this, I'll scrape additional information from the Yosemite National Park's current conditions page. + </thought> + <action>websiteInfoScraper</action> + </stage> + <stage number="11" role="user"> + <action_rules>***Action rules elided***</action_rules> + </stage> + <stage number="12" role="assistant"> + <action_input> + <action_input_description>Scraping information about current trail conditions and closures in Yosemite National Park.</action_input_description> + <inputs> + <url>https://www.nps.gov/yose/planyourvisit/conditions.htm</url> + </inputs> + </action_input> + </stage> + <stage number="13" role="user"> + <observation> + <chunk chunk_id="k70l71m72-n73o-74p75-q76r-s77t78u79v80" chunk_type="url"> + ***NPS conditions website image elided*** + </chunk> + </observation> + </stage> + <stage number="14" role="assistant"> + <thought> + Now that I have additional information about current trail conditions and seasonal closures, I can provide a more comprehensive and up-to-date answer to the user's query about the best and safest hiking trails in Yosemite National Park. + </thought> + <answer> + <grounded_text citation_index="1"> + Yosemite National Park offers a wide range of hiking trails suitable for various skill levels. Some of the best hiking trails in Yosemite, based on the information gathered, include: + + 1. Mist Trail to Vernal Fall: A moderate 3-mile round-trip hike in Yosemite Valley, offering close-up views of waterfalls... 
***REST ELIDED FOR BREVITY*** </grounded_text> + + <grounded_text citation_index="2"> + ***CONTENT ELIDED FOR BREVITY*** + </grounded_text> + + <grounded_text citation_index="3"> + ***CONTENT ELIDED FOR BREVITY*** + </grounded_text> + <normal_text> + Regarding safety, some of the safest trails for hikers of all skill levels include: + </normal_text> + <grounded_text citation_index="1,2"> + 1. Lower Yosemite Fall Trail: An easy 1-mile loop in Yosemite Valley... ***REST ELIDED FOR BREVITY*** + </grounded_text> + + <grounded_text citation_index="4"> + ***CONTENT ELIDED FOR BREVITY*** + </grounded_text> + <normal_text> + To ensure a safe hiking experience in Yosemite, regardless of the trail you choose, follow these safety tips: + </normal_text> + <grounded_text citation_index="1,2,3,4"> + 1. Stay on marked trails at all times... + ***REST ELIDED FOR BREVITY*** + </grounded_text> + <normal_text> + Remember that even the "safest" trails can pose risks if proper precautions are not taken. Always hike within your ability level and be prepared for the specific conditions of the trail you choose. It's essential to check for daily updates on trail conditions before starting your hike, as they can change rapidly, especially during spring and fall. 
+ </normal_text> + <citations> + <citation index="1" chunk_id="l37m38n39-o40p-41q42-r43s-t44u45v46w47" type="url"> + https://www.nps.gov/yose/planyourvisit/hiking.htm + </citation> + <citation index="2" chunk_id="x48y49z50-a51b-52c53-d54e-f55g56h57i58" type="url"> + https://www.alltrails.com/parks/us/california/yosemite-national-park + </citation> + <citation index="3" chunk_id="j59k60l61-m62n-63o64-p65q-r66s67t68u69" type="url"> + https://www.yosemitehikes.com/ + </citation> + <citation index="4" chunk_id="k70l71m72-n73o-74p75-q76r-s77t78u79v80" type="url"> + https://www.nps.gov/yose/planyourvisit/conditions.htm + </citation> + </citations> + <follow_up_questions> + <question>What is the best time of year to hike in Yosemite National Park?</question> + <question>Are there any special permits required for hiking certain trails in Yosemite?</question> + <question>What wildlife might I encounter while hiking in Yosemite, and how should I prepare for potential encounters?</question> + </follow_up_questions> + <loop_summary> + The assistant used the search tool to find relevant websites, then scraped information from those sites using the websiteInfoScraper tool. Additional information about current trail conditions was gathered from the NPS website to provide a comprehensive and safe hiking guide. + </loop_summary> + </answer> + </stage> + </interaction> + </example_interactions> + + <chat_history> + ${chatHistory} + <note>Use this history for context when appropriate.</note> + </chat_history> + + <final_instruction> + Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive, correctly cited, and entirely contained within the structured tags. Do not get stuck in infinite loops and keep responses concise, grounded, and most importantly, HELPFUL AND USEFUL! 
+ </final_instruction> +</system_message> +`; +} + +export function getSummarizedChunksPrompt(chunks: string): string { + return `Please provide a comprehensive summary of what you think the document from which these chunks originated. + Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form. + + Text chunks: + \`\`\` + ${chunks} + \`\`\``; +} + +export function getSummarizedSystemPrompt(): string { + return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.'; +} diff --git a/src/client/views/nodes/ChatBox/tools.ts b/src/client/views/nodes/ChatBox/tools.ts new file mode 100644 index 000000000..4035280a8 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools.ts @@ -0,0 +1,26 @@ +import { DocCast } from '../../../../fields/Types'; +import { DocServer } from '../../../DocServer'; +import { Docs } from '../../../documents/Documents'; +import { DocUtils } from '../../../documents/DocUtils'; +import { TabDocView } from '../../collections/TabDocView'; +import { DocumentView } from '../DocumentView'; +import { OpenWhere } from '../OpenWhere'; + +export function retrieval(json: any): string { + return ''; +} + +export function create_collection(docView: DocumentView, document_ids: string[], title: string): string { + const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title }); + docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return 'Collection created in Dash called ' + title; +} + +export function create_link(docView: DocumentView, document_ids: string[]): string { + //Make document_ids a size 2 array + const docs = 
document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const linkDoc = DocUtils.MakeLink(docs[0], docs[1], {})!; + DocumentView.linkCommonAncestor(linkDoc)?.ComponentView?.addDocument?.(linkDoc); + return 'Link created between ' + docs[0].title + ' and ' + docs[1].title; +} diff --git a/src/client/views/nodes/ChatBox/tools/BaseTool.ts b/src/client/views/nodes/ChatBox/tools/BaseTool.ts new file mode 100644 index 000000000..2e2267653 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/BaseTool.ts @@ -0,0 +1,24 @@ +import { Tool } from '../types'; + +export abstract class BaseTool<T extends Record<string, any> = Record<string, any>> implements Tool<T> { + constructor( + public name: string, + public description: string, + public parameters: Record<string, any>, + public citationRules: string, + public briefSummary: string + ) {} + + abstract execute(args: T): Promise<any>; + + getActionRule(): Record<string, any> { + return { + [this.name]: { + name: this.name, + citationRules: this.citationRules, + description: this.description, + parameters: this.parameters, + }, + }; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts new file mode 100644 index 000000000..74b7ca27b --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts @@ -0,0 +1,26 @@ +import { BaseTool } from './BaseTool'; + +export class CalculateTool extends BaseTool<{ expression: string }> { + constructor() { + super( + 'calculate', + 'Perform a calculation', + { + expression: { + type: 'string', + description: 'The mathematical expression to evaluate', + required: 'true', + max_inputs: '1', + }, + }, + 'Provide a mathematical expression to calculate that would work with JavaScript eval().', + 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' + ); + } + + async execute(args: { expression: string }): Promise<any> { + // Note: 
Using eval() can be dangerous. Consider using a safer alternative. + const result = eval(args.expression); + return [{ type: 'text', text: result.toString() }]; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts new file mode 100644 index 000000000..55015846b --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts @@ -0,0 +1,51 @@ +import { BaseTool } from './BaseTool'; +import { Networking } from '../../../../Network'; + +export class CreateCSVTool extends BaseTool<{ csvData: string; filename: string }> { + private _handleCSVResult: (url: string, filename: string, id: string, data: string) => void; + + constructor(handleCSVResult: (url: string, title: string, id: string, data: string) => void) { + super( + 'createCSV', + 'Creates a CSV file from raw CSV data and saves it to the server', + { + type: 'object', + properties: { + csvData: { + type: 'string', + description: 'A string of comma-separated values representing the CSV data.', + }, + filename: { + type: 'string', + description: 'The base name of the CSV file to be created. Should end in ".csv".', + }, + }, + required: ['csvData', 'filename'], + }, + 'Provide a CSV string and a filename to create a CSV file.', + 'Creates a CSV file from the provided CSV string and saves it to the server with a unique identifier, returning the file URL and UUID.' 
+ ); + this._handleCSVResult = handleCSVResult; + } + + async execute(args: { csvData: string; filename: string }): Promise<any> { + try { + console.log('Creating CSV file:', args.filename, ' with data:', args.csvData); + // Post the raw CSV data to the createCSV endpoint on the server + const { fileUrl, id } = await Networking.PostToServer('/createCSV', { filename: args.filename, data: args.csvData }); + + // Handle the result by invoking the callback + this._handleCSVResult(fileUrl, args.filename, id, args.csvData); + + return [ + { + type: 'text', + text: `File successfully created: ${fileUrl}. \nNow a CSV file with this data and the name ${args.filename} is available as a user doc.`, + }, + ]; + } catch (error) { + console.error('Error creating CSV file:', error); + throw new Error('Failed to create CSV file.'); + } + } +} diff --git a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts new file mode 100644 index 000000000..573428179 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts @@ -0,0 +1,36 @@ +import { DocCast } from '../../../../../fields/Types'; +import { DocServer } from '../../../../DocServer'; +import { Docs } from '../../../../documents/Documents'; +import { DocumentView } from '../../DocumentView'; +import { OpenWhere } from '../../OpenWhere'; +import { BaseTool } from './BaseTool'; + +export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: string[] }> { + private _docView: DocumentView; + constructor(docView: DocumentView) { + super( + 'retrieveDocs', + 'Retrieves the contents of all Documents that the user is interacting with in Dash ', + { + title: { + type: 'string', + description: 'the title of the collection that you will be making', + required: 'true', + max_inputs: '1', + }, + }, + 'Provide a mathematical expression to calculate that would work with JavaScript eval().', + 'Runs a calculation and returns the number 
- uses JavaScript so be sure to use floating point syntax if necessary' + ); + this._docView = docView; + } + + async execute(args: { title: string; document_ids: string[] }): Promise<any> { + // Note: Using eval() can be dangerous. Consider using a safer alternative. + const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); + this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; + } +} +//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts new file mode 100644 index 000000000..a12ee46e5 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts @@ -0,0 +1,59 @@ +import { BaseTool } from './BaseTool'; + +export class DataAnalysisTool extends BaseTool<{ csv_file_name: string | string[] }> { + private csv_files_function: () => { filename: string; id: string; text: string }[]; + + constructor(csv_files: () => { filename: string; id: string; text: string }[]) { + super( + 'dataAnalysis', + 'Analyzes, and provides insights, from one or more CSV files', + { + csv_file_name: { + type: 'string', + description: 'Name(s) of the CSV file(s) to analyze', + required: 'true', + max_inputs: '3', + }, + }, + 'Provide the name(s) of up to 3 CSV files to analyze based on the user query and whichever available CSV files may be relevant.', + 'Provides the full CSV file text for your analysis based on the user query and the available CSV file(s). 
' + ); + this.csv_files_function = csv_files; + } + + getFileContent(filename: string): string | undefined { + const files = this.csv_files_function(); + const file = files.find(f => f.filename === filename); + return file?.text; + } + + getFileID(filename: string): string | undefined { + const files = this.csv_files_function(); + const file = files.find(f => f.filename === filename); + return file?.id; + } + + async execute(args: { csv_file_name: string | string[] }): Promise<any> { + const filenames = Array.isArray(args.csv_file_name) ? args.csv_file_name : [args.csv_file_name]; + const results = []; + + for (const filename of filenames) { + const fileContent = this.getFileContent(filename); + const fileID = this.getFileID(filename); + + if (fileContent && fileID) { + results.push({ + type: 'text', + text: `<chunk chunk_id=${fileID} chunk_type=csv>${fileContent}</chunk>`, + }); + } else { + results.push({ + type: 'text', + text: `File not found: ${filename}`, + }); + } + } + + return results; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts b/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts new file mode 100644 index 000000000..f970ca8ee --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts @@ -0,0 +1,29 @@ +import { DocCast } from '../../../../../fields/Types'; +import { DocServer } from '../../../../DocServer'; +import { Docs } from '../../../../documents/Documents'; +import { DocumentView } from '../../DocumentView'; +import { OpenWhere } from '../../OpenWhere'; +import { BaseTool } from './BaseTool'; + +export class GetDocsTool extends BaseTool<{ title: string; document_ids: string[] }> { + private _docView: DocumentView; + constructor(docView: DocumentView) { + super( + 'retrieveDocs', + 'Retrieves the contents of all Documents that the user is interacting with in Dash', + {}, + 'No need to provide anything. 
Just run the tool and it will retrieve the contents of all Documents that the user is interacting with in Dash.', + 'Returns the the documents in Dash in JSON form. This will include the title of the document, the location in the FreeFormDocument, and the content of the document, any applicable data fields, the layout of the document, etc.' + ); + this._docView = docView; + } + + async execute(args: { title: string; document_ids: string[] }): Promise<any> { + // Note: Using eval() can be dangerous. Consider using a safer alternative. + const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); + this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; + } +} +//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/ChatBox/tools/NoTool.ts b/src/client/views/nodes/ChatBox/tools/NoTool.ts new file mode 100644 index 000000000..1f0830a77 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/NoTool.ts @@ -0,0 +1,18 @@ +// tools/NoTool.ts +import { BaseTool } from './BaseTool'; + +export class NoTool extends BaseTool<{}> { + constructor() { + super( + 'no_tool', + 'Use this when no external tool or action is required to answer the question.', + {}, + 'When using the "no_tool" action, simply provide an empty <action_input> element. The observation will always be "No tool used. Proceed with answering the question."', + 'Use when no external tool or action is required to answer the question.' + ); + } + + async execute(args: {}): Promise<any> { + return [{ type: 'text', text: 'No tool used. Proceed with answering the question.' 
}]; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts new file mode 100644 index 000000000..fe8deae6b --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -0,0 +1,136 @@ +import { BaseTool } from './BaseTool'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { RAGChunk } from '../types'; +import * as fs from 'fs'; +import { Networking } from '../../../../Network'; +import { file } from 'jszip'; +import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources'; + +export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { + constructor(private vectorstore: Vectorstore) { + super( + 'rag', + 'Perform a RAG search on user documents', + { + hypothetical_document_chunk: { + type: 'string', + description: + "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query", + required: 'true', + }, + }, + ` + Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously: + + 1. Overall Structure: + <answer> + [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)] + <citations> + [Individual citation tags] + </citations> + <follow_up_questions> + [Three question tags] + </follow_up_questions> + </answer> + + 2. Grounded Text Tag Structure: + - Basic format: + <grounded_text citation_index="[citation index number(s)]"> + [Your generated text based on information from a subset of a chunk (a citation's direct text)] + </grounded_text> + + 3. 
Citation Tag Structure: + <citation index="[unique number]" chunk_id="[UUID v4]" type="[text/image/table]"> + [For text: relevant subset of original chunk] + [For image/table: leave empty] + </citation> + + 4. Detailed Grounded Text Guidelines: + a. Wrap all information derived from chunks in grounded_text tags. + b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS. + c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags. + d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas. + e. Grounded text can be as short as a few words or as long as several sentences. + f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags. + + 5. Detailed Citation Guidelines: + a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. + b. ALL TEXT CITATIONS must have direct text in its element content (e.g. <citation ...>DIRECT TEXT HERE</citation>) that is a relevant subset of the original text chunk that is being cited specifically. + b. Ensure each citation has a unique index number. + c. Specify the correct type: "text", "image", or "table". + d. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. + e. For image/table chunks, leave the citation content empty. + f. One citation can be used for multiple grounded_text tags if they are based on the same chunk information. + g. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. + + 6. Structural Integrity Checks: + a. 
Ensure all opening tags have corresponding closing tags. + b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field). + c. Check that all cited indices in grounded_text tags have corresponding citations. + + Example of grounded_text usage: + + <answer> + <grounded_text citation_index="1,2"> + Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning. + </grounded_text> + <grounded_text citation_index="2,3,4"> + In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists. + </grounded_text> + <grounded_text citation_index="4"> + This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress. + </grounded_text> + + <grounded_text citation_index="5,6"> + Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods. + </grounded_text> + <grounded_text citation_index="6"> + This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential. + </grounded_text> + + [... rest of the content ...] 
+ + <citations> + <citation index="1" chunk_id="123e4567-e89b-12d3-a456-426614174000" type="text">Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors.</citation> + <citation index="2" chunk_id="123e4567-e89b-12d3-a456-426614174001" type="text">AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis.</citation> + <citation index="3" chunk_id="123e4567-e89b-12d3-a456-426614174002" type="text">According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists.</citation> + <citation index="4" chunk_id="123e4567-e89b-12d3-a456-426614174003" type="text">The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety.</citation> + <citation index="5" chunk_id="123e4567-e89b-12d3-a456-426614174004" type="text">AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates.</citation> + <citation index="6" chunk_id="123e4567-e89b-12d3-a456-426614174005" type="text">The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases.</citation> + </citations> + + <follow_up_questions> + <question>How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future?</question> + <question>What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations?</question> + <question>How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices?</question> + </follow_up_questions> + </answer> + `, + + `Performs a RAG (Retrieval-Augmented Generation) search on user 
documents and returns a + set of document chunks (either images or text) that can be used to provide a grounded response based on + user documents` + ); + } + + async execute(args: { hypothetical_document_chunk: string }): Promise<any> { + const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); + const formatted_chunks = await this.getFormattedChunks(relevantChunks); + return formatted_chunks; + } + + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<ChatCompletionContentPart[]> { + try { + const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); + + if (!formattedChunks) { + throw new Error('Failed to format chunks'); + } + + return formattedChunks; + } catch (error) { + console.error('Error formatting chunks:', error); + throw error; + } + } +} diff --git a/src/client/views/nodes/ChatBox/tools/SearchTool.ts b/src/client/views/nodes/ChatBox/tools/SearchTool.ts new file mode 100644 index 000000000..b926cbadc --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/SearchTool.ts @@ -0,0 +1,54 @@ +import { max } from 'lodash'; +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import { v4 as uuidv4 } from 'uuid'; + +export class SearchTool extends BaseTool<{ query: string | string[] }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + private _max_results: number; + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 5) { + super( + 'searchTool', + 'Search the web to find a wide range of websites related to a query or multiple queries', + { + query: { + type: 'string', + description: 'The search query or queries to use for finding websites', + required: 'true', + max_inputs: '3', + }, + }, + 'Provide up to 3 search queries to find a broad range of websites. This tool is intended to help you identify relevant websites, but not to be used for providing the final answer. 
Use this information to determine which specific website to investigate further.', + 'Returns a list of websites and their overviews based on the search queries, helping to identify which websites might contain relevant information.' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + this._max_results = max_results; + } + + async execute(args: { query: string | string[] }): Promise<any> { + const queries = Array.isArray(args.query) ? args.query : [args.query]; + const allResults = []; + + for (const query of queries) { + try { + const { results } = await Networking.PostToServer('/getWebSearchResults', { query, max_results: this._max_results }); + const data: { type: string; text: string }[] = results.map((result: { url: string; snippet: string }) => { + const id = uuidv4(); + return { + type: 'text', + text: `<chunk chunk_id="${id}" chunk_type="text"> + <url>${result.url}</url> + <overview>${result.snippet}</overview> + </chunk>`, + }; + }); + allResults.push(...data); + } catch (error) { + console.log(error); + allResults.push({ type: 'text', text: `An error occurred while performing the web search for query: ${query}` }); + } + } + + return allResults; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts new file mode 100644 index 000000000..4588b5aec --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts @@ -0,0 +1,43 @@ +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import { v4 as uuidv4 } from 'uuid'; + +export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + + constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + super( + 'websiteInfoScraper', + 'Scrape detailed information from specific websites identified as relevant to the user query', + { + url: { + type: 'string', + description: 'The 
URL(s) of the website(s) to scrape', + required: 'true', + max_inputs: '3', + }, + }, + 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide the text content of those specific websites. When providing a final response to the user based on information from these chunks, ideally cite as many of the url chunks as possible (ground your infromation from multiple sources, if possible) in order to provide a well grounded result.', + 'Returns the text content of the webpages for analysis.' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + } + + async execute(args: { url: string | string[] }): Promise<any> { + const urls = Array.isArray(args.url) ? args.url : [args.url]; + const results = []; + + for (const url of urls) { + try { + const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); + const id = uuidv4(); + this._addLinkedUrlDoc(url, id); + results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>\n` }); + } catch (error) { + results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + } + } + + return results; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts new file mode 100644 index 000000000..143d91d80 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts @@ -0,0 +1,37 @@ +import { title } from 'process'; +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import axios from 'axios'; +import { v4 as uuidv4 } from 'uuid'; + +export class WikipediaTool extends BaseTool<{ title: string }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + super( + 'wikipedia', + 'Search Wikipedia and return a summary', + { + title: { + type: 'string', + description: 'The title 
of the Wikipedia article to search', + required: true, + }, + }, + 'Provide simply the title you want to search on Wikipedia and nothing more. If re-using this tool, try a different title for different information.', + 'Returns a summary from searching an article title on Wikipedia' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + } + + async execute(args: { title: string }): Promise<any> { + try { + const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); + const id = uuidv4(); + const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`; + this._addLinkedUrlDoc(url, id); + return [{ type: 'text', text: `<chunk chunk_id=${id} chunk_type=csv}> ${text} </chunk>` }]; + } catch (error) { + return [{ type: 'text', text: 'An error occurred while fetching the article.' }]; + } + } +} diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 8212a7050..a59ead067 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -1,23 +1,167 @@ +import { breadcrumbsClasses } from '@mui/material'; +import { Doc } from '../../../../fields/Doc'; +import { StrCast } from '../../../../fields/Types'; +import e from 'cors'; +import { index } from 'd3'; + export enum ASSISTANT_ROLE { - USER = 'User', - ASSISTANT = 'Assistant', + USER = 'user', + ASSISTANT = 'assistant', +} + +export enum TEXT_TYPE { + NORMAL = 'normal', + GROUNDED = 'grounded', + ERROR = 'error', +} + +export enum CHUNK_TYPE { + TEXT = 'text', + IMAGE = 'image', + TABLE = 'table', + URL = 'url', + CSV = 'CSV', +} + +export enum PROCESSING_TYPE { + THOUGHT = 'thought', + ACTION = 'action', + //eventually migrate error to here } -export enum ANNOTATION_LINK_TYPE { - DASH_DOC = 'citation', - DOWNLOAD_FILE = 'file_path', +export function getChunkType(type: string): CHUNK_TYPE { + switch (type.toLowerCase()) { + case 'text': + return CHUNK_TYPE.TEXT; + break; + case 'image': + return 
/**
 * Maps a chunk type label to the corresponding CHUNK_TYPE member.
 *
 * The comparison is case-insensitive: the label is lower-cased before matching, so
 * every case label below must itself be lower case. (The previous `case 'CSV'` could
 * never match for that reason, which made every CSV chunk fall through to TEXT.)
 *
 * @param type label to convert, e.g. 'image', 'CSV', 'Url'
 * @returns the matching CHUNK_TYPE, or CHUNK_TYPE.TEXT for 'text' and any unrecognised label
 */
export function getChunkType(type: string): CHUNK_TYPE {
    switch (type.toLowerCase()) {
        case 'image':
            return CHUNK_TYPE.IMAGE;
        case 'table':
            return CHUNK_TYPE.TABLE;
        case 'csv':
            return CHUNK_TYPE.CSV;
        case 'url':
            return CHUNK_TYPE.URL;
        case 'text':
        default:
            return CHUNK_TYPE.TEXT;
    }
}
AgentMessage { + role: 'system' | 'user' | 'assistant'; + content: string | { type: string; text?: string; image_url?: { url: string } }[]; +} + +// export function convertToAIDocument(json: any): AI_Document { +// if (!json) { +// throw new Error('Invalid JSON object'); +// } + +// const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ +// id: chunk.id, +// values: chunk.values, +// metadata: { +// text: chunk.metadata.text, +// type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting +// original_document: chunk.metadata.original_document, +// file_path: chunk.metadata.file_path, +// location: chunk.metadata.location, +// start_page: chunk.metadata.start_page, +// end_page: chunk.metadata.end_page, +// base64_data: chunk.metadata.base64_data, +// width: chunk.metadata.width, +// height: chunk.metadata.height, +// }, +// })); + +// const aiDocument: AI_Document = { +// purpose: json.purpose, +// file_name: json.file_name, +// num_pages: json.num_pages, +// summary: json.summary, +// chunks: chunks, +// type: json.type, +// }; + +// return aiDocument; +// } diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts new file mode 100644 index 000000000..408274703 --- /dev/null +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -0,0 +1,208 @@ +import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone'; +import { CohereClient } from 'cohere-ai'; +import { EmbedResponse } from 'cohere-ai/api'; +import dotenv from 'dotenv'; +import axios from 'axios'; +import { SimplifiedChunk } from '../types'; + +import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; +import { Doc } from '../../../../../fields/Doc'; +import { DocData } from '../../../../../fields/DocSymbols'; +import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; +import { Networking } from '../../../../Network'; + +dotenv.config(); + 
/**
 * Pinecone-backed store for the chunks of documents attached to a chat.
 *
 * Documents are turned into embedded chunks by the server (`/createDocument`, polled via
 * `/getResult` and `/getProgress`), upserted into the Pinecone index named `indexName`,
 * and later retrieved by embedding a query with Cohere and querying the index filtered
 * to the document ids this instance knows about.
 */
export class Vectorstore {
    /** Value written to `doc.ai_document_status` while a document is being processed. */
    private static readonly STATUS_IN_PROGRESS = 'PROGRESS';
    /** Alternate spelling the original status check compared against; still honoured on read. */
    private static readonly STATUS_IN_PROGRESS_ALT = 'IN PROGRESS';
    /** Value written to `doc.ai_document_status` once processing has finished. */
    private static readonly STATUS_COMPLETED = 'COMPLETED';
    /** Delay between two polls of the server for a job's result. */
    private static readonly POLL_INTERVAL_MS = 2000;

    private pinecone: Pinecone;
    private index!: Index;
    private cohere: CohereClient;
    private indexName: string = 'pdf-chatbot';
    private _id: string;
    private _doc_ids: string[] = [];
    /** Settles once `index` has been assigned; awaited before every use of `index`. */
    private _indexReady: Promise<void>;

    documents: AI_Document[] = [];

    /**
     * @param id identifier of this vector store, recorded on every document added to it
     * @param doc_ids function returning the ids of documents already in the store (called once)
     * @throws Error when PINECONE_API_KEY is not set in the environment
     */
    constructor(id: string, doc_ids: () => string[]) {
        const pineconeApiKey = process.env.PINECONE_API_KEY;
        if (!pineconeApiKey) {
            throw new Error('PINECONE_API_KEY is not defined.');
        }

        this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
        this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
        this._id = id;
        this._doc_ids = doc_ids();
        // A constructor cannot await, so keep the promise and await it wherever `index` is used.
        this._indexReady = this.initializeIndex();
        // Log a failed initialisation even if nobody ever awaits the promise.
        this._indexReady.catch(error => console.error('Failed to initialize Pinecone index:', error));
    }

    /** Creates the Pinecone index if it does not exist yet and binds `this.index` to it. */
    private async initializeIndex() {
        const indexList: IndexList = await this.pinecone.listIndexes();

        if (!indexList.indexes?.some(index => index.name === this.indexName)) {
            await this.pinecone.createIndex({
                name: this.indexName,
                dimension: 1024,
                metric: 'cosine',
                spec: {
                    serverless: {
                        cloud: 'aws',
                        region: 'us-east-1',
                    },
                },
            });
        }

        this.index = this.pinecone.Index(this.indexName);
    }

    /** Remembers a document id for retrieval filtering, ignoring empty and duplicate ids. */
    private trackDocId(docId: string) {
        if (docId && !this._doc_ids.includes(docId)) {
            this._doc_ids.push(docId);
        }
    }

    /**
     * Polls the server until the job reports `status === 'completed'`, forwarding progress
     * updates to `progressCallback` between polls.
     *
     * NOTE(review): there is no upper bound on the number of polls (as in the original code);
     * a job that never completes keeps this loop alive.
     */
    private async pollForResult(jobId: string, progressCallback: (progress: number, step: string) => void): Promise<AI_Document & { doc_id: string }> {
        for (;;) {
            await new Promise(resolve => setTimeout(resolve, Vectorstore.POLL_INTERVAL_MS));

            const resultJson = JSON.parse(await Networking.FetchFromServer(`/getResult/${jobId}`));
            if (resultJson.status === 'completed') {
                return resultJson;
            }

            const progressJson = JSON.parse(await Networking.FetchFromServer(`/getProgress/${jobId}`));
            if (progressJson) {
                progressCallback(progressJson.progress, progressJson.step);
            }
        }
    }

    /**
     * Adds a document to the store.
     *
     * - If the document is currently being processed, does nothing.
     * - If it was processed before, only makes sure its id is used for retrieval.
     * - Otherwise sends its CSV/PDF file to the server, waits for the chunks, indexes them and
     *   records summary, ids and simplified chunk info on `doc`.
     *
     * On failure the status is cleared again (so the document can be retried) and the error
     * is re-thrown to the caller.
     *
     * @param doc the document to add; several of its fields are written by this method
     * @param progressCallback receives `(progress, step)` while the server is working
     */
    async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
        console.log('Adding AI Document:', doc);
        const status = StrCast(doc.ai_document_status);

        // The status written below is 'PROGRESS' while the original check looked for
        // 'IN PROGRESS', so the guard never fired; accept both spellings.
        if (status === Vectorstore.STATUS_IN_PROGRESS || status === Vectorstore.STATUS_IN_PROGRESS_ALT) {
            console.log('Already in progress.');
            return;
        }

        if (status && status.trim() !== '' && status !== '{}') {
            // Processed earlier: just make sure retrieval includes it.
            this.trackDocId(StrCast(doc.ai_doc_id));
            return;
        }

        const local_file_path: string | undefined = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname;
        console.log('Local File Path:', local_file_path);
        if (!local_file_path) {
            // Nothing to process; do not mark the document as in progress or it would stay that way.
            return;
        }

        doc.ai_document_status = Vectorstore.STATUS_IN_PROGRESS;
        try {
            console.log('Creating AI Document...');
            const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
            if (!jobId) {
                // Without a job id the polling URLs below could never report completion.
                throw new Error('Server did not return a job id for the document.');
            }

            const result = await this.pollForResult(jobId, progressCallback);
            this.documents.push(result);

            // Round-trip through JSON to drop null properties before upserting.
            await this.indexDocument(JSON.parse(JSON.stringify(result, (_key, value) => value ?? undefined)));
            console.log(`Document added: ${result.file_name}`);

            doc.summary = result.summary;
            doc.ai_doc_id = result.doc_id;
            this.trackDocId(result.doc_id);
            doc.ai_purpose = result.purpose;

            const storeIds: string[] = doc.vectorstore_id ? JSON.parse(StrCast(doc.vectorstore_id)) : [];
            doc.vectorstore_id = JSON.stringify(storeIds.includes(this._id) ? storeIds : storeIds.concat([this._id]));

            // Parse the stored chunk list once, append every new chunk, and write it back once.
            const chunkSimpl: { chunks: SimplifiedChunk[] } = doc.chunk_simpl ? JSON.parse(StrCast(doc.chunk_simpl)) : { chunks: [] };
            chunkSimpl.chunks = chunkSimpl.chunks.concat(
                result.chunks.map((chunk: RAGChunk) => ({
                    chunkId: chunk.id,
                    startPage: chunk.metadata.start_page,
                    endPage: chunk.metadata.end_page,
                    location: chunk.metadata.location,
                    chunkType: chunk.metadata.type as CHUNK_TYPE,
                }))
            );
            doc.chunk_simpl = JSON.stringify(chunkSimpl);

            doc.ai_document_status = Vectorstore.STATUS_COMPLETED;
        } catch (error) {
            // An empty status is what this method treats as "not processed yet".
            doc.ai_document_status = '';
            throw error;
        }
    }

    /** Upserts every chunk of `document` (id, embedding values, metadata) into the index. */
    private async indexDocument(document: any) {
        console.log('Uploading vectors to content namespace...');
        await this._indexReady;
        const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as RAGChunk[]).map(
            chunk =>
                ({
                    id: chunk.id,
                    values: chunk.values,
                    metadata: { ...chunk.metadata } as RecordMetadata,
                }) as PineconeRecord
        );
        await this.index.upsert(pineconeRecords);
    }

    /**
     * Returns the chunks most similar to `query` among the documents known to this store.
     *
     * @param query free-text query, embedded with Cohere before searching
     * @param topK maximum number of chunks to return
     * @returns the matching chunks, or an empty array if anything fails (the error is logged)
     */
    async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
        console.log(`Retrieving chunks for query: ${query}`);
        try {
            await this._indexReady;

            const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
                texts: [query],
                model: 'embed-english-v3.0',
                inputType: 'search_query',
            });

            let queryEmbedding: number[];
            const { embeddings } = queryEmbeddingResponse;

            // The response carries the vectors either directly or nested one level deeper.
            if (Array.isArray(embeddings)) {
                queryEmbedding = embeddings[0];
            } else if (embeddings && 'embeddings' in embeddings) {
                queryEmbedding = (embeddings as { embeddings: number[][] }).embeddings[0];
            } else {
                throw new Error('Invalid embedding response format');
            }

            if (!Array.isArray(queryEmbedding)) {
                throw new Error('Query embedding is not an array');
            }

            const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({
                vector: queryEmbedding,
                filter: {
                    doc_id: { $in: this._doc_ids },
                },
                topK,
                includeValues: true,
                includeMetadata: true,
            });

            return queryResponse.matches.map(
                match =>
                    ({
                        id: match.id,
                        values: match.values as number[],
                        metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number },
                    }) as RAGChunk
            );
        } catch (error) {
            console.error(`Error retrieving chunks: ${error}`);
            return [];
        }
    }
}
+ // // if (hit && hit.localName === "span" && this.annotationsActive(true)) { // drag selecting text stops propagation + // // e.button === 0 && e.stopPropagation(); + // // } + // // if alt+left click, drag and annotate + // this._downX = coords[0]; + // this._downY = coords[2]; + + // if ((this._props.Document._freeform_scale || 1) !== 1) return; + // this._props.select(false); + // MarqueeAnnotator.clearAnnotations(this._savedAnnotations); + // this.isAnnotating = true; + // this._textSelecting = false; + // // if textLayer is hit, then we select text instead of using a marquee so clear out the marquee. + // this._styleRule = addStyleSheetRule(PDFViewer._annotationStyle, 'htmlAnnotation', { 'pointer-events': 'none' }); + + // this._marqueeref.current?.onInitiateSelection([coords[0], coords[2]]); + // this._marqueeref.current?.onTerminateSelection(); + // }; + @action finishMarquee = (/* x?: number, y?: number */) => { this._getAnchor = AnchorMenu.Instance?.GetAnchor; diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index b42314e41..2ffc99e58 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -7,6 +7,17 @@ import * as uuid from 'uuid'; import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; +import axios from 'axios'; +import { RAGChunk } from '../../client/views/nodes/ChatBox/types'; +import { UnstructuredClient } from 'unstructured-client'; +import { PartitionResponse } from 'unstructured-client/sdk/models/operations'; +import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared'; +import * as cheerio from 'cheerio'; +import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk'; +import { google } from 'googleapis'; +import * as puppeteer from 'puppeteer'; +import { JSDOM } from 'jsdom'; +import { Readability } from 
'@mozilla/readability'; export enum Directory { parsed_files = 'parsed_files', @@ -17,6 +28,8 @@ export enum Directory { pdf_thumbnails = 'pdf_thumbnails', audio = 'audio', csv = 'csv', + chunk_images = 'chunk_images', + scrape_images = 'scrape_images', } export function serverPathToFile(directory: Directory, filename: string) { @@ -36,94 +49,358 @@ const readFileAsync = promisify(fs.readFile); export default class AssistantManager extends ApiManager { protected initialize(register: Registration): void { - const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); + const openai = new OpenAI({ + apiKey: process.env._CLIENT_OPENAI_KEY, // Use client key so don't have to set key seperately for client and server. + dangerouslyAllowBrowser: true, + }); + const unstructuredClient = new UnstructuredClient({ + security: { + apiKeyAuth: process.env._CLIENT_UNSTRUCTURED_API_KEY!, + }, + }); + const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! 
// POST /getWikipediaSummary — body: { title }. Runs a Wikipedia search for `title` and
// responds with { text } holding the snippet of the first hit, or a "not found" message.
register({
    method: Method.POST,
    subscription: '/getWikipediaSummary',
    secureHandler: async ({ req, res }) => {
        const { title } = req.body;
        try {
            const response = await axios.get('https://en.wikipedia.org/w/api.php', {
                params: {
                    action: 'query',
                    list: 'search',
                    srsearch: title,
                    format: 'json',
                },
            });
            // A search without hits returns an empty list; optional chaining keeps that case on
            // the "not found" path instead of throwing and answering with a 500.
            const summary: string | undefined = response.data?.query?.search?.[0]?.snippet;
            if (!summary || summary.trim() === '') {
                res.send({ text: 'No article found with that title.' });
            } else {
                res.send({ text: summary });
            }
        } catch (error: unknown) {
            console.error('Error retrieving article summary from Wikipedia:', error);
            const details = error instanceof Error ? error.message : String(error);
            res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', details });
        }
    },
});
// POST /scrapeWebsite — body: { url }. Loads the page in a headless browser, extracts the
// readable article text with Readability and responds with { website_plain_text }.
register({
    method: Method.POST,
    subscription: '/scrapeWebsite',
    secureHandler: async ({ req, res }) => {
        const { url } = req.body;

        // The URL comes from the client: only fetch web pages, never file:// or other schemes
        // that would make the headless browser read local resources.
        let protocol: string;
        try {
            protocol = new URL(url).protocol;
        } catch {
            res.status(400).send({ error: 'A valid URL is required.' });
            return;
        }
        if (protocol !== 'http:' && protocol !== 'https:') {
            res.status(400).send({ error: 'Only http and https URLs can be scraped.' });
            return;
        }

        try {
            // Launch Puppeteer to navigate to the webpage
            const browser = await puppeteer.launch({
                args: ['--no-sandbox', '--disable-setuid-sandbox'],
            });

            let htmlContent: string;
            try {
                const page = await browser.newPage();
                await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
                await page.goto(url, { waitUntil: 'networkidle2' });
                htmlContent = await page.content();
            } finally {
                // Close the browser even when navigation fails so no browser process is leaked.
                await browser.close();
            }

            // Parse the HTML and let Readability pick out the main article content
            const dom = new JSDOM(htmlContent, { url });
            const article = new Readability(dom.window.document).parse();

            if (article) {
                res.send({ website_plain_text: article.textContent });
            } else {
                res.status(500).send({ error: 'Failed to extract readable content' });
            }
        } catch (error: unknown) {
            console.error('Error scraping website:', error);
            const details = error instanceof Error ? error.message : String(error);
            res.status(500).send({ error: 'Failed to scrape website', details });
        }
    },
});
// GET /getProgress/:jobId — forwards the progress report of a document-processing job from
// the local processing service to the client.
register({
    method: Method.GET,
    subscription: '/getProgress/:jobId',
    secureHandler: async ({ req, res }) => {
        const { jobId } = req.params;
        try {
            const progressResponse = await axios.get(`http://localhost:8080/getProgress/${encodeURIComponent(jobId)}`);
            console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`);
            res.json(progressResponse.data);
        } catch (error: unknown) {
            console.error('Error getting progress:', error);
            // The caught value is an Error object, not a JSON string: JSON.parse on it threw
            // inside this handler, so no response was ever sent. Read the message directly.
            const details = error instanceof Error ? error.message : String(error);
            res.status(500).send({ error: 'Failed to get progress', details });
        }
    },
});
// GET /getResult/:jobId — fetches the result of a document-processing job. When the result
// contains chunks, image/table chunks that carry base64 data are written to
// /files/chunk_images/, their file_path is pointed at the saved file and the base64 payload
// is removed; the result is then marked 'completed'. Without chunks it is marked 'pending'.
register({
    method: Method.GET,
    subscription: '/getResult/:jobId',
    secureHandler: async ({ req, res }) => {
        const { jobId } = req.params;
        try {
            const finalResponse = await axios.get(`http://localhost:8080/getResult/${encodeURIComponent(jobId)}`);
            const result = finalResponse.data;

            if (result.chunks && Array.isArray(result.chunks)) {
                const files_directory = '/files/chunk_images/';
                const directory = path.join(publicDirectory, files_directory);
                let directoryReady = false;

                for (const chunk of result.chunks) {
                    const isVisualChunk = chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table');
                    if (!isVisualChunk) continue;

                    if (!directoryReady) {
                        // recursive: also creates missing parents and does not fail if the
                        // directory already exists; done once instead of once per chunk.
                        fs.mkdirSync(directory, { recursive: true });
                        directoryReady = true;
                    }

                    const fileName = path.basename(chunk.metadata.file_path);

                    if (chunk.metadata.base64_data) {
                        // Decode Base64 and save as file
                        const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
                        await fs.promises.writeFile(path.join(directory, fileName), buffer);

                        // Point the chunk at the saved file and drop the inline payload
                        chunk.metadata.file_path = path.join(files_directory, fileName);
                        chunk.metadata.base64_data = undefined;
                    } else {
                        console.warn(`No base64_data found for chunk: ${fileName}`);
                    }
                }
                result['status'] = 'completed';
            } else {
                console.warn('Not ready');
                result.status = 'pending';
            }
            res.json(result);
        } catch (error: unknown) {
            console.error('Error getting result:', error);
            // An Error object serialises to {} in JSON, so send its message instead.
            const details = error instanceof Error ? error.message : String(error);
            res.status(500).send({ error: 'Failed to get result', details });
        }
    },
});
+ }); + + register({ + method: Method.POST, + subscription: '/formatChunks', + secureHandler: async ({ req, res }) => { + const { relevantChunks } = req.body; + const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }]; + + for (const chunk of relevantChunks) { + content.push({ + type: 'text', + text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type}>`, + }); + + if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { + try { + const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); + const imageBuffer = await readFileAsync(filePath); + const base64Image = imageBuffer.toString('base64'); + if (base64Image) { + content.push({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${base64Image}`, + }, + }); + } else { + console.log(`Failed to encode image for chunk ${chunk.id}`); + } + } catch (error) { + console.error(`Error reading image file for chunk ${chunk.id}:`, error); + } + } + content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` }); + } + + content.push({ type: 'text', text: '</chunks>' }); + + res.send({ formattedChunks: content }); + }, + }); + + register({ + method: Method.POST, + subscription: '/createCSV', + secureHandler: async ({ req, res }) => { + const { filename, data } = req.body; + + // Validate input + if (!filename || !data) { + res.status(400).send({ error: 'Filename and data fields are required.' 
}); + return; + } + + try { + // Generate a UUID for the file + const uuidv4 = uuid.v4(); + + // Construct the full filename with the UUID prefix + const fullFilename = `${uuidv4}-${filename}`; + + // Get the full server path where the file will be saved + const serverFilePath = serverPathToFile(Directory.csv, fullFilename); + + // Write the CSV data (which is a raw string) to the file + await writeFileAsync(serverFilePath, data, 'utf8'); + + // Construct the full client URL for accessing the file + const fileUrl = clientPathToFile(Directory.csv, fullFilename); + + // Return the file URL and UUID to the client + res.send({ fileUrl, id: uuidv4 }); + } catch (error: any) { + console.error('Error creating CSV file:', error); + res.status(500).send({ error: 'Failed to create CSV file.', details: error.message }); + } + }, + }); + + register({ + method: Method.POST, + subscription: '/chunkDocument', + secureHandler: async ({ req, res }) => { + const { file_path } = req.body; + const public_path = path.join(publicDirectory, file_path); + const file_name = path.basename(file_path); + + try { + // Read file data and convert to base64 + const file_data = await fs.promises.readFile(public_path); + + try { + const result = await unstructuredClient.general.partition({ + partitionParameters: { + files: { + content: file_data, + fileName: file_name, + }, + strategy: Strategy.Auto, + chunkingStrategy: ChunkingStrategy.ByTitle, + extractImageBlockTypes: ['Image', 'Table'], + }, + }); + + if (result.statusCode === 200) { + console.log(result.elements); + const jsonElements = JSON.stringify(result.elements, null, 2); + // Print the processed data. 
+ console.log(jsonElements); + res.send({ document_json: jsonElements }); + } else { + console.error(`Unexpected status code: ${result.statusCode}`); + res.status(result.statusCode).send({ error: 'Failed to process the document', details: result }); + } + } catch (e: any) { + console.error('Error during partitioning:', e); + res.status(500).send({ error: 'Failed to partition the document', details: e.message }); + } + } catch (error: any) { + console.error('Error reading file:', error); + res.status(500).send({ error: 'Failed to read the file', details: error.message }); } }, }); diff --git a/src/server/RouteManager.ts b/src/server/RouteManager.ts index d8e0455f6..22e608868 100644 --- a/src/server/RouteManager.ts +++ b/src/server/RouteManager.ts @@ -8,6 +8,7 @@ import { DashUserModel } from './authentication/DashUserModel'; export enum Method { GET, POST, + PUT, } export interface CoreArguments { @@ -208,6 +209,9 @@ export default class RouteManager { case Method.POST: this.server.post(route, supervised); break; + case Method.PUT: + this.server.put(route, supervised); + break; default: } } |