From 4791cd23af08da70895204a3a7fbaf889d9af2d5 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 12:43:05 -0400 Subject: completely restructured, added comments, and significantly reduced the length of the prompt (~72% shorter and cheaper) --- .../nodes/ChatBox/response_parsers/AnswerParser.ts | 125 --------------------- 1 file changed, 125 deletions(-) delete mode 100644 src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts (limited to 'src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts') diff --git a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts deleted file mode 100644 index 79b53b0a3..000000000 --- a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types'; -import { v4 as uuid } from 'uuid'; - -export class AnswerParser { - static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { - const answerRegex = /([\s\S]*?)<\/answer>/; - const citationsRegex = /([\s\S]*?)<\/citations>/; - const citationRegex = /([\s\S]*?)<\/citation>/g; - const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; - const questionRegex = /(.*?)<\/question>/g; - const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; - const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; - const loopSummaryRegex = /([\s\S]*?)<\/loop_summary>/; - - const answerMatch = answerRegex.exec(xml); - const citationsMatch = citationsRegex.exec(xml); - const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); - const loopSummaryMatch = loopSummaryRegex.exec(xml); - - if (!answerMatch) { - throw new Error('Invalid XML: Missing tag.'); - } - - let rawTextContent = answerMatch[1].trim(); - let content: AssistantMessage['content'] = []; - let citations: Citation[] = []; - let contentIndex = 0; - - // Remove citations and follow-up questions from rawTextContent - if (citationsMatch) { - rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); - } - if (followUpQuestionsMatch) { - rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); - } - if (loopSummaryMatch) { - rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); - } - - // Parse citations - let citationMatch; - const citationMap = new Map(); - if (citationsMatch) { - const citationsContent = citationsMatch[1]; - while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { - const [_, index, chunk_id, type, direct_text] = citationMatch; - const citation_id = uuid(); - citationMap.set(index, citation_id); - citations.push({ - direct_text: direct_text.trim(), - type: getChunkType(type), - chunk_id, - citation_id, - }); - } - } - - rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); - - // Parse text content (normal and grounded) - let lastIndex = 0; - let match; - - while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - - // Add normal text that is before the grounded text - if (match.index > lastIndex) { - const normalText = rawTextContent.slice(lastIndex, match.index).trim(); - if (normalText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } - - // Add grounded text - const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); - content.push({ - index: contentIndex++, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids, - }); - - lastIndex = match.index + fullMatch.length; - } - - // Add any remaining normal text after the last grounded text - if (lastIndex < rawTextContent.length) { - const remainingText = rawTextContent.slice(lastIndex).trim(); - if (remainingText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: remainingText, - citation_ids: null, - }); - } - } - - let followUpQuestions: string[] = []; - if (followUpQuestionsMatch) { - const questionsText = followUpQuestionsMatch[1]; - let questionMatch; - while ((questionMatch = questionRegex.exec(questionsText)) !== null) { - followUpQuestions.push(questionMatch[1].trim()); - } - } - - const assistantResponse: AssistantMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content, - follow_up_questions: followUpQuestions, - citations, - processing_info: processingInfo, - loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, - }; - - return assistantResponse; - } -} -- cgit v1.2.3-70-g09d2