diff options
| author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-09-07 12:43:05 -0400 |
|---|---|---|
| committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-09-07 12:43:05 -0400 |
| commit | 4791cd23af08da70895204a3a7fbaf889d9af2d5 (patch) | |
| tree | c4c2534e64724d62bae9152763f1a74cd5a963e0 /src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts | |
| parent | 210f8f5f1cd19e9416a12524cce119b273334fd3 (diff) | |
completely restructured, added comments, and significantly reduced the length of the prompt (~72% shorter and cheaper)
Diffstat (limited to 'src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts')
| -rw-r--r-- | src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts | 125 |
1 files changed, 0 insertions, 125 deletions
diff --git a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts deleted file mode 100644 index 79b53b0a3..000000000 --- a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types'; -import { v4 as uuid } from 'uuid'; - -export class AnswerParser { - static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { - const answerRegex = /<answer>([\s\S]*?)<\/answer>/; - const citationsRegex = /<citations>([\s\S]*?)<\/citations>/; - const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g; - const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/; - const questionRegex = /<question>(.*?)<\/question>/g; - const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g; - const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g; - const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/; - - const answerMatch = answerRegex.exec(xml); - const citationsMatch = citationsRegex.exec(xml); - const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); - const loopSummaryMatch = loopSummaryRegex.exec(xml); - - if (!answerMatch) { - throw new Error('Invalid XML: Missing <answer> tag.'); - } - - let rawTextContent = answerMatch[1].trim(); - let content: AssistantMessage['content'] = []; - let citations: Citation[] = []; - let contentIndex = 0; - - // Remove citations and follow-up questions from rawTextContent - if (citationsMatch) { - rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); - } - if (followUpQuestionsMatch) { - rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); - } - if (loopSummaryMatch) { - rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); - } - - // Parse citations - let citationMatch; - const citationMap = new Map<string, string>(); - if (citationsMatch) { - const citationsContent = citationsMatch[1]; - while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { - const [_, index, chunk_id, type, direct_text] = citationMatch; - const citation_id = uuid(); - citationMap.set(index, citation_id); - citations.push({ - direct_text: direct_text.trim(), - type: getChunkType(type), - chunk_id, - citation_id, - }); - } - } - - rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); - - // Parse text content (normal and grounded) - let lastIndex = 0; - let match; - - while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - - // Add normal text that is before the grounded text - if (match.index > lastIndex) { - const normalText = rawTextContent.slice(lastIndex, match.index).trim(); - if (normalText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } - - // Add grounded text - const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); - content.push({ - index: contentIndex++, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids, - }); - - lastIndex = match.index + fullMatch.length; - } - - // Add any remaining normal text after the last grounded text - if (lastIndex < rawTextContent.length) { - const remainingText = rawTextContent.slice(lastIndex).trim(); - if (remainingText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: remainingText, - citation_ids: null, - }); - } - } - - let followUpQuestions: string[] = []; - if (followUpQuestionsMatch) { - const questionsText = followUpQuestionsMatch[1]; - let questionMatch; - while ((questionMatch = questionRegex.exec(questionsText)) !== null) { - followUpQuestions.push(questionMatch[1].trim()); - } - } - - const assistantResponse: AssistantMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content, - follow_up_questions: followUpQuestions, - citations, - processing_info: processingInfo, - loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, - }; - - return assistantResponse; - } -} |
