diff options
Diffstat (limited to 'src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts')
-rw-r--r-- | src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts new file mode 100644 index 000000000..ed78cc7cb --- /dev/null +++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts @@ -0,0 +1,134 @@ +/** + * @file AnswerParser.ts + * @description This file defines the AnswerParser class, which processes structured XML-like responses + * from the AI system, parsing grounded text, normal text, citations, follow-up questions, and loop summaries. + * The parser converts the XML response into an AssistantMessage format, extracting key information like + * citations and processing steps for further use in the assistant's workflow. + */ + +import { v4 as uuid } from 'uuid'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, ProcessingInfo, TEXT_TYPE, getChunkType } from '../types/types'; + +export class AnswerParser { + static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { + const answerRegex = /<answer>([\s\S]*?)<\/answer>/; + const citationsRegex = /<citations>([\s\S]*?)<\/citations>/; + const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g; + const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/; + const questionRegex = /<question>(.*?)<\/question>/g; + const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g; + const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g; + const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/; + + const answerMatch = answerRegex.exec(xml); + const citationsMatch = citationsRegex.exec(xml); + const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); + const loopSummaryMatch = loopSummaryRegex.exec(xml); + + if (!answerMatch) { + throw new Error('Invalid XML: Missing <answer> tag.'); + } + + let rawTextContent = answerMatch[1].trim(); + const content: AssistantMessage['content'] = []; + const citations: Citation[] = []; + let contentIndex = 0; + + // Remove citations and follow-up questions from rawTextContent + if (citationsMatch) { + rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); + } + if (followUpQuestionsMatch) { + rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); + } + if (loopSummaryMatch) { + rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); + } + + // Parse citations + let citationMatch; + const citationMap = new Map<string, string>(); + if (citationsMatch) { + const citationsContent = citationsMatch[1]; + while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const [_, index, chunk_id, type, direct_text] = citationMatch; + const citation_id = uuid(); + citationMap.set(index, citation_id); + citations.push({ + direct_text: direct_text.trim(), + type: getChunkType(type), + chunk_id, + citation_id, + }); + } + } + + rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); + + // Parse text content (normal and grounded) + let lastIndex = 0; + let match; + + while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; + + // Add normal text that is before the grounded text + if (match.index > lastIndex) { + const normalText = rawTextContent.slice(lastIndex, match.index).trim(); + if (normalText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } + } + + // Add grounded text + const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); + content.push({ + index: contentIndex++, + type: TEXT_TYPE.GROUNDED, + text: groundedText.trim(), + citation_ids, + }); + + lastIndex = match.index + fullMatch.length; + } + + // Add any remaining normal text after the last grounded text + if (lastIndex < rawTextContent.length) { + const remainingText = rawTextContent.slice(lastIndex).trim(); + if (remainingText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: remainingText, + citation_ids: null, + }); + } + } + + const followUpQuestions: string[] = []; + if (followUpQuestionsMatch) { + const questionsText = followUpQuestionsMatch[1]; + let questionMatch; + while ((questionMatch = questionRegex.exec(questionsText)) !== null) { + followUpQuestions.push(questionMatch[1].trim()); + } + } + + const assistantResponse: AssistantMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + content, + follow_up_questions: followUpQuestions, + citations, + processing_info: processingInfo, + loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, + }; + + return assistantResponse; + } +} |