1 files changed, 134 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
new file mode 100644
index 000000000..ed78cc7cb
--- /dev/null
+++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
@@ -0,0 +1,134 @@
+/**
+ * @file AnswerParser.ts
+ * @description This file defines the AnswerParser class, which processes structured XML-like responses
+ * from the AI system, parsing grounded text, normal text, citations, follow-up questions, and loop summaries.
+ * The parser converts the XML response into an AssistantMessage format, extracting key information like
+ * citations and processing steps for further use in the assistant's workflow.
+ */
+
+import { v4 as uuid } from 'uuid';
+import { ASSISTANT_ROLE, AssistantMessage, Citation, ProcessingInfo, TEXT_TYPE, getChunkType } from '../types/types';
+
+/**
+ * Parser for the AI system's XML-like responses; exposes a single static entry point.
+ */
+export class AnswerParser {
+    /**
+     * Converts the raw XML-like response of the AI system into an AssistantMessage.
+     *
+     * The response must contain an <answer> element whose body mixes <grounded_text> and <normal_text>
+     * segments. Optional <citations>, <follow_up_questions> and <loop_summary> blocks are looked up
+     * anywhere in `xml`; when they occur inside the answer body they are removed from it before the
+     * text segments are parsed.
+     *
+     * @param xml Raw response text produced by the model.
+     * @param processingInfo Processing steps, attached unchanged to the returned message.
+     * @returns The assistant message with ordered content segments, citations, follow-up questions and
+     * the optional loop summary (undefined when the response has none).
+     * @throws Error when `xml` contains no <answer>...</answer> element.
+     */
+    static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage {
+        // Patterns for the response format; the `g` ones are applied repeatedly (exec loops / replace).
+        const answerRegex = /<answer>([\s\S]*?)<\/answer>/;
+        const citationsRegex = /<citations>([\s\S]*?)<\/citations>/;
+        const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g;
+        const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/;
+        // [\s\S] instead of `.` so that a question spanning several lines is still captured.
+        const questionRegex = /<question>([\s\S]*?)<\/question>/g;
+        const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;
+        const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g;
+        const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/;
+
+        const answerMatch = answerRegex.exec(xml);
+        if (!answerMatch) {
+            throw new Error('Invalid XML: Missing <answer> tag.');
+        }
+        // The remaining blocks are optional and are searched for in the whole response.
+        const citationsMatch = citationsRegex.exec(xml);
+        const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml);
+        const loopSummaryMatch = loopSummaryRegex.exec(xml);
+
+        // Strip the metadata blocks so that only displayable text remains in the answer body.
+        let rawTextContent = answerMatch[1].trim();
+        for (const block of [citationsMatch, followUpQuestionsMatch, loopSummaryMatch]) {
+            if (block) {
+                rawTextContent = rawTextContent.replace(block[0], '').trim();
+            }
+        }
+
+        // Parse citations, remembering which generated id belongs to each model-provided index.
+        const citations: Citation[] = [];
+        const citationMap = new Map<string, string>();
+        if (citationsMatch) {
+            let citationMatch: RegExpExecArray | null;
+            while ((citationMatch = citationRegex.exec(citationsMatch[1])) !== null) {
+                const [, index, chunk_id, type, direct_text] = citationMatch;
+                const citation_id = uuid();
+                // Keys are trimmed so they match the trimmed lookups done for grounded text below.
+                citationMap.set(index.trim(), citation_id);
+                citations.push({
+                    direct_text: direct_text.trim(),
+                    type: getChunkType(type),
+                    chunk_id,
+                    citation_id,
+                });
+            }
+        }
+
+        // Unwrap <normal_text> so that everything outside <grounded_text> is treated as normal text.
+        rawTextContent = rawTextContent.replace(normalTextRegex, '$1');
+
+        const content: AssistantMessage['content'] = [];
+        let contentIndex = 0;
+        // Appends a normal-text segment unless it is empty after trimming.
+        const pushNormalText = (text: string): void => {
+            const normalText = text.trim();
+            if (normalText) {
+                content.push({
+                    index: contentIndex++,
+                    type: TEXT_TYPE.NORMAL,
+                    text: normalText,
+                    citation_ids: null,
+                });
+            }
+        };
+
+        // Walk the grounded segments in order; text between them becomes normal-text segments.
+        let lastIndex = 0;
+        let match: RegExpExecArray | null;
+        while ((match = groundedTextRegex.exec(rawTextContent)) !== null) {
+            const [fullMatch, citationIndex, groundedText] = match;
+            pushNormalText(rawTextContent.slice(lastIndex, match.index));
+            // citation_index may list several indices ("1, 2"); whitespace around each one is ignored.
+            // An index without a matching <citation> yields '' (one entry per listed index).
+            const citation_ids = citationIndex.split(',').map(index => citationMap.get(index.trim()) || '');
+            content.push({
+                index: contentIndex++,
+                type: TEXT_TYPE.GROUNDED,
+                text: groundedText.trim(),
+                citation_ids,
+            });
+            lastIndex = match.index + fullMatch.length;
+        }
+        // Whatever follows the last grounded segment is normal text as well.
+        pushNormalText(rawTextContent.slice(lastIndex));
+
+        // Collect the trimmed text of every <question> inside <follow_up_questions>.
+        const followUpQuestions: string[] = [];
+        if (followUpQuestionsMatch) {
+            let questionMatch: RegExpExecArray | null;
+            while ((questionMatch = questionRegex.exec(followUpQuestionsMatch[1])) !== null) {
+                followUpQuestions.push(questionMatch[1].trim());
+            }
+        }
+
+        return {
+            role: ASSISTANT_ROLE.ASSISTANT,
+            content,
+            follow_up_questions: followUpQuestions,
+            citations,
+            processing_info: processingInfo,
+            loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined,
+        };
+    }
+}