aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Michael Foiani <sotech117@michaels-mbp-3.lan> 2021-04-05 14:35:49 -0400
committer Michael Foiani <sotech117@michaels-mbp-3.lan> 2021-04-05 14:35:49 -0400
commit afd767bf26c6853c36178e2fc0d091ba1b598fea (patch)
tree d88a6a7975bb145bc8b63f4c47ba3117c70f57c7 /src
parent f94cea08037850014f2dbd9c3e0c56fae7ed4536 (diff)
Added a basic xml parser that deals with local files. TODO: add a url one. Also, added some tests to ensure it's ok. Needs some edge case testing still.
Diffstat (limited to 'src')
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java38
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java2
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java37
-rw-r--r--src/test/java/edu/brown/cs/student/TradeTest.java2
-rw-r--r--src/test/java/edu/brown/cs/student/XmlParserTest.java88
5 files changed, 167 insertions, 0 deletions
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java
new file mode 100644
index 0000000..27c3988
--- /dev/null
+++ b/src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java
@@ -0,0 +1,38 @@
+package edu.brown.cs.student.term.parsing;
+
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+import java.io.File;
+import java.io.IOException;
+
+public class LocalXmlParser extends XmlParser {
+  public LocalXmlParser() {
+    super();
+  }
+
+  /**
+   * Method used to parse an xml file from local disk.
+   * @param pathToXml The path to the xml text file.
+   * @return The tree structure parsed as an xml doc, or null if it could not be parsed.
+   */
+  @Override
+  public Document parse(String pathToXml) {
+    // TODO: change to online hosted file option
+    // The parent constructor leaves builder null on failure; report that (or a null path) here.
+    if (pathToXml == null || builder == null) {
+      System.err.println("INTERNAL: No path or no builder in " + getClass());
+      return null;
+    }
+    System.err.println("LOG: To make file reference for " + pathToXml + " in " + getClass());
+    try {
+      System.err.println("LOG: Calling builder.parse() in " + getClass());
+      return builder.parse(new File(pathToXml));
+    } catch (SAXException e) {
+      System.err.println("INTERNAL: SAX " + getClass() + " : " + e);
+    } catch (IOException e) {
+      System.err.println("INTERNAL: IO " + getClass() + " : " + e);
+    }
+    return null;
+  }
+}
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java
new file mode 100644
index 0000000..adad835
--- /dev/null
+++ b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java
@@ -0,0 +1,2 @@
+package edu.brown.cs.student.term.parsing;public class UrlXmlParser {
+}
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java
new file mode 100644
index 0000000..d8182d6
--- /dev/null
+++ b/src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java
@@ -0,0 +1,37 @@
+package edu.brown.cs.student.term.parsing;
+
+import org.w3c.dom.Document;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+public abstract class XmlParser {
+  protected DocumentBuilder builder;
+
+  /**
+   * Creates and stores the xml-to-tree builder; a configuration failure is logged and builder stays null.
+   */
+  protected XmlParser() {
+    // Configure a factory for validating builders that skip element-content whitespace.
+    System.err.println("LOG: Constructor of " + getClass() + ". To make XML parser factory.");
+    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
+    builderFactory.setIgnoringElementContentWhitespace(true);
+    builderFactory.setValidating(true);
+    DocumentBuilder created = null;
+    try {
+      System.err.println("LOG: To make documentBuilder in " + getClass());
+      created = builderFactory.newDocumentBuilder();
+    } catch (ParserConfigurationException e) {
+      System.err.println("INTERNAL: " + getClass() + " : " + e.getClass());
+    }
+    builder = created;
+  }
+
+  /**
+   * Method used to parse the xml file.
+   * @param pathToXml The path to the xml text file.
+   * @return The tree structure parsed as an xml doc.
+   */
+  public abstract Document parse(String pathToXml);
+}
diff --git a/src/test/java/edu/brown/cs/student/TradeTest.java b/src/test/java/edu/brown/cs/student/TradeTest.java
new file mode 100644
index 0000000..fb9a2ea
--- /dev/null
+++ b/src/test/java/edu/brown/cs/student/TradeTest.java
@@ -0,0 +1,2 @@
+package edu.brown.cs.student;
+public class TradeTest { /* Placeholder: no test cases are defined yet. */ }
diff --git a/src/test/java/edu/brown/cs/student/XmlParserTest.java b/src/test/java/edu/brown/cs/student/XmlParserTest.java
new file mode 100644
index 0000000..d3bc4ff
--- /dev/null
+++ b/src/test/java/edu/brown/cs/student/XmlParserTest.java
@@ -0,0 +1,88 @@
+package edu.brown.cs.student;
+
+import edu.brown.cs.student.term.parsing.LocalXmlParser;
+import edu.brown.cs.student.term.parsing.UrlXmlParser;
+import edu.brown.cs.student.term.parsing.XmlParser;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import javax.print.Doc;
+
+import static org.junit.Assert.*;
+
+public class XmlParserTest {
+  private XmlParser _localXmlParser, _urlXmlParser;
+
+  /** Creates fresh parsers; JUnit runs this before every test. */
+  @Before
+  public void setUp() {
+    _localXmlParser = new LocalXmlParser();
+    _urlXmlParser = new UrlXmlParser();
+  }
+
+  /** Drops the parsers; JUnit runs this after every test. */
+  @After
+  public void tearDown() {
+    _localXmlParser = null;
+    _urlXmlParser = null;
+  }
+
+  /** A local xml file parses and exposes the expected id of person. */
+  @Test
+  public void parsesLocal() {
+    Document doc = _localXmlParser.parse("data/xml_trade_test.xml");
+    assertNotNull(doc);
+
+    // Id of person (expected value goes first so failure messages read correctly)
+    assertEquals("0001561844", getIdFromDoc(doc));
+  }
+
+  /** The xml document at the url parses and exposes the expected id of person. */
+  @Test
+  public void parsesUrl() {
+    Document doc =
+        _urlXmlParser.parse("https://www.sec.gov/Archives/edgar/data/1517006/000110465921046242/tm2112036-4_4seq1.xml");
+    assertNotNull(doc);
+
+    // Id of person
+    assertEquals("0001561844", getIdFromDoc(doc));
+  }
+
+  /**
+   * Reads the id of person from a parsed doc, asserting it appears exactly once.
+   * @param doc The parsed xml doc to search.
+   * @return The text content of the only rptOwnerCik element.
+   */
+  public String getIdFromDoc(Document doc) {
+    NodeList idNode = doc.getElementsByTagName("rptOwnerCik");
+    assertEquals(1, idNode.getLength());
+    return idNode.item(0).getTextContent();
+  }
+
+  /** The local file and the url yield the same id of person. */
+  @Test
+  public void urlSameAsLocal() {
+    Document local = _localXmlParser.parse("data/xml_trade_test.xml");
+    Document url =
+        _urlXmlParser.parse("https://www.sec.gov/Archives/edgar/data/1517006/000110465921046242/tm2112036-4_4seq1.xml");
+
+    assertEquals(getIdFromDoc(local), getIdFromDoc(url));
+  }
+
+  /** A path with no file behind it yields null instead of an exception. */
+  @Test
+  public void noFileExists() {
+    assertNull(_localXmlParser.parse("data/no_such_file.xml"));
+  }
+
+  /** Malformed xml yields null instead of an exception. */
+  @Test
+  public void badXmlFormat() {
+    assertNull(_localXmlParser.parse("data/bad.xml"));
+  }
+}