diff options
| author | Michael Foiani <sotech117@michaels-mbp-3.lan> | 2021-04-05 14:35:49 -0400 |
|---|---|---|
| committer | Michael Foiani <sotech117@michaels-mbp-3.lan> | 2021-04-05 14:35:49 -0400 |
| commit | afd767bf26c6853c36178e2fc0d091ba1b598fea (patch) | |
| tree | d88a6a7975bb145bc8b63f4c47ba3117c70f57c7 /src | |
| parent | f94cea08037850014f2dbd9c3e0c56fae7ed4536 (diff) | |
Added a basic XML parser that handles local files. TODO: add a URL-based one. Also added some tests to check that it works. Edge cases still need testing.
Diffstat (limited to 'src')
5 files changed, 167 insertions, 0 deletions
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java new file mode 100644 index 0000000..27c3988 --- /dev/null +++ b/src/main/java/edu/brown/cs/student/term/parsing/LocalXmlParser.java @@ -0,0 +1,38 @@ +package edu.brown.cs.student.term.parsing; + +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +import java.io.File; +import java.io.IOException; + +public class LocalXmlParser extends XmlParser { + public LocalXmlParser() { + super(); + } + + /** + * Parses the xml file at the given local path using the builder saved by the superclass. + * + * @param pathToXml The path to the xml text file on the local file system. + * @return The tree structure parsed as an xml doc, or null if builder.parse() throws a SAXException or an IOException. + */ + @Override + public Document parse(String pathToXml) { + // TODO: change to online hosted file option + // Creating the file reference. + System.err.println("LOG: To make file reference for " + pathToXml + " in " + getClass()); + File file = new File(pathToXml); + + // Parsing the file. 
+ try { + System.err.println("LOG: Calling builder.parse() in " + getClass()); + return builder.parse(file); + } catch (SAXException e) { + System.err.println("INTERNAL: SAX " + getClass() + " : " + e.getClass()); + } catch (IOException e) { + System.err.println("INTERNAL: IO " + getClass() + " : " + e.getClass()); + } + return null; + } +} diff --git a/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java new file mode 100644 index 0000000..adad835 --- /dev/null +++ b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java @@ -0,0 +1,2 @@ +package edu.brown.cs.student.term.parsing;public class UrlXmlParser { +} diff --git a/src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java new file mode 100644 index 0000000..d8182d6 --- /dev/null +++ b/src/main/java/edu/brown/cs/student/term/parsing/XmlParser.java @@ -0,0 +1,37 @@ +package edu.brown.cs.student.term.parsing; + +import org.w3c.dom.Document; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +public abstract class XmlParser { + protected DocumentBuilder builder = null; + + /** + * This constructor creates and saves the builder that turns the xml text into a tree structure. The builder is left null if a ParserConfigurationException is thrown. + */ + protected XmlParser() { + // Builds and configures the factory + System.err.println("LOG: Constructor of " + getClass() + ". 
To make XML parser factory."); + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setValidating(true); + factory.setIgnoringElementContentWhitespace(true); + + // Creates the builder from the factory + try { + System.err.println("LOG: To make documentBuilder in " + getClass()); + builder = factory.newDocumentBuilder(); + } catch (ParserConfigurationException e) { + System.err.println("INTERNAL: " + getClass() + " : " + e.getClass()); + } + } + + /** + * Method used to parse the xml file. + * @param pathToXml The path to the xml text file. + * @return The tree structure parsed as an xml doc. + */ + public abstract Document parse(String pathToXml); +} diff --git a/src/test/java/edu/brown/cs/student/TradeTest.java b/src/test/java/edu/brown/cs/student/TradeTest.java new file mode 100644 index 0000000..fb9a2ea --- /dev/null +++ b/src/test/java/edu/brown/cs/student/TradeTest.java @@ -0,0 +1,2 @@ +package edu.brown.cs.student;public class TradeTest { +} diff --git a/src/test/java/edu/brown/cs/student/XmlParserTest.java b/src/test/java/edu/brown/cs/student/XmlParserTest.java new file mode 100644 index 0000000..d3bc4ff --- /dev/null +++ b/src/test/java/edu/brown/cs/student/XmlParserTest.java @@ -0,0 +1,88 @@ +package edu.brown.cs.student; + +import edu.brown.cs.student.term.parsing.LocalXmlParser; +import edu.brown.cs.student.term.parsing.UrlXmlParser; +import edu.brown.cs.student.term.parsing.XmlParser; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import javax.print.Doc; + +import static org.junit.Assert.*; + +public class XmlParserTest { + private XmlParser _localXmlParser, _urlXmlParser; + private Document _doc; + + @Before + public void setUp() { + _localXmlParser = new LocalXmlParser(); + _urlXmlParser = new UrlXmlParser(); + } + + @After + public void tearDown() { + _localXmlParser = null; + 
_urlXmlParser = null; + } + + @Test + public void parsesLocal(){ + setUp(); + Document doc = _localXmlParser.parse("data/xml_trade_test.xml"); + assertNotNull(doc); + + // Id of person + assertEquals(getIdFromDoc(doc), "0001561844"); + tearDown(); + } + + @Test + public void parsesUrl(){ + setUp(); + Document doc = + _urlXmlParser.parse("https://www.sec.gov/Archives/edgar/data/1517006/000110465921046242/tm2112036-4_4seq1.xml"); + assertNotNull(doc); + + // Id of person + assertEquals(getIdFromDoc(doc), "0001561844"); + tearDown(); + } + + public String getIdFromDoc(Document doc) { + // Id of person + NodeList idNode = doc.getElementsByTagName("rptOwnerCik"); + assertEquals(idNode.getLength(), 1); + return idNode.item(0).getTextContent(); + } + + @Test + public void urlSameAsLocal(){ + setUp(); + Document local = _localXmlParser.parse("data/xml_trade_test.xml"); + Document url = + _urlXmlParser.parse("https://www.sec.gov/Archives/edgar/data/1517006/000110465921046242/tm2112036-4_4seq1.xml"); + + assertEquals(getIdFromDoc(local), getIdFromDoc(url)); + tearDown(); + } + + @Test + public void noFileExists(){ + setUp(); + tearDown(); + } + + @Test + public void badXmlFormat(){ + setUp(); + Document doc = _localXmlParser.parse("data/bad.xml"); + assertNull(doc); + tearDown(); + } +} |
