about | summary | refs | log | tree | commit | diff
path: root/src/scraping
diff options
context:
space:
mode:
author bob <bcz@cs.brown.edu> 2020-02-12 14:54:20 -0500
committer bob <bcz@cs.brown.edu> 2020-02-12 14:54:20 -0500
commit 2d6e00f48f7c2e6b5a8f512a6427ba56b1b216b4 (patch)
tree 0812859d6c2c6cb897f148588eb4bb2a198be11b /src/scraping
parent 30f42e403bd8cca4d20fc1b0d1cf28fbfa3a1524 (diff)
parent 2d76d891cb6a656f3c404d17ffa1cac05e08d115 (diff)
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web
Diffstat (limited to 'src/scraping')
-rw-r--r-- src/scraping/buxton/final/BuxtonImporter.ts 9
1 files changed, 4 insertions, 5 deletions
diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts
index d9d48d68c..f9726872c 100644
--- a/src/scraping/buxton/final/BuxtonImporter.ts
+++ b/src/scraping/buxton/final/BuxtonImporter.ts
@@ -115,7 +115,7 @@ const RegexMap = new Map<keyof DeviceDocument, Processor<any>>([
}],
["year", {
exp: /Year:\s+([^\|]*)\s+\|/,
- transformer: Utilities.numberValue
+ transformer: (raw: string) => Utilities.numberValue(/[0-9]{4}/.exec(raw)![0])
}],
["primaryKey", {
exp: /Primary:\s+(.*)(Secondary|Additional):/,
@@ -254,10 +254,9 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont
strictEqual(length % 3 === 0, true, "Improper caption formatting.");
for (let i = 3; i < captionTargets.length; i += 3) {
- const [image, fileName, caption] = captionTargets.slice(i, i + 3);
- strictEqual(image, "", `The image cell in one row was not the empty string: ${image}`);
- captions.push(caption);
- embeddedFileNames.push(fileName);
+ const row = captionTargets.slice(i, i + 3);
+ captions.push(row[1]);
+ embeddedFileNames.push(row[2]);
}
// extract all hyperlinks embedded in the document