From c390449ce566249b0f947c60da89bc694672e647 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sun, 2 Feb 2020 09:58:07 -0500 Subject: restructuring buxton source --- src/scraping/buxton/scraper.py | 414 +++++++++++------------------------------ 1 file changed, 111 insertions(+), 303 deletions(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index ec9c3f72c..394958823 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -1,37 +1,32 @@ import os -from shutil import copyfile import docx2txt from docx import Document from docx.opc.constants import RELATIONSHIP_TYPE as RT import re -from pymongo import MongoClient import shutil import uuid -import datetime +import json +import base64 +from shutil import copyfile from PIL import Image -import math -import sys - -source = "./source" -filesPath = "../../server/public/files" -image_dist = filesPath + "/images/buxton" -db = MongoClient("localhost", 27017)["Dash"] -target_collection = db.newDocuments -target_doc_title = "Collection 1" -schema_guids = [] -common_proto_id = "" +files_path = "../../server/public/files" +source_path = "./source" +temp_images_path = "./extracted_images" +server_images_path = f"{files_path}/images/buxton" +json_path = "./json" -def extract_links(fileName): +# noinspection PyProtectedMember +def extract_links(file): links = [] - doc = Document(fileName) + doc = Document(file) rels = doc.part.rels for rel in rels: item = rels[rel] if item.reltype == RT.HYPERLINK and ".aspx" not in item._target: links.append(item._target) - return text_doc_map(links) + return links def extract_value(kv_string): @@ -51,233 +46,72 @@ def guid(): return str(uuid.uuid4()) -def listify(list): - return { - "fields": list, - "__type": "list" - } - - -def protofy(fieldId): - return { - "fieldId": fieldId, - "__type": "proxy" - } - - -def text_doc_map(string_list): - def guid_map(caption): - return write_text_doc(caption) - return listify(proxify_guids(list(map(guid_map, string_list)))) - - -def write_collection(parse_results, display_fields, storage_key, viewType): - view_guids = parse_results["child_guids"] - - data_doc = parse_results["schema"] - fields = data_doc["fields"] - - view_doc_guid = guid() - - view_doc = { - "_id": view_doc_guid, - "fields": { - "proto": protofy(data_doc["_id"]), - "x": 10, - "y": 10, - "_width": 900, - "_height": 600, - "_panX": 0, - "_panY": 0, - "zIndex": 2, - "libraryBrush": False, - "_viewType": viewType, - "_LODdisable": True - }, - "__type": "Doc" - } - - fields["proto"] = protofy(common_proto_id) - fields[storage_key] = listify(proxify_guids(view_guids)) - fields["schemaColumns"] = listify(display_fields) - fields["author"] = "Bill Buxton" - fields["creationDate"] = { - "date": datetime.datetime.utcnow().microsecond, - "__type": "date" - } - if "image_urls" in parse_results: - fields["hero"] = { - "url": parse_results["image_urls"][0], - "__type": "image" - } - fields["isPrototype"] = True - - target_collection.insert_one(data_doc) - target_collection.insert_one(view_doc) - - data_doc_guid = data_doc["_id"] - print(f"inserted view document ({view_doc_guid})") - print(f"inserted data document ({data_doc_guid})\n") - - return view_doc_guid - - -def write_text_doc(content): - data_doc_guid = guid() - view_doc_guid = guid() - - view_doc = { - "_id": view_doc_guid, - "fields": { - "proto": protofy(data_doc_guid), - "x": 10, - "y": 10, - "_width": 400, - "zIndex": 2 - }, - "__type": "Doc" - } - - data_doc = { - 
"_id": data_doc_guid, - "fields": { - "proto": protofy("textProto"), - "data": { - "Data": '{"doc":{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"' + content + '"}]}]},"selection":{"type":"text","anchor":1,"head":1}' + '}', - "__type": "RichTextField" - }, - "title": content, - "_nativeWidth": 200, - "author": "Bill Buxton", - "creationDate": { - "date": datetime.datetime.utcnow().microsecond, - "__type": "date" - }, - "isPrototype": True, - "_autoHeight": True, - "page": -1, - "_nativeHeight": 200, - "_height": 200, - "data_text": content - }, - "__type": "Doc" - } - - target_collection.insert_one(view_doc) - target_collection.insert_one(data_doc) - - return view_doc_guid - - -def write_image(folder, name): - path = f"http://localhost:1050/files/images/buxton/{folder}/{name}" - - data_doc_guid = guid() - view_doc_guid = guid() - - image = Image.open(f"{image_dist}/{folder}/{name}") - native_width, native_height = image.size - - if abs(native_width - native_height) < 10: - return None - - view_doc = { - "_id": view_doc_guid, - "fields": { - "proto": protofy(data_doc_guid), - "x": 10, - "y": 10, - "_width": min(800, native_width), - "zIndex": 2, - "widthUnit": "*", - "widthMagnitude": 1 - }, - "__type": "Doc" - } - - data_doc = { - "_id": data_doc_guid, - "fields": { - "proto": protofy("imageProto"), - "data": { - "url": path, - "__type": "image" - }, - "title": name, - "_nativeWidth": native_width, - "author": "Bill Buxton", - "creationDate": { - "date": datetime.datetime.utcnow().microsecond, - "__type": "date" - }, - "isPrototype": True, - "page": -1, - "_nativeHeight": native_height, - "_height": native_height - }, - "__type": "Doc" - } - - target_collection.insert_one(view_doc) - target_collection.insert_one(data_doc) - - return { - "layout_id": view_doc_guid, - "url": path - } - - -def parse_document(file_name: str): - print(f"parsing {file_name}...") - pure_name = file_name.split(".")[0] +def encode_image(folder: str, name: str): + with open(f"{temp_images_path}/{folder}/{name}", "rb") as image: + encoded = base64.b64encode(image.read()) + return encoded.decode("utf-8") + + +def parse_document(name: str): + print(f"parsing {name}...") + pure_name = name.split(".")[0] result = {} - dir_path = image_dist + "/" + pure_name - print(dir_path) - mkdir_if_absent(dir_path) - - raw = str(docx2txt.process(source + "/" + file_name, dir_path)) - - urls = [] - view_guids = [] - count = 0 - for image in os.listdir(dir_path): - created = write_image(pure_name, image) - if created != None: - urls.append(created["url"]) - view_guids.append(created["layout_id"]) - count += 1 - resolved = dir_path + "/" + image - original = dir_path + "/" + image.replace(".", "_o.", 1) - medium = dir_path + "/" + image.replace(".", "_m.", 1) - copyfile(resolved, original) - copyfile(resolved, medium) - print(f"extracted {count} images...") + saved_device_images_dir = server_images_path + "/" + pure_name + temp_device_images_dir = temp_images_path + "/" + pure_name + mkdir_if_absent(temp_device_images_dir) + mkdir_if_absent(saved_device_images_dir) + + raw = str(docx2txt.process(source_path + + "/" + name, temp_device_images_dir)) + + extracted_images = [] + for image in os.listdir(temp_device_images_dir): + temp = f"{temp_device_images_dir}/{image}" + native_width, native_height = Image.open(temp).size + if abs(native_width - native_height) < 10: + continue + original = saved_device_images_dir + "/" + image.replace(".", "_o.", 1) + medium = saved_device_images_dir + "/" + 
image.replace(".", "_m.", 1) + copyfile(temp, original) + copyfile(temp, medium) + server_path = f"http://localhost:1050/files/images/buxton/{pure_name}/{image}" + extracted_images.append(server_path) + result["extracted_images"] = extracted_images def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( u"\u2013", "-").replace(u"\u201c", '''"''').replace(u"\u201d", '''"''').strip() - def sanitize_price(raw: str): - raw = raw.replace(",", "") - start = raw.find("$") + def sanitize_price(raw_price: str): + raw_price = raw_price.replace(",", "") + start = raw_price.find("$") + if "x" in raw_price.lower(): + return None if start > -1: i = start + 1 - while (i < len(raw) and re.match(r"[0-9\.]", raw[i])): + while i < len(raw_price) and re.match(r"[0-9.]", raw_price[i]): i += 1 - price = raw[start + 1: i + 1] + price = raw_price[start + 1: i + 1] return float(price) - elif (raw.lower().find("nfs")): + elif raw_price.lower().find("nfs"): return -1 else: - return math.nan + return None def remove_empty(line): return len(line) > 1 + def try_parse(to_parse: int): + value: int + try: + value = int(to_parse) + except ValueError: + value = None + return value + lines = list(map(sanitize, raw.split("\n"))) lines = list(filter(remove_empty, lines)) - result["file_name"] = file_name result["title"] = lines[2].strip() result["short_description"] = lines[3].strip().replace( "Short Description: ", "") @@ -293,13 +127,15 @@ def parse_document(file_name: str): clean = list( map(lambda data: data.strip().split(":"), lines[cur].split("|"))) result["company"] = clean[0][len(clean[0]) - 1].strip() - result["year"] = clean[1][len(clean[1]) - 1].strip() + + result["year"] = try_parse(clean[1][len(clean[1]) - 1].strip()) result["original_price"] = sanitize_price( clean[2][len(clean[2]) - 1].strip()) cur += 1 - result["degrees_of_freedom"] = extract_value( - lines[cur]).replace("NA", "N/A") + + result["degrees_of_freedom"] = try_parse(extract_value( + lines[cur]).replace("NA", "N/A")) cur += 1 dimensions = lines[cur].lower() @@ -325,99 +161,71 @@ def parse_document(file_name: str): cur += 1 link_descriptions = [] while lines[cur] != "Image": - link_descriptions.append(lines[cur].strip()) + description = lines[cur].strip().lower() + valid = True + for ignored in ["powerpoint", "vimeo", "xxx"]: + if ignored in description: + valid = False + break + if valid: + link_descriptions.append(description) cur += 1 - result["link_descriptions"] = text_doc_map(link_descriptions) + result["link_descriptions"] = link_descriptions - result["hyperlinks"] = extract_links(source + "/" + file_name) + result["hyperlinks"] = extract_links(source_path + "/" + name) images = [] captions = [] cur += 3 while cur + 1 < len(lines) and lines[cur] != "NOTES:": - images.append(lines[cur]) - captions.append(lines[cur + 1]) + name = lines[cur] + if "full document" not in name.lower(): + images.append(name) + captions.append(lines[cur + 1]) cur += 2 - result["images"] = listify(images) + result["table_image_names"] = images - result["captions"] = text_doc_map(captions) + result["captions"] = captions notes = [] - if (cur < len(lines) and lines[cur] == "NOTES:"): + if cur < len(lines) and lines[cur] == "NOTES:": cur += 1 while cur < len(lines): notes.append(lines[cur]) cur += 1 if len(notes) > 0: - result["notes"] = listify(notes) - - print("writing child schema...") - - return { - "schema": { - "_id": guid(), - "fields": result, - "__type": "Doc" - }, - "child_guids": view_guids, - "image_urls": urls - } - - -def 
proxify_guids(guids): - return list(map(lambda guid: {"fieldId": guid, "__type": "prefetch_proxy"}, guids)) - - -def write_common_proto(): - id = guid() - common_proto = { - "_id": id, - "fields": { - "proto": protofy("collectionProto"), - "title": "The Buxton Collection", - }, - "__type": "Doc" - } - target_collection.insert_one(common_proto) - return id - - -if os.path.exists(image_dist): - shutil.rmtree(image_dist) -while os.path.exists(image_dist): + result["notes"] = notes + + return result + + +if os.path.exists(server_images_path): + shutil.rmtree(server_images_path) +while os.path.exists(server_images_path): pass -os.mkdir(image_dist) -mkdir_if_absent(source) +os.mkdir(server_images_path) -common_proto_id = write_common_proto() +mkdir_if_absent(source_path) +mkdir_if_absent(json_path) +mkdir_if_absent(temp_images_path) + +results = [] candidates = 0 -for file_name in os.listdir(source): - if file_name.endswith('.docx'): +for file_name in os.listdir(source_path): + if file_name.endswith('.docx') or file_name.endswith(".doc"): candidates += 1 - schema_guids.append(write_collection( - parse_document(file_name), ["title", "data"], "data", 5)) - -print("writing parent schema...") -parent_guid = write_collection({ - "schema": { - "_id": guid(), - "fields": {}, - "__type": "Doc" - }, - "child_guids": schema_guids -}, ["title", "short_description", "original_price"], "data", 2) - -print("appending parent schema to main workspace...\n") -target_collection.update_one( - {"fields.title": target_doc_title}, - {"$push": {"fields.data.fields": {"fieldId": parent_guid, "__type": "proxy"}}} -) - -print("rewriting .gitignore...\n") -lines = ['*', '!.gitignore'] -with open(filesPath + "/.gitignore", 'w') as f: - f.write('\n'.join(lines)) - -suffix = "" if candidates == 1 else "s" -print(f"conversion complete. 
{candidates} candidate{suffix} processed.") -- cgit v1.2.3-70-g09d2 From 90d6454c05cdeb109da25dd55d428c140defca49 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sun, 2 Feb 2020 12:46:57 -0500 Subject: fixed scraper --- src/scraping/buxton/.idea/workspace.xml | 46 ++- src/scraping/buxton/narratives.py | 38 ++ .../buxton/narratives/Theme - Chord Kbds.docx | Bin 0 -> 5701815 bytes .../buxton/narratives/chord_keyboards.json | 39 ++ src/scraping/buxton/scraper.py | 399 ++++++++++++++++----- 5 files changed, 411 insertions(+), 111 deletions(-) create mode 100644 src/scraping/buxton/narratives.py create mode 100644 src/scraping/buxton/narratives/Theme - Chord Kbds.docx create mode 100644 src/scraping/buxton/narratives/chord_keyboards.json (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/scraping/buxton/.idea/workspace.xml b/src/scraping/buxton/.idea/workspace.xml index b2c7d4b8c..6f1ae3814 100644 --- a/src/scraping/buxton/.idea/workspace.xml +++ b/src/scraping/buxton/.idea/workspace.xml @@ -126,7 +126,7 @@ @@ -188,30 +210,30 @@ [IDE workspace-state hunks: the XML element markup was lost in extraction, leaving only bare +/- change markers] diff --git a/src/scraping/buxton/narratives.py b/src/scraping/buxton/narratives.py new file mode 100644 index 000000000..947d60f91 --- /dev/null +++ b/src/scraping/buxton/narratives.py @@ -0,0 +1,38 @@ +from docx import Document +import tempfile +from zipfile import ZipFile +import shutil +from pathlib import Path +from os import mkdir + +path = "./narratives/Theme - Chord Kbds.docx" +doc = Document(path) + +# IMAGE_EXT = ('png', 'jpeg', 'jpg') +# +# with tempfile.TemporaryDirectory() as working_dir: +# with ZipFile(path) as working_zip: +# image_list = [name for name in working_zip.namelist() if any(name.endswith(ext) for ext in IMAGE_EXT)] +# working_zip.extractall(working_dir, image_list) +# mkdir("./test") +# for image in image_list: +# shutil.copy(Path(working_dir).resolve() / image, "./test") + +paragraphs = doc.paragraphs +for i in range(len(paragraphs)): + print(f"{i}: {paragraphs[i].text}") + +# for section in doc.sections: +# print(section.orientation) + +# for shape in doc.inline_shapes: +# print(shape._inline) + +# images = doc.tables[0] +# for row in images.rows: +# contents = [] +# for cell in row.cells: +# contents.append(cell.text) + # print(contents) + + diff --git a/src/scraping/buxton/narratives/Theme - Chord Kbds.docx b/src/scraping/buxton/narratives/Theme - Chord Kbds.docx new file mode 100644 index 000000000..439a7d975 Binary files /dev/null and b/src/scraping/buxton/narratives/Theme - Chord Kbds.docx differ diff --git a/src/scraping/buxton/narratives/chord_keyboards.json b/src/scraping/buxton/narratives/chord_keyboards.json new file mode 100644 index 000000000..748578769 --- /dev/null +++ b/src/scraping/buxton/narratives/chord_keyboards.json @@ -0,0 +1,39 @@ +{ + "slides": [{ + "text": "Theme: Chord Keyboards\nFrom music to type\n\nChord keyboards require 2 or more keys to be simultaneously pushed to spawn the intended output.
Playing a chord on a piano or pushing both the shift + a letter key on a typewriter to enter an upper case character are examples.", + "devices": ["Casio CZ-101"] + }, + { + "text": "This is an early mechanical keyboard for taking dictation. Instead of typing alphanumeric characters as on a typewriter, pressing different combinations prints shorthand symbols on the tape, each representing a different phoneme. Speech is easier to keep up with this way, since each phoneme typically represents multiple characters.\n\nThe downside – until AI came to the rescue – was that it then took hours to manually transcribe the shorthand into conventional readable text.", + "devices": ["Grandjean Sténotype"] + }, + { + "text": "Designed and manufactured in the DDR, the purpose of this keyboard is to emboss dots representing Braille symbols onto paper. The effect is to enable blind users to use their tactile sensitivity to read with their fingers.\n\nEach Braille symbol consists of two columns of 3 embossed dots each. Which 3 dots are embossed in each column is determined by which of the three keys on either side are simultaneously pressed. The key in the middle, operated by either thumb, enters a space.", + "devices": ["Braille Writer"] + }, + { + "text": "This combination is derived from the work of the inventor of the mouse, Doug Engelbart.\n\nWhile these are 2 distinct devices, they are not what they appear to be.\n\nFunctionally, there is a virtual 7-button chord keyboard, employing the 5 buttons on the keyset and the middle and right button of the mouse. And, using the left mouse button, there is also a 1-button mouse.\n\nText was entered using a minor variant of 7-bit ASCII. The intent was to enable entering small bits of text without moving back-and-forth between mouse and QWERTY keyboard.
It didn’t catch on.", + "devices": ["Xerox PARC 5-Button Keyset & 3-Button Mouse"] + }, + { + "text": "", + "devices": [] + }, + { + "text": "", + "devices": [] + }, + { + "text": "", + "devices": [] + }, + { + "text": "", + "devices": [] + }, + { + "text": "", + "devices": [] + } + ] +} \ No newline at end of file diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index 394958823..f7a38112d 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -1,32 +1,36 @@ import os +from shutil import copyfile import docx2txt from docx import Document from docx.opc.constants import RELATIONSHIP_TYPE as RT import re +from pymongo import MongoClient import shutil import uuid -import json -import base64 -from shutil import copyfile +import datetime from PIL import Image +import math + +source = "./source" +filesPath = "../../server/public/files" +image_dist = filesPath + "/images/buxton" -files_path = "../../server/public/files" -source_path = "./source" -temp_images_path = "./extracted_images" -server_images_path = f"{files_path}/images/buxton" -json_path = "./json" +db = MongoClient("localhost", 27017)["Dash"] +target_collection = db.newDocuments +target_doc_title = "Collection 1" +schema_guids = [] +common_proto_id = "" -# noinspection PyProtectedMember -def extract_links(file): +def extract_links(fileName): links = [] - doc = Document(file) + doc = Document(fileName) rels = doc.part.rels for rel in rels: item = rels[rel] if item.reltype == RT.HYPERLINK and ".aspx" not in item._target: links.append(item._target) - return links + return text_doc_map(links) def extract_value(kv_string): @@ -46,58 +50,228 @@ def guid(): return str(uuid.uuid4()) -def encode_image(folder: str, name: str): - with open(f"{temp_images_path}/{folder}/{name}", "rb") as image: - encoded = base64.b64encode(image.read()) - return encoded.decode("utf-8") - - -def parse_document(name: str): - print(f"parsing {name}...") - pure_name = name.split(".")[0] +def listify(list): + return { + "fields": list, + "__type": "list" + } + + +def protofy(fieldId): + return { + "fieldId": fieldId, + "__type": "proxy" + } + + +def text_doc_map(string_list): + def guid_map(caption): + return write_text_doc(caption) + return listify(proxify_guids(list(map(guid_map, string_list)))) + + +def write_collection(parse_results, display_fields, storage_key, viewType): + view_guids = parse_results["child_guids"] + + data_doc = parse_results["schema"] + fields = data_doc["fields"] + + view_doc_guid = guid() + + view_doc = { + "_id": view_doc_guid, + "fields": { + "proto": protofy(data_doc["_id"]), + "x": 10, + "y": 10, + "_width": 900, + "_height": 600, + "_panX": 0, + "_panY": 0, + "zIndex": 2, + "libraryBrush": False, + "_viewType": viewType, + "_LODdisable": True + }, + "__type": "Doc" + } + + fields["proto"] = protofy(common_proto_id) + fields[storage_key] = listify(proxify_guids(view_guids)) + fields["schemaColumns"] = listify(display_fields) + fields["author"] = "Bill Buxton" + fields["creationDate"] = { + "date": datetime.datetime.utcnow().microsecond, + "__type": "date" + } + if "image_urls" in parse_results: + fields["hero"] = { + "url": parse_results["image_urls"][0], + "__type": "image" + } + fields["isPrototype"] = True + + target_collection.insert_one(data_doc) + target_collection.insert_one(view_doc) + + data_doc_guid = data_doc["_id"] + print(f"inserted view document ({view_doc_guid})") + print(f"inserted data document ({data_doc_guid})\n") + + return view_doc_guid + + +def 
write_text_doc(content): + data_doc_guid = guid() + view_doc_guid = guid() + + view_doc = { + "_id": view_doc_guid, + "fields": { + "proto": protofy(data_doc_guid), + "x": 10, + "y": 10, + "_width": 400, + "zIndex": 2 + }, + "__type": "Doc" + } + + data_doc = { + "_id": data_doc_guid, + "fields": { + "proto": protofy("textProto"), + "data": { + "Data": '{"doc":{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"' + content + '"}]}]},"selection":{"type":"text","anchor":1,"head":1}' + '}', + "__type": "RichTextField" + }, + "title": content, + "_nativeWidth": 200, + "author": "Bill Buxton", + "creationDate": { + "date": datetime.datetime.utcnow().microsecond, + "__type": "date" + }, + "isPrototype": True, + "_autoHeight": True, + "page": -1, + "_nativeHeight": 200, + "_height": 200, + "data_text": content + }, + "__type": "Doc" + } + + target_collection.insert_one(view_doc) + target_collection.insert_one(data_doc) + + return view_doc_guid + + +def write_image(folder, name): + path = f"http://localhost:1050/files/images/buxton/{folder}/{name}" + + data_doc_guid = guid() + view_doc_guid = guid() + + image = Image.open(f"{image_dist}/{folder}/{name}") + native_width, native_height = image.size + + if abs(native_width - native_height) < 10: + return None + + view_doc = { + "_id": view_doc_guid, + "fields": { + "proto": protofy(data_doc_guid), + "x": 10, + "y": 10, + "_width": min(800, native_width), + "zIndex": 2, + "widthUnit": "*", + "widthMagnitude": 1 + }, + "__type": "Doc" + } + + data_doc = { + "_id": data_doc_guid, + "fields": { + "proto": protofy("imageProto"), + "data": { + "url": path, + "__type": "image" + }, + "title": name, + "_nativeWidth": native_width, + "author": "Bill Buxton", + "creationDate": { + "date": datetime.datetime.utcnow().microsecond, + "__type": "date" + }, + "isPrototype": True, + "page": -1, + "_nativeHeight": native_height, + "_height": native_height + }, + "__type": "Doc" + } + + target_collection.insert_one(view_doc) + target_collection.insert_one(data_doc) + + return { + "layout_id": view_doc_guid, + "url": path + } + + +def parse_document(file_name: str): + print(f"parsing {file_name}...") + pure_name = file_name.split(".")[0] result = {} - saved_device_images_dir = server_images_path + "/" + pure_name - temp_device_images_dir = temp_images_path + "/" + pure_name - mkdir_if_absent(temp_device_images_dir) - mkdir_if_absent(saved_device_images_dir) - - raw = str(docx2txt.process(source_path + - "/" + name, temp_device_images_dir)) - - extracted_images = [] - for image in os.listdir(temp_device_images_dir): - temp = f"{temp_device_images_dir}/{image}" - native_width, native_height = Image.open(temp).size - if abs(native_width - native_height) < 10: - continue - original = saved_device_images_dir + "/" + image.replace(".", "_o.", 1) - medium = saved_device_images_dir + "/" + image.replace(".", "_m.", 1) - copyfile(temp, original) - copyfile(temp, medium) - server_path = f"http://localhost:1050/files/images/buxton/{pure_name}/{image}" - extracted_images.append(server_path) - result["extracted_images"] = extracted_images + dir_path = image_dist + "/" + pure_name + print(dir_path) + mkdir_if_absent(dir_path) + + raw = str(docx2txt.process(source + "/" + file_name, dir_path)) + + urls = [] + view_guids = [] + count = 0 + for image in os.listdir(dir_path): + created = write_image(pure_name, image) + if created != None: + urls.append(created["url"]) + view_guids.append(created["layout_id"]) + count += 1 + resolved = dir_path + "/" + image + 
original = dir_path + "/" + image.replace(".", "_o.", 1) + medium = dir_path + "/" + image.replace(".", "_m.", 1) + copyfile(resolved, original) + copyfile(resolved, medium) + print(f"extracted {count} images...") def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( u"\u2013", "-").replace(u"\u201c", '''"''').replace(u"\u201d", '''"''').strip() - def sanitize_price(raw_price: str): - raw_price = raw_price.replace(",", "") - start = raw_price.find("$") - if "x" in raw_price.lower(): + def sanitize_price(raw: str): + raw = raw.replace(",", "") + if "x" in raw.lower(): return None + start = raw.find("$") if start > -1: i = start + 1 - while i < len(raw_price) and re.match(r"[0-9.]", raw_price[i]): + while (i < len(raw) and re.match(r"[0-9\.]", raw[i])): i += 1 - price = raw_price[start + 1: i + 1] + price = raw[start + 1: i + 1] return float(price) - elif raw_price.lower().find("nfs"): + elif (raw.lower().find("nfs")): return -1 else: - return None + return math.nan def remove_empty(line): return len(line) > 1 @@ -112,6 +286,7 @@ def parse_document(name: str): lines = list(map(sanitize, raw.split("\n"))) lines = list(filter(remove_empty, lines)) + result["file_name"] = file_name result["title"] = lines[2].strip() result["short_description"] = lines[3].strip().replace( "Short Description: ", "") @@ -127,13 +302,11 @@ def parse_document(name: str): clean = list( map(lambda data: data.strip().split(":"), lines[cur].split("|"))) result["company"] = clean[0][len(clean[0]) - 1].strip() - result["year"] = try_parse(clean[1][len(clean[1]) - 1].strip()) result["original_price"] = sanitize_price( clean[2][len(clean[2]) - 1].strip()) cur += 1 - result["degrees_of_freedom"] = try_parse(extract_value( lines[cur]).replace("NA", "N/A")) cur += 1 @@ -161,71 +334,99 @@ def parse_document(name: str): cur += 1 link_descriptions = [] while lines[cur] != "Image": - description = lines[cur].strip().lower() - valid = True - for ignored in ["powerpoint", "vimeo", "xxx"]: - if ignored in description: - valid = False - break - if valid: - link_descriptions.append(description) + link_descriptions.append(lines[cur].strip()) cur += 1 - result["link_descriptions"] = link_descriptions + result["link_descriptions"] = text_doc_map(link_descriptions) - result["hyperlinks"] = extract_links(source_path + "/" + name) + result["hyperlinks"] = extract_links(source + "/" + file_name) images = [] captions = [] cur += 3 while cur + 1 < len(lines) and lines[cur] != "NOTES:": - name = lines[cur] - if "full document" not in name.lower(): - images.append(name) - captions.append(lines[cur + 1]) + images.append(lines[cur]) + captions.append(lines[cur + 1]) cur += 2 - result["table_image_names"] = images + result["images"] = listify(images) - result["captions"] = captions + result["captions"] = text_doc_map(captions) notes = [] - if cur < len(lines) and lines[cur] == "NOTES:": + if (cur < len(lines) and lines[cur] == "NOTES:"): cur += 1 while cur < len(lines): notes.append(lines[cur]) cur += 1 if len(notes) > 0: - result["notes"] = notes - - return result - - -if os.path.exists(server_images_path): - shutil.rmtree(server_images_path) -while os.path.exists(server_images_path): + result["notes"] = listify(notes) + + print("writing child schema...") + + return { + "schema": { + "_id": guid(), + "fields": result, + "__type": "Doc" + }, + "child_guids": view_guids, + "image_urls": urls + } + + +def proxify_guids(guids): + return list(map(lambda guid: {"fieldId": guid, "__type": "prefetch_proxy"}, guids)) + + 
+def write_common_proto(): + id = guid() + common_proto = { + "_id": id, + "fields": { + "proto": protofy("collectionProto"), + "title": "The Buxton Collection", + }, + "__type": "Doc" + } + target_collection.insert_one(common_proto) + return id + + +if os.path.exists(image_dist): + shutil.rmtree(image_dist) +while os.path.exists(image_dist): pass -os.mkdir(server_images_path) +os.mkdir(image_dist) +mkdir_if_absent(source) -mkdir_if_absent(source_path) -mkdir_if_absent(json_path) -mkdir_if_absent(temp_images_path) - -results = [] +common_proto_id = write_common_proto() candidates = 0 -for file_name in os.listdir(source_path): - if file_name.endswith('.docx') or file_name.endswith(".doc"): +for file_name in os.listdir(source): + if file_name.endswith('.docx') or file_name.endswith('.doc'): candidates += 1 - results.append(parse_document(file_name)) - - -with open(f"./json/buxton_collection.json", "w", encoding="utf-8") as out: - json.dump(results, out, ensure_ascii=False, indent=4) - -print(f"\nSuccessfully parsed {candidates} candidates.") - -print("\nrewriting .gitignore...") -entries = ['*', '!.gitignore'] -with open(files_path + "/.gitignore", 'w') as f: - f.write('\n'.join(entries)) - -shutil.rmtree(temp_images_path) + schema_guids.append(write_collection( + parse_document(file_name), ["title", "data"], "data", 5)) + +print("writing parent schema...") +parent_guid = write_collection({ + "schema": { + "_id": guid(), + "fields": {}, + "__type": "Doc" + }, + "child_guids": schema_guids +}, ["title", "short_description", "original_price"], "data", 2) + +print("appending parent schema to main workspace...\n") +target_collection.update_one( + {"fields.title": target_doc_title}, + {"$push": {"fields.data.fields": {"fieldId": parent_guid, "__type": "proxy"}}} +) + +print("rewriting .gitignore...\n") +lines = ['*', '!.gitignore'] +with open(filesPath + "/.gitignore", 'w') as f: + f.write('\n'.join(lines)) + +suffix = "" if candidates == 1 else "s" +print(f"conversion complete. {candidates} candidate{suffix} processed.") -- cgit v1.2.3-70-g09d2 From 646de60fc314198b97172c62f414ffb9576ffb98 Mon Sep 17 00:00:00 2001 From: bob Date: Mon, 3 Feb 2020 11:33:24 -0500 Subject: fixed timeline bug. made multirow/multicol use same fields. 
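Both layout axes now read one shared pair of fields, dimUnit ("px" or "*") and dimMagnitude, in place of the separate widthUnit/widthMagnitude and heightUnit/heightMagnitude pairs, and scraper.py emits the same fields on the image documents it writes. A minimal sketch of the two sizing modes (illustrative values only; the full document schema is whatever write_image constructs):

    # hypothetical sizing fields on a child document after this change
    ratio_sized = {"dimUnit": "*", "dimMagnitude": 1}     # one star-share of the free space
    pixel_sized = {"dimUnit": "px", "dimMagnitude": 250}  # fixed 250px column or row

Ratio-sized documents are normalized so that the smallest star magnitude in a collection becomes 1, while pixel-sized documents keep their fixed allocation and leave the adjacent resizers inactive.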
--- .../CollectionFreeFormLayoutEngines.tsx | 28 +++++++++++-------- .../collectionFreeForm/CollectionFreeFormView.tsx | 4 +-- .../CollectionMulticolumnView.tsx | 32 +++++++++++----------- .../CollectionMultirowView.tsx | 32 +++++++++++----------- .../collectionMulticolumn/MulticolumnResizer.tsx | 20 +++++++------- .../MulticolumnWidthLabel.tsx | 12 ++++---- .../collectionMulticolumn/MultirowHeightLabel.tsx | 12 ++++---- .../collectionMulticolumn/MultirowResizer.tsx | 20 +++++++------- src/scraping/buxton/scraper.py | 4 +-- 9 files changed, 84 insertions(+), 80 deletions(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormLayoutEngines.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormLayoutEngines.tsx index f08c2506e..da0b51196 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormLayoutEngines.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormLayoutEngines.tsx @@ -204,7 +204,21 @@ export function computeTimelineLayout( x += scaling * (key - prevKey); const stack = findStack(x, stacking); prevKey = key; - !stack && Math.abs(x - (curTime - minTime) * scaling) > pivotAxisWidth && groupNames.push({ type: "text", text: key.toString(), x: x, y: stack * 25, height: fontHeight, fontSize }); + !stack && (curTime === undefined || Math.abs(x - (curTime - minTime) * scaling) > pivotAxisWidth) && groupNames.push({ type: "text", text: key.toString(), x: x, y: stack * 25, height: fontHeight, fontSize }); + newFunction(keyDocs, key); + }); + if (sortedKeys.length && curTime > sortedKeys[sortedKeys.length - 1]) { + x = (curTime - minTime) * scaling; + groupNames.push({ type: "text", text: curTime.toString(), x: x, y: 0, zIndex: 1000, color: "orange", height: fontHeight, fontSize }); + } + if (Math.ceil(maxTime - minTime) * scaling > x + 25) { + groupNames.push({ type: "text", text: Math.ceil(maxTime).toString(), x: Math.ceil(maxTime - minTime) * scaling, y: 0, height: fontHeight, fontSize }); + } + + const divider = { type: "div", color: "black", x: 0, y: 0, width: panelDim[0], height: 1 } as any; + return normalizeResults(panelDim, fontHeight, childPairs, docMap, poolData, viewDefsToJSX, groupNames, (maxTime - minTime) * scaling, [divider]); + + function newFunction(keyDocs: Doc[], key: number) { keyDocs.forEach(doc => { const stack = findStack(x, stacking); const layoutDoc = Doc.Layout(doc); @@ -215,22 +229,12 @@ export function computeTimelineLayout( wid = layoutDoc._nativeHeight ? (NumCast(layoutDoc._nativeWidth) / NumCast(layoutDoc._nativeHeight)) * pivotAxisWidth : pivotAxisWidth; } docMap.set(doc, { - x: x, y: - Math.sqrt(stack) * pivotAxisWidth / 2 - pivotAxisWidth + (pivotAxisWidth - hgt) / 2, + x: x, y: -Math.sqrt(stack) * pivotAxisWidth / 2 - pivotAxisWidth + (pivotAxisWidth - hgt) / 2, zIndex: (curTime === key ? 
1000 : zind++), highlight: curTime === key, width: wid / (Math.max(stack, 1)), height: hgt }); stacking[stack] = x + pivotAxisWidth; }); - }); - if (sortedKeys.length && curTime > sortedKeys[sortedKeys.length - 1]) { - x = (curTime - minTime) * scaling; - groupNames.push({ type: "text", text: curTime.toString(), x: x, y: 0, zIndex: 1000, color: "orange", height: fontHeight, fontSize }); } - if (Math.ceil(maxTime - minTime) * scaling > x + 25) { - groupNames.push({ type: "text", text: Math.ceil(maxTime).toString(), x: Math.ceil(maxTime - minTime) * scaling, y: 0, height: fontHeight, fontSize }); - } - - const divider = { type: "div", color: "black", x: 0, y: 0, width: panelDim[0], height: 1 } as any; - return normalizeResults(panelDim, fontHeight, childPairs, docMap, poolData, viewDefsToJSX, groupNames, (maxTime - minTime) * scaling, [divider]); } function normalizeResults(panelDim: number[], fontHeight: number, childPairs: { data?: Doc, layout: Doc }[], docMap: Map, diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index f1a239050..30ddd09e6 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -791,12 +791,12 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { doTimelineLayout(poolData: ObservableMap) { return computeTimelineLayout(poolData, this.props.Document, this.childDocs, - this.childLayoutPairs.filter(pair => this.isCurrent(pair.layout)), [this.props.PanelWidth(), this.props.PanelHeight()], this.viewDefsToJSX); + this.childLayoutPairs, [this.props.PanelWidth(), this.props.PanelHeight()], this.viewDefsToJSX); } doPivotLayout(poolData: ObservableMap) { return computePivotLayout(poolData, this.props.Document, this.childDocs, - this.childLayoutPairs.filter(pair => this.isCurrent(pair.layout)), [this.props.PanelWidth(), this.props.PanelHeight()], this.viewDefsToJSX); + this.childLayoutPairs, [this.props.PanelWidth(), this.props.PanelHeight()], this.viewDefsToJSX); } doFreeformLayout(poolData: ObservableMap) { diff --git a/src/client/views/collections/collectionMulticolumn/CollectionMulticolumnView.tsx b/src/client/views/collections/collectionMulticolumn/CollectionMulticolumnView.tsx index 041eb69da..65862f34f 100644 --- a/src/client/views/collections/collectionMulticolumn/CollectionMulticolumnView.tsx +++ b/src/client/views/collections/collectionMulticolumn/CollectionMulticolumnView.tsx @@ -28,12 +28,12 @@ interface LayoutData { starSum: number; } -export const WidthUnit = { +export const DimUnit = { Pixel: "px", Ratio: "*" }; -const resolvedUnits = Object.values(WidthUnit); +const resolvedUnits = Object.values(DimUnit); const resizerWidth = 4; @observer @@ -45,12 +45,12 @@ export class CollectionMulticolumnView extends CollectionSubView(MulticolumnDocu */ @computed private get ratioDefinedDocs() { - return this.childLayoutPairs.map(({ layout }) => layout).filter(({ widthUnit }) => StrCast(widthUnit) === WidthUnit.Ratio); + return this.childLayoutPairs.map(({ layout }) => layout).filter(({ dimUnit }) => StrCast(dimUnit) === DimUnit.Ratio); } /** - * This loops through all childLayoutPairs and extracts the values for widthUnit - * and widthMagnitude, ignoring any that are malformed. 
Additionally, it then + * This loops through all childLayoutPairs and extracts the values for dimUnit + * and dimMagnitude, ignoring any that are malformed. Additionally, it then * normalizes the ratio values so that one * value is always 1, with the remaining * values proportionate to that easily readable metric. * @returns the list of the resolved width specifiers (unit and magnitude pairs) @@ -60,11 +60,11 @@ export class CollectionMulticolumnView extends CollectionSubView(MulticolumnDocu private get resolvedLayoutInformation(): LayoutData { let starSum = 0; const widthSpecifiers: WidthSpecifier[] = []; - this.childLayoutPairs.map(({ layout: { widthUnit, widthMagnitude } }) => { - const unit = StrCast(widthUnit); - const magnitude = NumCast(widthMagnitude); + this.childLayoutPairs.map(({ layout: { dimUnit, dimMagnitude } }) => { + const unit = StrCast(dimUnit); + const magnitude = NumCast(dimMagnitude); if (unit && magnitude && magnitude > 0 && resolvedUnits.includes(unit)) { - (unit === WidthUnit.Ratio) && (starSum += magnitude); + (unit === DimUnit.Ratio) && (starSum += magnitude); widthSpecifiers.push({ magnitude, unit }); } /** @@ -82,9 +82,9 @@ export class CollectionMulticolumnView extends CollectionSubView(MulticolumnDocu setTimeout(() => { const { ratioDefinedDocs } = this; if (this.childLayoutPairs.length) { - const minimum = Math.min(...ratioDefinedDocs.map(({ widthMagnitude }) => NumCast(widthMagnitude))); + const minimum = Math.min(...ratioDefinedDocs.map(({ dimMagnitude }) => NumCast(dimMagnitude))); if (minimum !== 0) { - ratioDefinedDocs.forEach(layout => layout.widthMagnitude = NumCast(layout.widthMagnitude) / minimum); + ratioDefinedDocs.forEach(layout => layout.dimMagnitude = NumCast(layout.dimMagnitude) / minimum); } } }); @@ -103,7 +103,7 @@ export class CollectionMulticolumnView extends CollectionSubView(MulticolumnDocu @computed private get totalFixedAllocation(): number | undefined { return this.resolvedLayoutInformation?.widthSpecifiers.reduce( - (sum, { magnitude, unit }) => sum + (unit === WidthUnit.Pixel ? magnitude : 0), 0); + (sum, { magnitude, unit }) => sum + (unit === DimUnit.Pixel ? 
magnitude : 0), 0); } /** @@ -160,8 +160,8 @@ export class CollectionMulticolumnView extends CollectionSubView(MulticolumnDocu if (columnUnitLength === undefined) { return 0; // we're still waiting on promises to resolve } - let width = NumCast(layout.widthMagnitude); - if (StrCast(layout.widthUnit) === WidthUnit.Ratio) { + let width = NumCast(layout.dimMagnitude); + if (StrCast(layout.dimUnit) === DimUnit.Ratio) { width *= columnUnitLength; } return width; @@ -193,8 +193,8 @@ export class CollectionMulticolumnView extends CollectionSubView(MulticolumnDocu drop = (e: Event, de: DragManager.DropEvent) => { if (super.drop(e, de)) { de.complete.docDragData?.droppedDocuments.forEach(action((d: Doc) => { - d.widthUnit = "*"; - d.widthMagnitude = 1; + d.dimUnit = "*"; + d.dimMagnitude = 1; })); } return false; diff --git a/src/client/views/collections/collectionMulticolumn/CollectionMultirowView.tsx b/src/client/views/collections/collectionMulticolumn/CollectionMultirowView.tsx index e07985bb4..aa440b677 100644 --- a/src/client/views/collections/collectionMulticolumn/CollectionMultirowView.tsx +++ b/src/client/views/collections/collectionMulticolumn/CollectionMultirowView.tsx @@ -28,12 +28,12 @@ interface LayoutData { starSum: number; } -export const HeightUnit = { +export const DimUnit = { Pixel: "px", Ratio: "*" }; -const resolvedUnits = Object.values(HeightUnit); +const resolvedUnits = Object.values(DimUnit); const resizerHeight = 4; @observer @@ -45,12 +45,12 @@ export class CollectionMultirowView extends CollectionSubView(MultirowDocument) */ @computed private get ratioDefinedDocs() { - return this.childLayoutPairs.map(({ layout }) => layout).filter(({ widthUnit }) => StrCast(widthUnit) === HeightUnit.Ratio); + return this.childLayoutPairs.map(({ layout }) => layout).filter(({ dimUnit }) => StrCast(dimUnit) === DimUnit.Ratio); } /** - * This loops through all childLayoutPairs and extracts the values for widthUnit - * and widthMagnitude, ignoring any that are malformed. Additionally, it then + * This loops through all childLayoutPairs and extracts the values for dimUnit + * and dimUnit, ignoring any that are malformed. Additionally, it then * normalizes the ratio values so that one * value is always 1, with the remaining * values proportionate to that easily readable metric. 
* @returns the list of the resolved width specifiers (unit and magnitude pairs) @@ -60,11 +60,11 @@ export class CollectionMultirowView extends CollectionSubView(MultirowDocument) private get resolvedLayoutInformation(): LayoutData { let starSum = 0; const heightSpecifiers: HeightSpecifier[] = []; - this.childLayoutPairs.map(({ layout: { heightUnit, heightMagnitude } }) => { - const unit = StrCast(heightUnit); - const magnitude = NumCast(heightMagnitude); + this.childLayoutPairs.map(({ layout: { dimUnit, dimMagnitude } }) => { + const unit = StrCast(dimUnit); + const magnitude = NumCast(dimMagnitude); if (unit && magnitude && magnitude > 0 && resolvedUnits.includes(unit)) { - (unit === HeightUnit.Ratio) && (starSum += magnitude); + (unit === DimUnit.Ratio) && (starSum += magnitude); heightSpecifiers.push({ magnitude, unit }); } /** @@ -82,9 +82,9 @@ export class CollectionMultirowView extends CollectionSubView(MultirowDocument) setTimeout(() => { const { ratioDefinedDocs } = this; if (this.childLayoutPairs.length) { - const minimum = Math.min(...ratioDefinedDocs.map(({ heightMagnitude }) => NumCast(heightMagnitude))); + const minimum = Math.min(...ratioDefinedDocs.map(({ dimMagnitude }) => NumCast(dimMagnitude))); if (minimum !== 0) { - ratioDefinedDocs.forEach(layout => layout.heightMagnitude = NumCast(layout.heightMagnitude) / minimum); + ratioDefinedDocs.forEach(layout => layout.dimMagnitude = NumCast(layout.dimMagnitude) / minimum); } } }); @@ -103,7 +103,7 @@ export class CollectionMultirowView extends CollectionSubView(MultirowDocument) @computed private get totalFixedAllocation(): number | undefined { return this.resolvedLayoutInformation?.heightSpecifiers.reduce( - (sum, { magnitude, unit }) => sum + (unit === HeightUnit.Pixel ? magnitude : 0), 0); + (sum, { magnitude, unit }) => sum + (unit === DimUnit.Pixel ? 
magnitude : 0), 0); } /** @@ -160,8 +160,8 @@ export class CollectionMultirowView extends CollectionSubView(MultirowDocument) if (rowUnitLength === undefined) { return 0; // we're still waiting on promises to resolve } - let height = NumCast(layout.heightMagnitude); - if (StrCast(layout.heightUnit) === HeightUnit.Ratio) { + let height = NumCast(layout.dimMagnitude); + if (StrCast(layout.dimUnit) === DimUnit.Ratio) { height *= rowUnitLength; } return height; @@ -193,8 +193,8 @@ export class CollectionMultirowView extends CollectionSubView(MultirowDocument) drop = (e: Event, de: DragManager.DropEvent) => { if (super.drop(e, de)) { de.complete.docDragData?.droppedDocuments.forEach(action((d: Doc) => { - d.heightUnit = "*"; - d.heightMagnitude = 1; + d.dimUnit = "*"; + d.dimMagnitude = 1; })); } return false; diff --git a/src/client/views/collections/collectionMulticolumn/MulticolumnResizer.tsx b/src/client/views/collections/collectionMulticolumn/MulticolumnResizer.tsx index 11e210958..46c39d817 100644 --- a/src/client/views/collections/collectionMulticolumn/MulticolumnResizer.tsx +++ b/src/client/views/collections/collectionMulticolumn/MulticolumnResizer.tsx @@ -3,7 +3,7 @@ import { observer } from "mobx-react"; import { observable, action } from "mobx"; import { Doc } from "../../../../new_fields/Doc"; import { NumCast, StrCast } from "../../../../new_fields/Types"; -import { WidthUnit } from "./CollectionMulticolumnView"; +import { DimUnit } from "./CollectionMulticolumnView"; interface ResizerProps { width: number; @@ -46,14 +46,14 @@ export default class ResizeBar extends React.Component { const unitLength = columnUnitLength(); if (unitLength) { if (toNarrow) { - const { widthUnit, widthMagnitude } = toNarrow; - const scale = widthUnit === WidthUnit.Ratio ? unitLength : 1; - toNarrow.widthMagnitude = NumCast(widthMagnitude) - Math.abs(movementX) / scale; + const { dimUnit, dimMagnitude } = toNarrow; + const scale = dimUnit === DimUnit.Ratio ? unitLength : 1; + toNarrow.dimMagnitude = NumCast(dimMagnitude) - Math.abs(movementX) / scale; } if (this.resizeMode === ResizeMode.Pinned && toWiden) { - const { widthUnit, widthMagnitude } = toWiden; - const scale = widthUnit === WidthUnit.Ratio ? unitLength : 1; - toWiden.widthMagnitude = NumCast(widthMagnitude) + Math.abs(movementX) / scale; + const { dimUnit, dimMagnitude } = toWiden; + const scale = dimUnit === DimUnit.Ratio ? 
unitLength : 1; + toWiden.dimMagnitude = NumCast(dimMagnitude) + Math.abs(movementX) / scale; } } } @@ -61,17 +61,17 @@ export default class ResizeBar extends React.Component { private get isActivated() { const { toLeft, toRight } = this.props; if (toLeft && toRight) { - if (StrCast(toLeft.widthUnit) === WidthUnit.Pixel && StrCast(toRight.widthUnit) === WidthUnit.Pixel) { + if (StrCast(toLeft.dimUnit) === DimUnit.Pixel && StrCast(toRight.dimUnit) === DimUnit.Pixel) { return false; } return true; } else if (toLeft) { - if (StrCast(toLeft.widthUnit) === WidthUnit.Pixel) { + if (StrCast(toLeft.dimUnit) === DimUnit.Pixel) { return false; } return true; } else if (toRight) { - if (StrCast(toRight.widthUnit) === WidthUnit.Pixel) { + if (StrCast(toRight.dimUnit) === DimUnit.Pixel) { return false; } return true; diff --git a/src/client/views/collections/collectionMulticolumn/MulticolumnWidthLabel.tsx b/src/client/views/collections/collectionMulticolumn/MulticolumnWidthLabel.tsx index b394fed62..5b2054428 100644 --- a/src/client/views/collections/collectionMulticolumn/MulticolumnWidthLabel.tsx +++ b/src/client/views/collections/collectionMulticolumn/MulticolumnWidthLabel.tsx @@ -4,7 +4,7 @@ import { computed } from "mobx"; import { Doc } from "../../../../new_fields/Doc"; import { NumCast, StrCast, BoolCast } from "../../../../new_fields/Types"; import { EditableView } from "../../EditableView"; -import { WidthUnit } from "./CollectionMulticolumnView"; +import { DimUnit } from "./CollectionMulticolumnView"; interface WidthLabelProps { layout: Doc; @@ -18,8 +18,8 @@ export default class WidthLabel extends React.Component { @computed private get contents() { const { layout, decimals } = this.props; - const getUnit = () => StrCast(layout.widthUnit); - const getMagnitude = () => String(+NumCast(layout.widthMagnitude).toFixed(decimals ?? 3)); + const getUnit = () => StrCast(layout.dimUnit); + const getMagnitude = () => String(+NumCast(layout.dimMagnitude).toFixed(decimals ?? 3)); return (
{ SetValue={value => { const converted = Number(value); if (!isNaN(converted) && converted > 0) { - layout.widthMagnitude = converted; + layout.dimMagnitude = converted; return true; } return false; @@ -37,8 +37,8 @@ export default class WidthLabel extends React.Component { { - if (Object.values(WidthUnit).includes(value)) { - layout.widthUnit = value; + if (Object.values(DimUnit).includes(value)) { + layout.dimUnit = value; return true; } return false; diff --git a/src/client/views/collections/collectionMulticolumn/MultirowHeightLabel.tsx b/src/client/views/collections/collectionMulticolumn/MultirowHeightLabel.tsx index 56a2e868d..899577fd5 100644 --- a/src/client/views/collections/collectionMulticolumn/MultirowHeightLabel.tsx +++ b/src/client/views/collections/collectionMulticolumn/MultirowHeightLabel.tsx @@ -4,7 +4,7 @@ import { computed } from "mobx"; import { Doc } from "../../../../new_fields/Doc"; import { NumCast, StrCast, BoolCast } from "../../../../new_fields/Types"; import { EditableView } from "../../EditableView"; -import { HeightUnit } from "./CollectionMultirowView"; +import { DimUnit } from "./CollectionMultirowView"; interface HeightLabelProps { layout: Doc; @@ -18,8 +18,8 @@ export default class HeightLabel extends React.Component { @computed private get contents() { const { layout, decimals } = this.props; - const getUnit = () => StrCast(layout.heightUnit); - const getMagnitude = () => String(+NumCast(layout.heightMagnitude).toFixed(decimals ?? 3)); + const getUnit = () => StrCast(layout.dimUnit); + const getMagnitude = () => String(+NumCast(layout.dimMagnitude).toFixed(decimals ?? 3)); return (
{ SetValue={value => { const converted = Number(value); if (!isNaN(converted) && converted > 0) { - layout.heightMagnitude = converted; + layout.dimMagnitude = converted; return true; } return false; @@ -37,8 +37,8 @@ export default class HeightLabel extends React.Component { { - if (Object.values(HeightUnit).includes(value)) { - layout.heightUnit = value; + if (Object.values(DimUnit).includes(value)) { + layout.dimUnit = value; return true; } return false; diff --git a/src/client/views/collections/collectionMulticolumn/MultirowResizer.tsx b/src/client/views/collections/collectionMulticolumn/MultirowResizer.tsx index 20c6cd3df..4f58f3fa8 100644 --- a/src/client/views/collections/collectionMulticolumn/MultirowResizer.tsx +++ b/src/client/views/collections/collectionMulticolumn/MultirowResizer.tsx @@ -3,7 +3,7 @@ import { observer } from "mobx-react"; import { observable, action } from "mobx"; import { Doc } from "../../../../new_fields/Doc"; import { NumCast, StrCast } from "../../../../new_fields/Types"; -import { HeightUnit } from "./CollectionMultirowView"; +import { DimUnit } from "./CollectionMultirowView"; interface ResizerProps { height: number; @@ -46,14 +46,14 @@ export default class ResizeBar extends React.Component { const unitLength = columnUnitLength(); if (unitLength) { if (toNarrow) { - const { heightUnit, heightMagnitude } = toNarrow; - const scale = heightUnit === HeightUnit.Ratio ? unitLength : 1; - toNarrow.heightMagnitude = NumCast(heightMagnitude) - Math.abs(movementY) / scale; + const { dimUnit, dimMagnitude } = toNarrow; + const scale = dimUnit === DimUnit.Ratio ? unitLength : 1; + toNarrow.dimMagnitude = NumCast(dimMagnitude) - Math.abs(movementY) / scale; } if (this.resizeMode === ResizeMode.Pinned && toWiden) { - const { heightUnit, heightMagnitude } = toWiden; - const scale = heightUnit === HeightUnit.Ratio ? unitLength : 1; - toWiden.heightMagnitude = NumCast(heightMagnitude) + Math.abs(movementY) / scale; + const { dimUnit, dimMagnitude } = toWiden; + const scale = dimUnit === DimUnit.Ratio ? 
unitLength : 1; + toWiden.dimMagnitude = NumCast(dimMagnitude) + Math.abs(movementY) / scale; } } } @@ -61,17 +61,17 @@ export default class ResizeBar extends React.Component { private get isActivated() { const { toTop, toBottom } = this.props; if (toTop && toBottom) { - if (StrCast(toTop.heightUnit) === HeightUnit.Pixel && StrCast(toBottom.heightUnit) === HeightUnit.Pixel) { + if (StrCast(toTop.dimUnit) === DimUnit.Pixel && StrCast(toBottom.dimUnit) === DimUnit.Pixel) { return false; } return true; } else if (toTop) { - if (StrCast(toTop.heightUnit) === HeightUnit.Pixel) { + if (StrCast(toTop.dimUnit) === DimUnit.Pixel) { return false; } return true; } else if (toBottom) { - if (StrCast(toBottom.heightUnit) === HeightUnit.Pixel) { + if (StrCast(toBottom.dimUnit) === DimUnit.Pixel) { return false; } return true; diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index f7a38112d..3375c1141 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -188,8 +188,8 @@ def write_image(folder, name): "y": 10, "_width": min(800, native_width), "zIndex": 2, - "widthUnit": "*", - "widthMagnitude": 1 + "dimUnit": "*", + "dimMagnitude": 1 }, "__type": "Doc" } -- cgit v1.2.3-70-g09d2 From 983f51b62f4b869bdb86fc4b708098d02f0d749d Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Mon, 3 Feb 2020 17:57:09 -0500 Subject: added base64 encodings support for image upload, removed logs from scraper.py --- src/client/views/collections/CollectionSubView.tsx | 7 +++++- src/scraping/buxton/scraper.py | 14 ++++++------ src/server/ApiManagers/DownloadManager.ts | 6 +++-- src/server/ApiManagers/GooglePhotosManager.ts | 9 ++++++-- src/server/ApiManagers/UploadManager.ts | 3 ++- src/server/ApiManagers/UtilManager.ts | 7 +++++- src/server/DashUploadUtils.ts | 26 ++++++++++++++++++---- 7 files changed, 54 insertions(+), 18 deletions(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/client/views/collections/CollectionSubView.tsx b/src/client/views/collections/CollectionSubView.tsx index 0eeb1c83d..9cdd48089 100644 --- a/src/client/views/collections/CollectionSubView.tsx +++ b/src/client/views/collections/CollectionSubView.tsx @@ -254,7 +254,12 @@ export function CollectionSubView(schemaCtor: (doc: Doc) => T) { const img = tags[0].startsWith("img") ? tags[0] : tags.length > 1 && tags[1].startsWith("img") ? 
tags[1] : ""; if (img) { const split = img.split("src=\"")[1].split("\"")[0]; - const doc = Docs.Create.ImageDocument(split, { ...options, _width: 300 }); + let source = split; + if (split.startsWith("data:image") && split.includes("base64")) { + const [{ clientAccessPath }] = await Networking.PostToServer("/uploadRemoteImage", { sources: [split] }); + source = Utils.prepend(clientAccessPath); + } + const doc = Docs.Create.ImageDocument(source, { ...options, _width: 300 }); ImageUtils.ExtractExif(doc); this.props.addDocument(doc); return; diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index f7a38112d..c502ac30c 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -115,8 +115,8 @@ def write_collection(parse_results, display_fields, storage_key, viewType): target_collection.insert_one(view_doc) data_doc_guid = data_doc["_id"] - print(f"inserted view document ({view_doc_guid})") - print(f"inserted data document ({data_doc_guid})\n") + # print(f"inserted view document ({view_doc_guid})") + # print(f"inserted data document ({data_doc_guid})\n") return view_doc_guid @@ -233,7 +233,7 @@ def parse_document(file_name: str): result = {} dir_path = image_dist + "/" + pure_name - print(dir_path) + # print(dir_path) mkdir_if_absent(dir_path) raw = str(docx2txt.process(source + "/" + file_name, dir_path)) @@ -252,7 +252,7 @@ def parse_document(file_name: str): medium = dir_path + "/" + image.replace(".", "_m.", 1) copyfile(resolved, original) copyfile(resolved, medium) - print(f"extracted {count} images...") + # print(f"extracted {count} images...") def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( u"\u2013", "-").replace(u"\u201c", '''"''').replace(u"\u201d", '''"''').strip() @@ -360,7 +360,7 @@ def parse_document(file_name: str): if len(notes) > 0: result["notes"] = listify(notes) - print("writing child schema...") + # print("writing child schema...") return { "schema": { @@ -392,7 +392,7 @@ def write_common_proto(): if os.path.exists(image_dist): - shutil.rmtree(image_dist) + shutil.rmtree(image_dist, True) while os.path.exists(image_dist): pass os.mkdir(image_dist) @@ -415,7 +415,7 @@ parent_guid = write_collection({ "__type": "Doc" }, "child_guids": schema_guids -}, ["title", "short_description", "original_price"], "data", 2) +}, ["title", "short_description", "original_price"], "data", 4) print("appending parent schema to main workspace...\n") target_collection.update_one( diff --git a/src/server/ApiManagers/DownloadManager.ts b/src/server/ApiManagers/DownloadManager.ts index 1bb84f374..fad5e6789 100644 --- a/src/server/ApiManagers/DownloadManager.ts +++ b/src/server/ApiManagers/DownloadManager.ts @@ -254,11 +254,13 @@ async function writeHierarchyRecursive(file: Archiver.Archiver, hierarchy: Hiera // and dropped in the browser and thus hosted remotely) so we upload it // to our server and point the zip file to it, so it can bundle up the bytes const information = await DashUploadUtils.UploadImage(result); - path = information.serverAccessPaths[SizeSuffix.Original]; + path = information instanceof Error ? "" : information.serverAccessPaths[SizeSuffix.Original]; } // write the file specified by the path to the directory in the // zip file given by the prefix. 
- file.file(path, { name: documentTitle, prefix }); + if (path) { + file.file(path, { name: documentTitle, prefix }); + } } else { // we've hit a collection, so we have to recurse await writeHierarchyRecursive(file, result, `${prefix}/${documentTitle}`); diff --git a/src/server/ApiManagers/GooglePhotosManager.ts b/src/server/ApiManagers/GooglePhotosManager.ts index 107542ce2..1727cc5a6 100644 --- a/src/server/ApiManagers/GooglePhotosManager.ts +++ b/src/server/ApiManagers/GooglePhotosManager.ts @@ -88,8 +88,13 @@ export default class GooglePhotosManager extends ApiManager { if (contents) { const completed: Opt[] = []; for (const item of contents.mediaItems) { - const { contentSize, ...attributes } = await DashUploadUtils.InspectImage(item.baseUrl); - const found: Opt = await Database.Auxiliary.QueryUploadHistory(contentSize!); + const results = await DashUploadUtils.InspectImage(item.baseUrl); + if (results instanceof Error) { + failed++; + continue; + } + const { contentSize, ...attributes } = results; + const found: Opt = await Database.Auxiliary.QueryUploadHistory(contentSize); if (!found) { const upload = await DashUploadUtils.UploadInspectedImage({ contentSize, ...attributes }, item.filename, prefix).catch(error => _error(res, downloadError, error)); if (upload) { diff --git a/src/server/ApiManagers/UploadManager.ts b/src/server/ApiManagers/UploadManager.ts index a92b613b7..4d09528f4 100644 --- a/src/server/ApiManagers/UploadManager.ts +++ b/src/server/ApiManagers/UploadManager.ts @@ -65,7 +65,8 @@ export default class UploadManager extends ApiManager { secureHandler: async ({ req, res }) => { const { sources } = req.body; if (Array.isArray(sources)) { - return res.send(await Promise.all(sources.map(url => DashUploadUtils.UploadImage(url)))); + const results = await Promise.all(sources.map(source => DashUploadUtils.UploadImage(source))); + return res.send(results); } res.send(); } diff --git a/src/server/ApiManagers/UtilManager.ts b/src/server/ApiManagers/UtilManager.ts index a0d0d0f4b..d7b085a30 100644 --- a/src/server/ApiManagers/UtilManager.ts +++ b/src/server/ApiManagers/UtilManager.ts @@ -47,7 +47,12 @@ export default class UtilManager extends ApiManager { const onResolved = (stdout: string) => { console.log(stdout); res.redirect("/"); }; const onRejected = (err: any) => { console.error(err.message); res.send(err); }; - const tryPython3 = () => command_line('python3 scraper.py', cwd).then(onResolved, onRejected); + const tryPython3 = (reason: any) => { + console.log("Initial scraper failed for the following reason:"); + console.log(red(reason.Error)); + console.log("Falling back to python3..."); + command_line('python3 scraper.py', cwd).then(onResolved, onRejected); + }; return command_line('python scraper.py', cwd).then(onResolved, tryPython3); }, diff --git a/src/server/DashUploadUtils.ts b/src/server/DashUploadUtils.ts index cb7104757..27c4bf854 100644 --- a/src/server/DashUploadUtils.ts +++ b/src/server/DashUploadUtils.ts @@ -1,4 +1,4 @@ -import { unlinkSync, createWriteStream, readFileSync, rename } from 'fs'; +import { unlinkSync, createWriteStream, readFileSync, rename, writeFile } from 'fs'; import { Utils } from '../Utils'; import * as path from 'path'; import * as sharp from 'sharp'; @@ -127,9 +127,12 @@ export namespace DashUploadUtils { * 3) the size of the image, in bytes (4432130) * 4) the content type of the image, i.e. image/(jpeg | png | ...) 
*/ - export const UploadImage = async (source: string, filename?: string, format?: string, prefix: string = ""): Promise => { + export const UploadImage = async (source: string, filename?: string, format?: string, prefix: string = ""): Promise => { const metadata = await InspectImage(source); - return UploadInspectedImage(metadata, filename, format, prefix); + if (metadata instanceof Error) { + return metadata; + } + return UploadInspectedImage(metadata, filename || metadata.filename, format, prefix); }; export interface InspectionResults { @@ -140,6 +143,7 @@ export namespace DashUploadUtils { contentType: string; nativeWidth: number; nativeHeight: number; + filename?: string; } export interface EnrichedExifData { @@ -164,7 +168,20 @@ export namespace DashUploadUtils { * * @param source is the path or url to the image in question */ - export const InspectImage = async (source: string): Promise => { + export const InspectImage = async (source: string): Promise => { + let rawMatches: RegExpExecArray | null; + let filename: string | undefined; + if ((rawMatches = /^data:image\/([a-z]+);base64,(.*)/.exec(source)) !== null) { + const [ext, data] = rawMatches.slice(1, 3); + const resolved = filename = `upload_${Utils.GenerateGuid()}.${ext}`; + const error = await new Promise(resolve => { + writeFile(serverPathToFile(Directory.images, resolved), data, "base64", resolve); + }); + if (error !== null) { + return error; + } + source = `http://localhost:1050${clientPathToFile(Directory.images, resolved)}`; + } let resolvedUrl: string; const matches = isLocal().exec(source); if (matches === null) { @@ -187,6 +204,7 @@ export namespace DashUploadUtils { contentType: headers[type], nativeWidth, nativeHeight, + filename, ...results }; }; -- cgit v1.2.3-70-g09d2
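The upshot of this last commit is that a pasted data-URI image now round-trips through the server instead of being stored inline. A minimal sketch of exercising the new route from Python, the scraper's own language (assumptions: the Dash server from this repo is running on localhost:1050, /uploadRemoteImage is mounted at the server root as the client's Networking.PostToServer call implies, the requests package is installed, and the image path is hypothetical):

    import base64
    import requests

    # Build a data URI like the ones CollectionSubView.tsx now detects on paste.
    with open("./source/example.png", "rb") as image:
        payload = base64.b64encode(image.read()).decode("utf-8")
    data_uri = f"data:image/png;base64,{payload}"

    # UploadManager reads a `sources` array from the request body and replies with
    # one upload-information object per source; DashUploadUtils.InspectImage takes
    # the base64 branch and writes the decoded bytes into the images directory.
    [info] = requests.post(
        "http://localhost:1050/uploadRemoteImage",
        json={"sources": [data_uri]},
    ).json()

    # The client prepends the server origin to this path when it builds the
    # ImageDocument, so only a short access path is persisted, not the payload.
    print(info["clientAccessPath"])

Persisting the upload's access path rather than the raw base64 string keeps the created ImageDocument small and lets the server host the image like any other upload.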