merged w master

author: kimdahey <claire_kim1@brown.edu> 2020-01-16 11:31:41 -0500
committer: kimdahey <claire_kim1@brown.edu> 2020-01-16 11:31:41 -0500
commit: 6be0e19ed0bd13f3796f542affa5a2e52674650c (patch)
tree: 1be222ea9341ecd8020fad3149035fa650a8a07f /solr-8.1.1/example/films/film_data_generator.py
parent: 5cde81d8c6b4dcd8d0796f8669b668763957f395 (diff)
parent: e410cde0e430553002d4e1a2f64364b57b65fdbc (diff)
1 files changed, 0 insertions, 117 deletions
diff --git a/solr-8.1.1/example/films/film_data_generator.py b/solr-8.1.1/example/films/film_data_generator.py
deleted file mode 100644
index 7e2a46318..000000000
--- a/solr-8.1.1/example/films/film_data_generator.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This will generate a movie data set of 1100 records.
-These are the first 1100 movies which appear when querying the Freebase of type '/film/film'.
-Here is the link to the freebase page - https://www.freebase.com/film/film?schema=
-
-Usage - python3 film_data_generator.py
-"""
-
-import csv
-import copy
-import json
-import codecs
-import datetime
-import urllib.parse
-import urllib.request
-import xml.etree.cElementTree as ET
-from xml.dom import minidom
-
-MAX_ITERATIONS=10  #10 limits it to 1100 docs
-
-# You need an API Key by Google to run this
-API_KEY = '<insert your Google developer API key>'
-service_url = 'https://www.googleapis.com/freebase/v1/mqlread'
-query = [{
-  "id": None,
-  "name": None,
-  "initial_release_date": None,
-  "directed_by": [],
-  "genre": [],
-  "type": "/film/film",
-  "initial_release_date>" : "2000"
-}]
-
-def gen_csv(filmlist):
-  filmlistDup = copy.deepcopy(filmlist)
-  #Convert multi-valued to % delimited string
-  for film in filmlistDup:
-      for key in film:
-        if isinstance(film[key], list):
-          film[key] = '|'.join(film[key])
-  keys = ['name', 'directed_by', 'genre', 'type', 'id', 'initial_release_date']
-  with open('films.csv', 'w', newline='', encoding='utf8') as csvfile:
-    dict_writer = csv.DictWriter(csvfile, keys)
-    dict_writer.writeheader()
-    dict_writer.writerows(filmlistDup)
-
-def gen_json(filmlist):
-  filmlistDup = copy.deepcopy(filmlist)
-  with open('films.json', 'w') as jsonfile:
-    jsonfile.write(json.dumps(filmlist, indent=2))
-
-def gen_xml(filmlist):
-  root = ET.Element("add")
-  for film in filmlist:
-    doc = ET.SubElement(root, "doc")
-    for key in film:
-      if isinstance(film[key], list):
-        for value in film[key]:
-          field = ET.SubElement(doc, "field")
-          field.set("name", key)
-          field.text=value
-      else:
-        field = ET.SubElement(doc, "field")
-        field.set("name", key)
-        field.text=film[key]
-  tree = ET.ElementTree(root)
-  with open('films.xml', 'w') as f:
-    f.write( minidom.parseString(ET.tostring(tree.getroot(),'utf-8')).toprettyxml(indent="  ") )
-
-def do_query(filmlist, cursor=""):
-  params = {
-          'query': json.dumps(query),
-          'key': API_KEY,
-          'cursor': cursor
-  }
-  url = service_url + '?' + urllib.parse.urlencode(params)
-  data = urllib.request.urlopen(url).read().decode('utf-8')
-  response = json.loads(data)
-  for item in response['result']:
-    del item['type'] # It's always /film/film. No point of adding this.
-    try:
-      datetime.datetime.strptime(item['initial_release_date'], "%Y-%m-%d")
-    except ValueError:
-      #Date time not formatted properly. Keeping it simple by removing the date field from that doc
-      del item['initial_release_date']
-    filmlist.append(item)
-  return response.get("cursor")
-
-
-if __name__ == "__main__":
-  filmlist = []
-  cursor = do_query(filmlist)
-  i=0
-  while(cursor):
-      cursor = do_query(filmlist, cursor)
-      i = i+1
-      if i==MAX_ITERATIONS:
-          break
-
-  gen_json(filmlist)
-  gen_csv(filmlist)
-  gen_xml(filmlist)
author	kimdahey <claire_kim1@brown.edu>	2020-01-16 11:31:41 -0500
committer	kimdahey <claire_kim1@brown.edu>	2020-01-16 11:31:41 -0500
commit	6be0e19ed0bd13f3796f542affa5a2e52674650c (patch)
tree	1be222ea9341ecd8020fad3149035fa650a8a07f /solr-8.1.1/example/films/film_data_generator.py
parent	5cde81d8c6b4dcd8d0796f8669b668763957f395 (diff)
parent	e410cde0e430553002d4e1a2f64364b57b65fdbc (diff)