# Provenance: venv/lib/python3.8/site-packages/_plotly_utils/utils.py, as added
# by commit 5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e ("add code for analysis of
# data", sotech117, 2025-07-31).
import base64
import decimal
import json as _json
import sys
import re
from functools import reduce
from itertools import accumulate

from _plotly_utils.optional_imports import get_module
from _plotly_utils.basevalidators import (
    ImageUriValidator,
    copy_to_readonly_numpy_array,
    is_homogeneous_array,
)


# Inclusive value ranges of the fixed-width integer types that 64-bit arrays
# may be narrowed to.
int8min = -128
int8max = 127
int16min = -32768
int16max = 32767
int32min = -2147483648
int32max = 2147483647

uint8max = 255
uint16max = 65535
uint32max = 4294967295

# numpy dtype name -> short dtype code emitted in the typed array spec.
plotlyjsShortTypes = {
    "int8": "i1",
    "uint8": "u1",
    "int16": "i2",
    "uint16": "u2",
    "int32": "i4",
    "uint32": "u4",
    "float32": "f4",
    "float64": "f8",
}

# 64-bit dtype name -> candidate narrower dtypes as (name, lowest, highest),
# ordered from narrowest to widest so the first fit is the smallest one.
_DOWNCASTS_64BIT = {
    "int64": (
        ("int8", int8min, int8max),
        ("int16", int16min, int16max),
        ("int32", int32min, int32max),
    ),
    "uint64": (
        ("uint8", 0, uint8max),
        ("uint16", 0, uint16max),
        ("uint32", 0, uint32max),
    ),
}

# Dict keys whose values are never converted to the typed array spec.
_SKIPPED_KEYS = ("geojson", "layer", "layers", "range")


def to_typed_array_spec(v):
    """
    Convert numpy array to plotly.js typed array spec
    If not possible return the original value

    :param v: The value to convert. It is first passed through
        copy_to_readonly_numpy_array.
    :return: A dict with "dtype" and "bdata" keys (plus "shape" when the
        array has more than one dimension), or the value produced by
        copy_to_readonly_numpy_array when no conversion applies.
    """
    v = copy_to_readonly_numpy_array(v)

    # Skip b64 encoding if numpy is not installed,
    # or if v is not a numpy array, or if v is empty
    np = get_module("numpy", should_load=False)
    if not np or not isinstance(v, np.ndarray) or v.size == 0:
        return v

    # convert default Big Ints until we could support them in plotly.js
    downcasts = _DOWNCASTS_64BIT.get(str(v.dtype))
    if downcasts is not None:
        v_max = v.max()
        v_min = v.min()
        for target, lowest, highest in downcasts:
            if v_max <= highest and v_min >= lowest:
                v = v.astype(target)
                break
        else:
            # The values do not fit in any supported narrower type.
            return v

    dtype = str(v.dtype)
    if dtype not in plotlyjsShortTypes:
        return v

    arrObj = {
        "dtype": plotlyjsShortTypes[dtype],
        "bdata": base64.b64encode(v).decode("ascii"),
    }

    if v.ndim > 1:
        # "(2, 3)" -> "2, 3": the tuple repr without its parentheses.
        arrObj["shape"] = str(v.shape)[1:-1]

    return arrObj


def is_skipped_key(key):
    """
    Return whether the key is skipped for conversion to the typed array spec

    :param key: The dict key to test.
    :return: True if key equals one of "geojson", "layer", "layers", "range".
    """
    return key in _SKIPPED_KEYS


def convert_to_base64(obj):
    """
    Recursively replace homogeneous arrays found in obj, in place, with their
    typed array spec.

    Dict values stored under a skipped key (see is_skipped_key) are left
    untouched and are not descended into. Lists and tuples are descended
    into, but their own elements are never replaced.

    :param obj: A dict, list, tuple, or any other value (which is ignored).
    :return: None; dicts inside obj are modified in place.
    """
    if isinstance(obj, dict):
        # Only values of existing keys are reassigned, so the dict does not
        # change size while it is being iterated.
        for key, value in obj.items():
            if is_skipped_key(key):
                continue
            elif is_homogeneous_array(value):
                obj[key] = to_typed_array_spec(value)
            else:
                convert_to_base64(value)
    elif isinstance(obj, (list, tuple)):
        for value in obj:
            convert_to_base64(value)


def cumsum(x):
    """
    Custom cumsum to avoid a numpy import.

    :param x: An iterable of values that support "+".
    :return: A list whose k-th element is the sum of the first k + 1 elements
        of x. An empty iterable yields an empty list.
    """
    return list(accumulate(x))


class PlotlyJSONEncoder(_json.JSONEncoder):
    """
    Meant to be passed as the `cls` kwarg to json.dumps(obj, cls=..)

    See PlotlyJSONEncoder.default for more implementation information.

    Additionally, this encoder overrides nan functionality so that 'Inf',
    'NaN' and '-Inf' encode to 'null'. Which is stricter JSON than the Python
    version.

    """

    def coerce_to_strict(self, const):
        """
        This is used to ultimately *encode* into strict JSON, see `encode`

        :param (str) const: The constant name handed over by json.loads via
            its parse_constant hook.
        :return: None for "Infinity", "-Infinity" and "NaN"; const otherwise.
        """
        # before python 2.7, 'true', 'false', 'null', were included here.
        if const in ("Infinity", "-Infinity", "NaN"):
            return None
        else:
            return const

    def encode(self, o):
        """
        Load and then dump the result using parse_constant kwarg

        Note that setting invalid separators will cause a failure at this step.

        :param o: The object to encode.
        :return: (str) The JSON document, with NaN / Infinity / -Infinity
            values replaced by null.
        :raises ValueError: If the first-pass output cannot be parsed back.
        """
        # this will raise errors in a normal-expected way
        encoded_o = super().encode(o)

        # Brute force guessing whether NaN or Infinity values are in the string
        # We catch false positive cases (e.g. strings such as titles, labels etc.)
        # but this is ok since the intention is to skip the decoding / reencoding
        # step when it's completely safe
        if not ("NaN" in encoded_o or "Infinity" in encoded_o):
            return encoded_o

        # now:
        #    1. `loads` to switch Infinity, -Infinity, NaN to None
        #    2. `dumps` again so you get 'null' instead of extended JSON
        try:
            new_o = _json.loads(encoded_o, parse_constant=self.coerce_to_strict)
        except ValueError as err:
            # invalid separators will fail here. raise a helpful exception
            # and keep the original parse error attached as its cause.
            raise ValueError(
                "Encoding into strict JSON failed. Did you set the separators "
                "valid JSON separators?"
            ) from err

        return _json.dumps(
            new_o,
            sort_keys=self.sort_keys,
            indent=self.indent,
            separators=(self.item_separator, self.key_separator),
        )

    def default(self, obj):
        """
        Accept an object (of unknown type) and try to encode with priority:
        1. builtin: objects with a `to_plotly_json` method
        2. sage: sage math cloud
        3. numpy: masked constant, datetime64 ndarrays
        4. pandas: NaT / NA
        5. datetime: time/datetime objects
        6. list: anything with a `tolist` method
        7. decimal: decimal.Decimal
        8. pil: PIL images

        Each method throws a NotEncodable exception if it fails.

        The default method will only get hit if the object is not a type that
        is naturally encoded by json:

            Normal objects:
                dict                object
                list, tuple         array
                str, unicode        string
                int, long, float    number
                True                true
                False               false
                None                null

            Extended objects:
                float('nan')        'NaN'
                float('infinity')   'Infinity'
                float('-infinity')  '-Infinity'

        Therefore, we only anticipate either unknown iterables or values here.

        """
        # TODO: The ordering of these methods is *very* important. Is this OK?
        # NOTE(review): encode_as_datetime and encode_as_date both succeed
        # exactly when obj.isoformat() exists, so encode_as_date never gets to
        # produce the result from here.
        encoding_methods = (
            self.encode_as_plotly,
            self.encode_as_sage,
            self.encode_as_numpy,
            self.encode_as_pandas,
            self.encode_as_datetime,
            self.encode_as_date,
            self.encode_as_list,  # because some values have `tolist` do last.
            self.encode_as_decimal,
            self.encode_as_pil,
        )
        for encoding_method in encoding_methods:
            try:
                return encoding_method(obj)
            except NotEncodable:
                pass
        # Nothing matched: let the base class raise its usual TypeError.
        return _json.JSONEncoder.default(self, obj)

    @staticmethod
    def encode_as_plotly(obj):
        """Attempt to use a builtin `to_plotly_json` method."""
        try:
            return obj.to_plotly_json()
        except AttributeError:
            raise NotEncodable

    @staticmethod
    def encode_as_list(obj):
        """Attempt to use `tolist` method to convert to normal Python list."""
        if hasattr(obj, "tolist"):
            return obj.tolist()
        else:
            raise NotEncodable

    @staticmethod
    def encode_as_sage(obj):
        """Attempt to convert sage.all.RR to floats and sage.all.ZZ to ints"""
        sage_all = get_module("sage.all")
        if not sage_all:
            raise NotEncodable

        if obj in sage_all.RR:
            return float(obj)
        elif obj in sage_all.ZZ:
            return int(obj)
        else:
            raise NotEncodable

    @staticmethod
    def encode_as_pandas(obj):
        """Attempt to convert pandas.NaT / pandas.NA"""
        pandas = get_module("pandas", should_load=False)
        if not pandas:
            raise NotEncodable

        if obj is pandas.NaT:
            return None

        # pandas.NA was introduced in pandas 1.0
        if hasattr(pandas, "NA") and obj is pandas.NA:
            return None

        raise NotEncodable

    @staticmethod
    def encode_as_numpy(obj):
        """Attempt to convert numpy.ma.core.masked and datetime64 ndarrays"""
        numpy = get_module("numpy", should_load=False)
        if not numpy:
            raise NotEncodable

        if obj is numpy.ma.core.masked:
            return float("nan")
        elif isinstance(obj, numpy.ndarray) and obj.dtype.kind == "M":
            try:
                return numpy.datetime_as_string(obj).tolist()
            except TypeError:
                # Fall through to NotEncodable so the next encoder is tried.
                pass

        raise NotEncodable

    @staticmethod
    def encode_as_datetime(obj):
        """Convert datetime objects to iso-format strings"""
        try:
            return obj.isoformat()
        except AttributeError:
            raise NotEncodable

    @staticmethod
    def encode_as_date(obj):
        """Attempt to convert to utc-iso time string using date methods."""
        try:
            time_string = obj.isoformat()
        except AttributeError:
            raise NotEncodable
        else:
            return iso_to_plotly_time_string(time_string)

    @staticmethod
    def encode_as_decimal(obj):
        """Attempt to encode decimal by converting it to float"""
        if isinstance(obj, decimal.Decimal):
            return float(obj)
        else:
            raise NotEncodable

    @staticmethod
    def encode_as_pil(obj):
        """Attempt to convert PIL.Image.Image to base64 data uri"""
        image = get_module("PIL.Image")
        if image is not None and isinstance(obj, image.Image):
            return ImageUriValidator.pil_image_to_uri(obj)
        else:
            raise NotEncodable


class NotEncodable(Exception):
    """Raised by an `encode_as_*` method that cannot handle the given object."""

    pass


def iso_to_plotly_time_string(iso_string):
    """
    Remove timezone info and replace 'T' delimiter with ' ' (ws).

    :param (str) iso_string: An ISO-8601 formatted date / time string.
    :return: (str) The string without "-00:00" / "+00:00". If it then ends
        with "T00:00:00" that part is removed, otherwise "T" is replaced
        with " ".
    """
    # make sure we don't send timezone info to plotly
    # NOTE(review): the first comparison tests a list against a string and is
    # always False; the second is only True when the text before the first
    # "+" is exactly "00:00". The guard therefore almost never fires. It is
    # kept as-is because making it effective would start raising for inputs
    # that are accepted today.
    if (iso_string.split("-")[:3] == "00:00") or (iso_string.split("+")[0] == "00:00"):
        raise Exception(
            "Plotly won't accept timestrings with timezone info.\n"
            "All timestrings are assumed to be in UTC."
        )

    iso_string = iso_string.replace("-00:00", "").replace("+00:00", "")

    if iso_string.endswith("T00:00:00"):
        return iso_string.replace("T00:00:00", "")
    else:
        return iso_string.replace("T", " ")


def template_doc(**names):
    """
    Build a decorator that fills the decorated function's docstring with
    `str.format(**names)`.

    Functions without a docstring are returned unchanged, and nothing is
    substituted when running on Python 3.2.

    :param names: The values substituted into the docstring's replacement
        fields.
    :return: The decorator.
    """

    def _decorator(func):
        if sys.version_info[:2] != (3, 2):
            if func.__doc__ is not None:
                func.__doc__ = func.__doc__.format(**names)
        return func

    return _decorator


def _natural_sort_strings(vals, reverse=False):
    """
    Sort strings so that runs of digits are compared as integers
    (e.g. "x2" sorts before "x10").

    :param vals: An iterable of strings.
    :param (boolean) reverse: Sort in descending order if True.
    :return: (list) The sorted strings.
    """

    def _int_if_possible(part):
        try:
            return int(part)
        except ValueError:
            # not an int
            return part

    def key(v):
        # The capturing group makes re.split keep the digit runs.
        return tuple(_int_if_possible(part) for part in re.split(r"(\d+)", v))

    return sorted(vals, key=key, reverse=reverse)


def _get_int_type():
    """
    Return the tuple of types to treat as integers: `int`, plus numpy's
    `integer` when get_module hands back numpy.
    """
    np = get_module("numpy", should_load=False)
    if np:
        return (int, np.integer)
    return (int,)


def split_multichar(ss, chars):
    """
    Split all the strings in ss at any of the characters in chars.
    Example:

        >>> ss = ["a.string[0].with_separators"]
        >>> chars = list(".[]_")
        >>> split_multichar(ss, chars)
        ['a', 'string', '0', '', 'with', 'separators']

    :param (list) ss: A list of strings.
    :param (list) chars: Is a list of chars (note: not a string). It is not
        modified.
    :return: (list) The pieces, in order. ss itself is returned when chars is
        empty.
    """
    # Work on a copy so the caller's list is left intact, walking it from the
    # end to split in the same order as popping would.
    for c in reversed(list(chars)):
        ss = [piece for s in ss for piece in s.split(c)]
    return ss


def split_string_positions(ss):
    """
    Given a list of strings split using split_multichar, return a list of
    integers representing the indices of the first character of every string in
    the original string.
    Example:

        >>> ss = ["a.string[0].with_separators"]
        >>> chars = list(".[]_")
        >>> ss_split = split_multichar(ss, chars)
        >>> ss_split
        ['a', 'string', '0', '', 'with', 'separators']
        >>> split_string_positions(ss_split)
        [0, 2, 9, 11, 12, 17]

    :param (list) ss: A list of strings.
    :return: (list) One start index per string in ss.
    """
    # Total length of all the pieces that precede each piece.
    preceding_lengths = cumsum([0] + [len(s) for s in ss[:-1]])
    # Each preceding piece was followed by one separator character, hence the
    # extra `index`. Zipping with the range keeps the result empty for empty ss.
    return [
        index + length for index, length in zip(range(len(ss)), preceding_lengths)
    ]


def display_string_positions(p, i=None, offset=0, length=1, char="^", trim=True):
    """
    Return a string that is whitespace except at p[i] which is replaced with char.
    If i is None then all the indices of the string in p are replaced with char.

    Example:

        >>> ss = ["a.string[0].with_separators"]
        >>> chars = list(".[]_")
        >>> ss_split = split_multichar(ss, chars)
        >>> ss_split
        ['a', 'string', '0', '', 'with', 'separators']
        >>> ss_pos = split_string_positions(ss_split)
        >>> ss[0]
        'a.string[0].with_separators'
        >>> display_string_positions(ss_pos,4)
        '            ^'
        >>> display_string_positions(ss_pos,4,offset=1,length=3,char="~",trim=False)
        '             ~~~      '
        >>> display_string_positions(ss_pos)
        '^ ^      ^ ^^    ^'

    :param (list) p: A list of integers.
    :param (integer|None) i: Optional index of p to display.
    :param (integer) offset: Allows adding a number of spaces to the replacement.
    :param (integer) length: Allows adding a replacement that is the char
        repeated length times.
    :param (str) char: allows customizing the replacement character.
    :param (boolean) trim: trims the remaining whitespace if True.
    """
    s = [" "] * (max(p) + 1 + offset + length)
    positions = p if i is None else [p[i]]
    # Index of the last cell written; this is where trimming cuts.
    maxaddr = 0
    for position in positions:
        for step in range(length):
            maxaddr = position + offset + step
            s[maxaddr] = char
    ret = "".join(s)
    if trim:
        ret = ret[: maxaddr + 1]
    return ret


def chomp_empty_strings(strings, c, reverse=False):
    """
    Given a list of strings, some of which are the empty string "", replace the
    empty strings with c and combine them with the closest non-empty string on
    the left or "" if it is the first string.
    Examples:
    for c="_"
    ['hey', '', 'why', '', '', 'whoa', '', ''] -> ['hey_', 'why__', 'whoa__']
    ['', 'hi', '', "I'm", 'bob', '', ''] -> ['_', 'hi_', "I'm", 'bob__']
    ['hi', "i'm", 'a', 'good', 'string'] -> ['hi', "i'm", 'a', 'good', 'string']
    Some special cases are:
    [] -> []
    [''] -> ['']
    ['', ''] -> ['_']
    ['', '', '', ''] -> ['___']
    If reverse is true, empty strings are combined with closest non-empty string
    on the right or "" if it is the last string.

    :param (list) strings: The strings to process.
    :param (str) c: The replacement for each empty string.
    :param (boolean) reverse: Combine towards the right instead of the left.
    :return: (list) The combined strings.
    """

    def _rev(vals):
        # Reverse the list and every string in it.
        return [s[::-1] for s in vals][::-1]

    if reverse:
        return _rev(chomp_empty_strings(_rev(strings), c))
    if not len(strings):
        return strings
    if sum(map(len, strings)) == 0:
        return [c * (len(strings) - 1)]

    # Start from a "" sentinel so leading empty strings have something to
    # attach to; it is dropped at the end if it is still empty.
    combined = [""]
    for s in strings:
        if len(s) == 0:
            combined[-1] = combined[-1] + c
        else:
            combined.append(s)
    return [s for s in combined if len(s)]


# taken from
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
def levenshtein(s1, s2):
    """
    Return the Levenshtein (edit) distance between two sequences.

    :param s1: The first sequence, e.g. a string.
    :param s2: The second sequence.
    :return: (int) The minimum number of single-element insertions, deletions
        and substitutions that turn one sequence into the other.
    """
    if len(s1) < len(s2):
        return levenshtein(s2, s1)  # len(s1) >= len(s2)
    if len(s2) == 0:
        return len(s1)
    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            # j+1 instead of j since previous_row and current_row are one character longer
            # than s2
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


def find_closest_string(string, strings):
    """
    Return the element of strings with the smallest Levenshtein distance to
    string.

    :param (str) string: The reference string.
    :param strings: An iterable of candidate strings.
    :return: (str) The closest candidate.
    :raises IndexError: If strings is empty.
    """

    def _key(s):
        # sort by levenshtein distance and lexicographically to maintain a stable
        # sort for different keys with the same levenshtein distance
        return (levenshtein(s, string), s)

    return sorted(strings, key=_key)[0]