issue_comments: 1248621072
This data as json
html_url | issue_url | id | node_id | user | created_at | updated_at | author_association | body | reactions | issue | performed_via_github_app |
---|---|---|---|---|---|---|---|---|---|---|---|
https://github.com/simonw/sqlite-utils/issues/489#issuecomment-1248621072 | https://api.github.com/repos/simonw/sqlite-utils/issues/489 | 1248621072 | IC_kwDOCGYnMM5KbHIQ | 9599 | 2022-09-15T20:56:09Z | 2022-09-15T20:56:09Z | OWNER | Prototype so far: ```diff diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 767b170..d96c507 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -1762,6 +1762,17 @@ def query( is_flag=True, help="Analyze resulting tables and output results", ) +@click.option("--key", help="read data from this key of the root object") +@click.option( + "--auto-key", + is_flag=True, + help="Find a key in the root object that is a list of objects", +) +@click.option( + "--analyze", + is_flag=True, + help="Analyze resulting tables and output results", +) @load_extension_option def memory( paths, @@ -1784,6 +1795,8 @@ def memory( schema, dump, save, + key, + auto_key, analyze, load_extension, ): @@ -1838,7 +1851,9 @@ def memory( csv_table = stem stem_counts[stem] = stem_counts.get(stem, 1) + 1 csv_fp = csv_path.open("rb") - rows, format_used = rows_from_file(csv_fp, format=format, encoding=encoding) + rows, format_used = rows_from_file( + csv_fp, format=format, encoding=encoding, key=key, auto_key=auto_key + ) tracker = None if format_used in (Format.CSV, Format.TSV) and not no_detect_types: tracker = TypeTracker() diff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py index 8754554..2e69c26 100644 --- a/sqlite_utils/utils.py +++ b/sqlite_utils/utils.py @@ -231,6 +231,8 @@ def rows_from_file( encoding: Optional[str] = None, ignore_extras: Optional[bool] = False, extras_key: Optional[str] = None, + key: Optional[str] = None, + auto_key: Optional[bool] = False, ) -> Tuple[Iterable[dict], Format]: """ Load a sequence of dictionaries from a file-like object containing one of four different formats. @@ -271,13 +273,31 @@ def rows_from_file( :param encoding: the character encoding to use when reading CSV/TSV data :param ignore_extras: ignore any extra fields on rows :param extras_key: put any extra fields in a list with this key + :param key: read data from this key of the root object + :param auto_key: find a key in the root object that is a list of objects """ if ignore_extras and extras_key: raise ValueError("Cannot use ignore_extras= and extras_key= together") + if key and auto_key: + raise ValueError("Cannot use key= and auto_key= together") if format == Format.JSON: decoded = json.load(fp) if isinstance(decoded, dict): - decoded = [decoded] + if auto_key: + list_keys = [ + k + for k in decoded + if isinstance(decoded[k], list) + and decoded[k] + and all(isinstance(o, dict) for o in decoded[k]) + ] + if len(list_keys) == 1: + decoded = decoded[list_keys[0]] + elif key: + # Raises KeyError, I think that's OK + decoded = decoded[key] + if not isinstance(decoded, list): + decoded = [decoded] if not isinstance(decoded, list): raise RowsFromFileBadJSON("JSON must be a list or a dictionary") return decoded, Format.JSON @@ -305,7 +325,9 @@ def rows_from_file( first_bytes = buffered.peek(2048).strip() if first_bytes.startswith(b"[") or first_bytes.startswith(b"{"): # TODO: Detect newline-JSON - return rows_from_file(buffered, format=Format.JSON) + return rows_from_file( + buffered, format=Format.JSON, key=key, auto_key=auto_key + ) else: dialect = csv.Sniffer().sniff( first_bytes.decode(encoding or "utf-8-sig", "ignore") ``` | {"total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0} | 1374939463 |