add script for dumping full bugzilla database as json
author Jacob Lifshay <programmerjake@gmail.com>
Wed, 24 Jan 2024 04:46:39 +0000 (20:46 -0800)
committer Jacob Lifshay <programmerjake@gmail.com>
Wed, 24 Jan 2024 04:52:26 +0000 (20:52 -0800)
.gitignore
src/budget_sync/dump_full.py [new file with mode: 0644]

index 7b61abc69dac9df9c5e4b4a4997b98e39d10189b..a5075135743459aeb501ede09addd8b80b75af07 100644 (file)
@@ -6,3 +6,4 @@ __pycache__
 /task_db
 .coverage
 cov.xml
+/dump
diff --git a/src/budget_sync/dump_full.py b/src/budget_sync/dump_full.py
new file mode 100644 (file)
index 0000000..2cbb680
--- /dev/null
@@ -0,0 +1,64 @@
+from budget_sync.util import all_bugs
+from budget_sync.config import Config, ConfigParseError
+from bugzilla import Bugzilla
+import logging
+import argparse
+from pathlib import Path
+import json
+from xmlrpc.client import DateTime
+
+
+def _encode_json(obj):
+    """Fallback serializer passed to `json.dumps` as its `default=` hook.
+
+    An `xmlrpc.client.DateTime` is returned as its `str()` form; any other
+    object raises `TypeError` whose sole argument is the object's type.
+    """
+    if not isinstance(obj, DateTime):
+        raise TypeError(type(obj))
+    return str(obj)
+
+
+def main():
+    """Dump every bug yielded by `all_bugs` into an output directory.
+
+    Command-line options (both required): `-c/--config`, the configuration
+    TOML file, and `-o/--output-dir`, which is created if it doesn't exist.
+
+    For each bug a `bug-<id>.json` file is written containing the results of
+    `get_raw_data()`, `get_history_raw()`, `getcomments()` and a copy of each
+    entry from `get_attachments()`. When an attachment has a non-None `data`
+    entry, its `.data` bytes go to `attachment-<id>.dat` and the JSON stores
+    that file name under `data` instead.
+
+    If the config file can't be read or parsed, the error is logged and the
+    function returns without dumping anything.
+    """
+    logging.basicConfig(level=logging.INFO)
+    arg_parser = argparse.ArgumentParser(
+        description="Dump all bugzilla data accessible from API")
+    arg_parser.add_argument(
+        "-c", "--config",
+        dest="config",
+        type=argparse.FileType('r'),
+        required=True,
+        metavar="<path/to/budget-sync-config.toml>",
+        help="The path to the configuration TOML file")
+    arg_parser.add_argument(
+        "-o", "--output-dir",
+        dest="output_dir",
+        type=Path,
+        required=True,
+        metavar="<path/to/output/dir>",
+        help="The path to the output directory, will be created if it "
+        "doesn't exist")
+    args = arg_parser.parse_args()
+
+    try:
+        with args.config as config_file:
+            config = Config.from_file(config_file)
+    except (IOError, ConfigParseError) as e:
+        logging.error("Failed to parse config file: %s", e)
+        return
+
+    logging.info("Using Bugzilla instance at %s", config.bugzilla_url)
+    bz = Bugzilla(config.bugzilla_url)
+    out_dir = args.output_dir
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    for bug in all_bugs(bz):
+        json_path = out_dir / ("bug-%d.json" % (bug.id,))
+        # Attachment dicts are shallow-copied so the edits below don't touch
+        # the objects owned by `bug`.
+        record = {
+            "data": bug.get_raw_data(),
+            "history": bug.get_history_raw(),
+            "comments": bug.getcomments(),
+            "attachments": [item.copy() for item in bug.get_attachments()],
+        }
+        for attachment in record["attachments"]:
+            payload = attachment.pop("data", None)
+            if payload is not None:
+                # Store the contents in a separate file and keep only the
+                # file name in the JSON.
+                file_name = "attachment-%d.dat" % (attachment['id'],)
+                attachment['data'] = file_name
+                payload_path = out_dir / file_name
+                payload_path.write_bytes(payload.data)
+                logging.info("Wrote %s", payload_path)
+        serialized = json.dumps(record, indent=4, default=_encode_json)
+        json_path.write_text(serialized, encoding="utf-8")
+        logging.info("Wrote %s", json_path)
+
+
+# Run the dump only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()