diff --git a/.gitignore b/.gitignore
index 4a1abea..04b7ce2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,4 @@ fabric.properties
# Don't include the full snapshot ZIP since it's massive.
kiva_ds_json.zip
+*.json
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index e212a38..56993a3 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -17,23 +17,7 @@
-
+
-
-
-
-
- Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe)
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/kiva-dig.iml b/kiva-dig.iml
index 9468285..78c0c16 100644
--- a/kiva-dig.iml
+++ b/kiva-dig.iml
@@ -7,8 +7,11 @@
-
+
+
+
+
\ No newline at end of file
diff --git a/unpack_kiva.py b/unpack_kiva.py
new file mode 100644
index 0000000..a213e06
--- /dev/null
+++ b/unpack_kiva.py
@@ -0,0 +1,41 @@
+import zipfile
+import json
+import os
+
+kiva_folder = 'kiva-data/'  # output root; keep the trailing slash, paths are built by concatenation
+
+def mkdirs():
+    """Create `kiva_folder` and its per-type subfolders; safe to re-run."""
+    # makedirs(exist_ok=True) creates the parent too and tolerates existing
+    # directories, where a bare os.mkdir(kiva_folder) raises FileExistsError.
+    for sub in ('loans', 'lenders', 'loans_lenders'):
+        os.makedirs(os.path.join(kiva_folder, sub), exist_ok=True)
+
+
+def reformat_json(json_obj):  # one record -> compact, key-sorted JSON text
+    return json.dumps(json_obj, separators=(',', ':'), sort_keys=True)
+
+
+def unpack_kiva(filename="kiva_ds_json.zip"):
+    """Write each JSON member's records, one per line, under kiva_folder.
+
+    Raises TypeError when `filename` is not a valid zip archive."""
+    if not zipfile.is_zipfile(filename):
+        raise TypeError("Unable to unpack zip - Corrupted file?")
+    with zipfile.ZipFile(filename) as z:  # closes the archive even on error
+        for json_name in filter(lambda x: 'json' in x, z.namelist()):
+            try:
+                with z.open(json_name) as json_file:
+                    json_obj = json.loads(json_file.read().decode('utf8'))
+            except json.JSONDecodeError:
+                print("Error decoding file {}".format(json_name))
+                continue
+            # Top-level folder (`loans`, `lenders`, ...) is also the records key.
+            records = json_obj[json_name.split('/')[0]]
+            formatted = [reformat_json(j) for j in records]
+            with open(kiva_folder + json_name, 'w') as output:
+                output.write('\n'.join(formatted))
+
+if __name__ == '__main__':  # run the extraction only when executed as a script
+ mkdirs()  # create the output folders that unpack_kiva() writes into
+ unpack_kiva()