mirror of
https://github.com/bspeice/kiva-dig
synced 2024-12-04 12:48:10 -05:00
Add code to unpack the Kiva data for Spark
This commit is contained in:
parent
ab3c48853c
commit
71627e160e
1
.gitignore
vendored
1
.gitignore
vendored
@ -47,3 +47,4 @@ fabric.properties
|
|||||||
|
|
||||||
# Don't include the full snapshot ZIP since it's massive.
|
# Don't include the full snapshot ZIP since it's massive.
|
||||||
kiva_ds_json.zip
|
kiva_ds_json.zip
|
||||||
|
*.json
|
@ -17,23 +17,7 @@
|
|||||||
<ConfirmationsSetting value="0" id="Add" />
|
<ConfirmationsSetting value="0" id="Add" />
|
||||||
<ConfirmationsSetting value="0" id="Remove" />
|
<ConfirmationsSetting value="0" id="Remove" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="true" assert-keyword="false" jdk-15="false">
|
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="false" assert-keyword="false" jdk-15="false" project-jdk-name="Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe)" project-jdk-type="Python SDK">
|
||||||
<output url="file://$PROJECT_DIR$/out" />
|
<output url="file://$PROJECT_DIR$/out" />
|
||||||
</component>
|
</component>
|
||||||
<component name="masterDetails">
|
|
||||||
<states>
|
|
||||||
<state key="ProjectJDKs.UI">
|
|
||||||
<settings>
|
|
||||||
<last-edited>Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe)</last-edited>
|
|
||||||
<splitter-proportions>
|
|
||||||
<option name="proportions">
|
|
||||||
<list>
|
|
||||||
<option value="0.2" />
|
|
||||||
</list>
|
|
||||||
</option>
|
|
||||||
</splitter-proportions>
|
|
||||||
</settings>
|
|
||||||
</state>
|
|
||||||
</states>
|
|
||||||
</component>
|
|
||||||
</project>
|
</project>
|
@ -7,8 +7,11 @@
|
|||||||
</component>
|
</component>
|
||||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||||
<exclude-output />
|
<exclude-output />
|
||||||
<content url="file://$MODULE_DIR$" />
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/kiva-data" />
|
||||||
|
</content>
|
||||||
<orderEntry type="inheritedJdk" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
<orderEntry type="library" name="Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe) interpreter library" level="application" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
41
unpack_kiva.py
Normal file
41
unpack_kiva.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
import zipfile
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Root directory (relative to the current working directory) under which the
# unpacked Kiva JSON files are written. Note the trailing slash.
kiva_folder = 'kiva-data/'
|
||||||
|
|
||||||
|
def mkdirs(folder=None):
    """Create the output folder and its per-type sub-folders.

    Safe to call repeatedly: directories that already exist are left
    untouched instead of raising ``FileExistsError``.

    Args:
        folder: Root output directory. When omitted, the module-level
            ``kiva_folder`` is used.
    """
    if folder is None:
        folder = kiva_folder
    for name in ('loans', 'lenders', 'loans_lenders'):
        # makedirs creates the root folder as a side effect and, with
        # exist_ok=True, tolerates directories left over from an earlier run.
        os.makedirs(os.path.join(folder, name), exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def reformat_json(json_obj):
    """Serialize *json_obj* as compact single-line JSON with sorted keys."""
    # No whitespace after ',' or ':' keeps each record as short as possible.
    compact_separators = (',', ':')
    return json.dumps(json_obj, separators=compact_separators, sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
|
def unpack_kiva(filename="kiva_ds_json.zip", output_folder=None):
    """Unpack the Kiva snapshot ZIP into line-delimited JSON files.

    Every ``.json`` member of the archive is parsed. The member's top-level
    folder name (e.g. ``loans`` for ``loans/1.json``) is used as the key
    under which the parsed object is expected to hold an iterable of
    records. Each record is re-serialized on its own line and written to
    the same relative path below the output folder.

    Members that are not valid JSON are reported on stdout and skipped.

    Args:
        filename: Path of the ZIP archive to unpack.
        output_folder: Directory to write into. When omitted, the
            module-level ``kiva_folder`` is used.

    Raises:
        TypeError: If ``filename`` is not a readable ZIP archive.
        KeyError: If a parsed member lacks the key named after its
            top-level folder.
    """
    if not zipfile.is_zipfile(filename):
        raise TypeError("Unable to unpack zip - Corrupted file?")
    if output_folder is None:
        output_folder = kiva_folder

    # Context managers guarantee the archive and every member stream are
    # closed, even when a member fails part-way through.
    with zipfile.ZipFile(filename) as archive:
        for json_name in archive.namelist():
            # Match on the extension rather than any occurrence of "json",
            # so directories or unrelated files are not picked up.
            if not json_name.lower().endswith('.json'):
                continue

            with archive.open(json_name) as member:
                json_string = member.read().decode('utf8')

            try:
                json_obj = json.loads(json_string)
            except json.JSONDecodeError:
                print("Error decoding file {}".format(json_name))
                continue

            # Get `loans`, `lenders`, etc.
            obj_type = json_name.split('/')[0]
            formatted = [reformat_json(j) for j in json_obj[obj_type]]

            out_path = os.path.join(output_folder, json_name)
            out_dir = os.path.dirname(out_path)
            if out_dir:
                # Do not rely on the directories having been created
                # beforehand.
                os.makedirs(out_dir, exist_ok=True)
            with open(out_path, 'w', encoding='utf8') as output:
                output.write('\n'.join(formatted))
|
||||||
|
|
||||||
|
# Script entry point: prepare the output directories, then unpack the archive.
if __name__ == '__main__':
    mkdirs()
    unpack_kiva()
|
Loading…
Reference in New Issue
Block a user