mirror of
https://github.com/bspeice/kiva-dig
synced 2024-12-03 20:28:10 -05:00
Add code to unpack the Kiva data for Spark
This commit is contained in:
parent
ab3c48853c
commit
71627e160e
1
.gitignore
vendored
1
.gitignore
vendored
@ -47,3 +47,4 @@ fabric.properties
|
||||
|
||||
# Don't include the full snapshot ZIP since it's massive.
|
||||
kiva_ds_json.zip
|
||||
*.json
|
@ -17,23 +17,7 @@
|
||||
<ConfirmationsSetting value="0" id="Add" />
|
||||
<ConfirmationsSetting value="0" id="Remove" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="true" assert-keyword="false" jdk-15="false">
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="false" assert-keyword="false" jdk-15="false" project-jdk-name="Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe)" project-jdk-type="Python SDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
<component name="masterDetails">
|
||||
<states>
|
||||
<state key="ProjectJDKs.UI">
|
||||
<settings>
|
||||
<last-edited>Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe)</last-edited>
|
||||
<splitter-proportions>
|
||||
<option name="proportions">
|
||||
<list>
|
||||
<option value="0.2" />
|
||||
</list>
|
||||
</option>
|
||||
</splitter-proportions>
|
||||
</settings>
|
||||
</state>
|
||||
</states>
|
||||
</component>
|
||||
</project>
|
@ -7,8 +7,11 @@
|
||||
</component>
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/kiva-data" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" name="Python 3.5.1 (C:\Users\Bradlee Speice\Anaconda3\python.exe) interpreter library" level="application" />
|
||||
</component>
|
||||
</module>
|
41
unpack_kiva.py
Normal file
41
unpack_kiva.py
Normal file
@ -0,0 +1,41 @@
|
||||
import zipfile
|
||||
import json
|
||||
import os
|
||||
|
||||
# Root folder (relative to the working directory) that receives the unpacked data.
kiva_folder = 'kiva-data/'
|
||||
|
||||
def mkdirs():
    """Create the output folder and its per-type sub-folders if missing.

    Ensures ``kiva_folder`` exists together with the ``loans``, ``lenders``
    and ``loans_lenders`` sub-folders. The call is idempotent: folders that
    already exist are left untouched, so the script can be re-run safely.

    Raises:
        FileExistsError: If one of the paths exists but is not a directory.
    """
    # exist_ok=True makes a re-run a no-op instead of failing on the root
    # folder that the previous run already created.
    os.makedirs(kiva_folder, exist_ok=True)
    for sub_folder in ('loans', 'lenders', 'loans_lenders'):
        os.makedirs(os.path.join(kiva_folder, sub_folder), exist_ok=True)
|
||||
|
||||
|
||||
def reformat_json(json_obj):
    """Serialize *json_obj* as compact JSON text with keys in sorted order.

    The ``,`` / ``:`` separators are used without padding, so the result
    contains no whitespace between items or between keys and values.
    """
    compact_separators = (',', ':')
    serialized = json.dumps(
        json_obj,
        separators=compact_separators,
        sort_keys=True,
    )
    return serialized
|
||||
|
||||
|
||||
def unpack_kiva(filename="kiva_ds_json.zip"):
    """Unpack the Kiva snapshot ZIP into one-JSON-object-per-line files.

    Every archive member whose name contains ``json`` is parsed. The
    collection stored under the key named after the member's first path
    component is extracted, and each of its elements is written as compact
    JSON on its own line to ``kiva_folder`` + member name. The destination
    sub-folders must already exist.

    Members that cannot be decoded as UTF-8 JSON are reported on stdout and
    skipped; the remaining members are still processed.

    Args:
        filename: Path to the snapshot ZIP archive.

    Raises:
        TypeError: If ``filename`` is not a valid ZIP archive.
        KeyError: If a parsed member lacks the key named after its folder.
    """
    if not zipfile.is_zipfile(filename):
        raise TypeError("Unable to unpack zip - Corrupted file?")

    # Context managers close the archive and each member even when a later
    # step fails part-way through.
    with zipfile.ZipFile(filename) as archive:
        for json_name in archive.namelist():
            if 'json' not in json_name:
                continue

            # Keep the try body limited to the calls that can actually raise
            # a decoding error.
            try:
                with archive.open(json_name) as member:
                    json_obj = json.loads(member.read().decode('utf8'))
            except (json.JSONDecodeError, UnicodeDecodeError):
                print("Error decoding file {}".format(json_name))
                continue

            # Get `loan`, `lender`, etc.
            obj_type = json_name.split('/')[0]
            formatted = [reformat_json(j) for j in json_obj[obj_type]]

            out_path = os.path.join(kiva_folder, json_name)
            with open(out_path, 'w', encoding='utf-8') as output:
                output.write('\n'.join(formatted))
|
||||
|
||||
if __name__ == '__main__':
    # The output folders must exist before the snapshot is extracted into them.
    mkdirs()
    unpack_kiva()
|
Loading…
Reference in New Issue
Block a user