You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
1.7KB

  1. import zipfile
  2. import json
  3. import os
  4. try:
  5. from json import JSONDecodeError
  6. except ImportError:
  7. JSONDecodeError = ValueError # Python 2 compatibility
  8. kiva_root = 'kiva-data/'
  9. kiva_folders = ['loans', 'lenders', 'loans_lenders']
  10. def mkdirs():
  11. os.mkdir(kiva_root)
  12. for f in map(lambda x: kiva_root + x,
  13. kiva_folders):
  14. if not os.path.isdir(f):
  15. os.mkdir(f)
  16. def reformat_json(json_obj):
  17. return json.dumps(json_obj, sort_keys=True, separators=(',', ':'))
  18. def unpack_kiva(filename="kiva_ds_json.zip"):
  19. if not zipfile.is_zipfile(filename):
  20. raise TypeError("Unable to unpack zip - Corrupted file?")
  21. z = zipfile.ZipFile(filename)
  22. names = z.namelist()
  23. for json_name in filter(lambda x: 'json' in x, names):
  24. try:
  25. json_file = z.open(json_name)
  26. json_string = json_file.read().decode('utf8')
  27. json_obj = json.loads(json_string)
  28. # Get `loan`, `lender`, etc.
  29. obj_type = json_name.split('/')[0]
  30. json_content = json_obj[obj_type]
  31. formatted = [reformat_json(j) for j in json_content]
  32. with open(kiva_root + json_name, 'w+') as output:
  33. output.write('\n'.join(formatted))
  34. except JSONDecodeError:
  35. print("Error decoding file {}".format(json_name))
  36. def merge_kiva():
  37. for folder in kiva_folders:
  38. files = os.listdir(kiva_root + folder)
  39. out_handle = open(kiva_root + folder + '.json', 'w+')
  40. for f in files:
  41. in_handle = open(os.path.join(kiva_root, folder, f), 'r')
  42. for line in in_handle:
  43. out_handle.write(line)
  44. if __name__ == '__main__':
  45. mkdirs()
  46. unpack_kiva()
  47. merge_kiva()