diff --git a/.ipynb_checkpoints/Kiva Datatypes-checkpoint.ipynb b/.ipynb_checkpoints/Kiva Datatypes-checkpoint.ipynb new file mode 100644 index 0000000..b05499c --- /dev/null +++ b/.ipynb_checkpoints/Kiva Datatypes-checkpoint.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sparkSql = (SparkSession.builder\n", + " .master(\"local\")\n", + " .appName(\"Kiva Exploration\")\n", + " .getOrCreate())\n", + "\n", + "loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n", + "lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n", + "loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('activity', 'string'),\n", + " ('arrears_amount', 'string'),\n", + " ('basket_amount', 'bigint'),\n", + " ('bonus_credit_eligibility', 'boolean'),\n", + " ('borrowers',\n", + " 'array>'),\n", + " ('currency_exchange_loss_amount', 'double'),\n", + " ('delinquent', 'string'),\n", + " ('description',\n", + " 'struct,texts:struct>'),\n", + " ('funded_amount', 'bigint'),\n", + " ('funded_date', 'string'),\n", + " ('id', 'bigint'),\n", + " ('image', 'struct'),\n", + " ('journal_totals', 'struct'),\n", + " ('lender_count', 'bigint'),\n", + " ('loan_amount', 'bigint'),\n", + " ('location',\n", + " 'struct,town:string>'),\n", + " ('name', 'string'),\n", + " ('paid_amount', 'string'),\n", + " ('paid_date', 'string'),\n", + " ('partner_id', 'bigint'),\n", + " ('payments', 'array'),\n", + " ('planned_expiration_date', 'string'),\n", + " ('posted_date', 'string'),\n", + " ('sector', 'string'),\n", + " ('status', 'string'),\n", + " ('tags', 'array>'),\n", + " ('terms',\n", + " 'struct>,loss_liability:struct,repayment_interval:string,repayment_term:bigint,scheduled_payments:array>>'),\n", + " ('themes', 'array'),\n", + " ('translator', 'struct'),\n", + " ('use', 'string'),\n", + " ('video',\n", + " 'struct')]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loans.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('country_code', 'string'),\n", + " ('image', 'struct'),\n", + " ('invitee_count', 'bigint'),\n", + " ('inviter_id', 'string'),\n", + " ('lender_id', 'string'),\n", + " ('loan_because', 'string'),\n", + " ('loan_count', 'bigint'),\n", + " ('member_since', 'string'),\n", + " ('name', 'string'),\n", + " ('occupation', 'string'),\n", + " ('occupational_info', 'string'),\n", + " ('personal_url', 'string'),\n", + " ('uid', 'string'),\n", + " ('whereabouts', 'string')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lenders.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('id', 'bigint'), ('lender_ids', 'array')]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loans_lenders.dtypes" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Kiva Datatypes.ipynb b/Kiva Datatypes.ipynb new file mode 100644 index 0000000..b05499c --- /dev/null +++ b/Kiva Datatypes.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sparkSql = (SparkSession.builder\n", + " .master(\"local\")\n", + " .appName(\"Kiva Exploration\")\n", + " .getOrCreate())\n", + "\n", + "loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n", + "lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n", + "loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('activity', 'string'),\n", + " ('arrears_amount', 'string'),\n", + " ('basket_amount', 'bigint'),\n", + " ('bonus_credit_eligibility', 'boolean'),\n", + " ('borrowers',\n", + " 'array>'),\n", + " ('currency_exchange_loss_amount', 'double'),\n", + " ('delinquent', 'string'),\n", + " ('description',\n", + " 'struct,texts:struct>'),\n", + " ('funded_amount', 'bigint'),\n", + " ('funded_date', 'string'),\n", + " ('id', 'bigint'),\n", + " ('image', 'struct'),\n", + " ('journal_totals', 'struct'),\n", + " ('lender_count', 'bigint'),\n", + " ('loan_amount', 'bigint'),\n", + " ('location',\n", + " 'struct,town:string>'),\n", + " ('name', 'string'),\n", + " ('paid_amount', 'string'),\n", + " ('paid_date', 'string'),\n", + " ('partner_id', 'bigint'),\n", + " ('payments', 'array'),\n", + " ('planned_expiration_date', 'string'),\n", + " ('posted_date', 'string'),\n", + " ('sector', 'string'),\n", + " ('status', 'string'),\n", + " ('tags', 'array>'),\n", + " ('terms',\n", + " 'struct>,loss_liability:struct,repayment_interval:string,repayment_term:bigint,scheduled_payments:array>>'),\n", + " ('themes', 'array'),\n", + " ('translator', 'struct'),\n", + " ('use', 'string'),\n", + " ('video',\n", + " 'struct')]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loans.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('country_code', 'string'),\n", + " ('image', 'struct'),\n", + " ('invitee_count', 'bigint'),\n", + " ('inviter_id', 'string'),\n", + " ('lender_id', 'string'),\n", + " ('loan_because', 'string'),\n", + " ('loan_count', 'bigint'),\n", + " ('member_since', 'string'),\n", + " ('name', 'string'),\n", + " ('occupation', 'string'),\n", + " ('occupational_info', 'string'),\n", + " ('personal_url', 'string'),\n", + " ('uid', 'string'),\n", + " ('whereabouts', 'string')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lenders.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('id', 'bigint'), ('lender_ids', 'array')]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loans_lenders.dtypes" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}