Add initial code to list the JSON datatypes

pull/1/head
Bradlee Speice 2016-10-25 10:01:44 -04:00
parent cc1c73d050
commit 45af64e68c
2 changed files with 314 additions and 0 deletions

View File

@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sparkSql = (SparkSession.builder\n",
" .master(\"local\")\n",
" .appName(\"Kiva Exploration\")\n",
" .getOrCreate())\n",
"\n",
"loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
"lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
"loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('activity', 'string'),\n",
" ('arrears_amount', 'string'),\n",
" ('basket_amount', 'bigint'),\n",
" ('bonus_credit_eligibility', 'boolean'),\n",
" ('borrowers',\n",
" 'array<struct<first_name:string,gender:string,last_name:string,pictured:boolean>>'),\n",
" ('currency_exchange_loss_amount', 'double'),\n",
" ('delinquent', 'string'),\n",
" ('description',\n",
" 'struct<languages:array<string>,texts:struct<ar:string,en:string,es:string,fr:string,id:string,mn:string,pt:string,ru:string,vi:string>>'),\n",
" ('funded_amount', 'bigint'),\n",
" ('funded_date', 'string'),\n",
" ('id', 'bigint'),\n",
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
" ('journal_totals', 'struct<bulkEntries:bigint,entries:bigint>'),\n",
" ('lender_count', 'bigint'),\n",
" ('loan_amount', 'bigint'),\n",
" ('location',\n",
" 'struct<country:string,country_code:string,geo:struct<level:string,pairs:string,type:string>,town:string>'),\n",
" ('name', 'string'),\n",
" ('paid_amount', 'string'),\n",
" ('paid_date', 'string'),\n",
" ('partner_id', 'bigint'),\n",
" ('payments', 'array<string>'),\n",
" ('planned_expiration_date', 'string'),\n",
" ('posted_date', 'string'),\n",
" ('sector', 'string'),\n",
" ('status', 'string'),\n",
" ('tags', 'array<struct<name:string>>'),\n",
" ('terms',\n",
" 'struct<disbursal_amount:double,disbursal_currency:string,disbursal_date:string,loan_amount:bigint,local_payments:array<struct<amount:double,due_date:string>>,loss_liability:struct<currency_exchange:string,currency_exchange_coverage_rate:double,nonpayment:string>,repayment_interval:string,repayment_term:bigint,scheduled_payments:array<struct<amount:double,due_date:string>>>'),\n",
" ('themes', 'array<string>'),\n",
" ('translator', 'struct<byline:string,image:bigint>'),\n",
" ('use', 'string'),\n",
" ('video',\n",
" 'struct<id:bigint,thumbnailImageId:bigint,title:string,youtubeId:string>')]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('country_code', 'string'),\n",
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
" ('invitee_count', 'bigint'),\n",
" ('inviter_id', 'string'),\n",
" ('lender_id', 'string'),\n",
" ('loan_because', 'string'),\n",
" ('loan_count', 'bigint'),\n",
" ('member_since', 'string'),\n",
" ('name', 'string'),\n",
" ('occupation', 'string'),\n",
" ('occupational_info', 'string'),\n",
" ('personal_url', 'string'),\n",
" ('uid', 'string'),\n",
" ('whereabouts', 'string')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lenders.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('id', 'bigint'), ('lender_ids', 'array<string>')]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans_lenders.dtypes"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

157
Kiva Datatypes.ipynb Normal file
View File

@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sparkSql = (SparkSession.builder\n",
" .master(\"local\")\n",
" .appName(\"Kiva Exploration\")\n",
" .getOrCreate())\n",
"\n",
"loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
"lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
"loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('activity', 'string'),\n",
" ('arrears_amount', 'string'),\n",
" ('basket_amount', 'bigint'),\n",
" ('bonus_credit_eligibility', 'boolean'),\n",
" ('borrowers',\n",
" 'array<struct<first_name:string,gender:string,last_name:string,pictured:boolean>>'),\n",
" ('currency_exchange_loss_amount', 'double'),\n",
" ('delinquent', 'string'),\n",
" ('description',\n",
" 'struct<languages:array<string>,texts:struct<ar:string,en:string,es:string,fr:string,id:string,mn:string,pt:string,ru:string,vi:string>>'),\n",
" ('funded_amount', 'bigint'),\n",
" ('funded_date', 'string'),\n",
" ('id', 'bigint'),\n",
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
" ('journal_totals', 'struct<bulkEntries:bigint,entries:bigint>'),\n",
" ('lender_count', 'bigint'),\n",
" ('loan_amount', 'bigint'),\n",
" ('location',\n",
" 'struct<country:string,country_code:string,geo:struct<level:string,pairs:string,type:string>,town:string>'),\n",
" ('name', 'string'),\n",
" ('paid_amount', 'string'),\n",
" ('paid_date', 'string'),\n",
" ('partner_id', 'bigint'),\n",
" ('payments', 'array<string>'),\n",
" ('planned_expiration_date', 'string'),\n",
" ('posted_date', 'string'),\n",
" ('sector', 'string'),\n",
" ('status', 'string'),\n",
" ('tags', 'array<struct<name:string>>'),\n",
" ('terms',\n",
" 'struct<disbursal_amount:double,disbursal_currency:string,disbursal_date:string,loan_amount:bigint,local_payments:array<struct<amount:double,due_date:string>>,loss_liability:struct<currency_exchange:string,currency_exchange_coverage_rate:double,nonpayment:string>,repayment_interval:string,repayment_term:bigint,scheduled_payments:array<struct<amount:double,due_date:string>>>'),\n",
" ('themes', 'array<string>'),\n",
" ('translator', 'struct<byline:string,image:bigint>'),\n",
" ('use', 'string'),\n",
" ('video',\n",
" 'struct<id:bigint,thumbnailImageId:bigint,title:string,youtubeId:string>')]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('country_code', 'string'),\n",
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
" ('invitee_count', 'bigint'),\n",
" ('inviter_id', 'string'),\n",
" ('lender_id', 'string'),\n",
" ('loan_because', 'string'),\n",
" ('loan_count', 'bigint'),\n",
" ('member_since', 'string'),\n",
" ('name', 'string'),\n",
" ('occupation', 'string'),\n",
" ('occupational_info', 'string'),\n",
" ('personal_url', 'string'),\n",
" ('uid', 'string'),\n",
" ('whereabouts', 'string')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lenders.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('id', 'bigint'), ('lender_ids', 'array<string>')]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans_lenders.dtypes"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}