mirror of
https://github.com/bspeice/kiva-dig
synced 2024-12-04 20:58:09 -05:00
Add a basic SQL example
This commit is contained in:
parent
45af64e68c
commit
f32e0b37f1
@ -1,157 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"sparkSql = (SparkSession.builder\n",
|
|
||||||
" .master(\"local\")\n",
|
|
||||||
" .appName(\"Kiva Exploration\")\n",
|
|
||||||
" .getOrCreate())\n",
|
|
||||||
"\n",
|
|
||||||
"loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
|
|
||||||
"lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
|
|
||||||
"loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('activity', 'string'),\n",
|
|
||||||
" ('arrears_amount', 'string'),\n",
|
|
||||||
" ('basket_amount', 'bigint'),\n",
|
|
||||||
" ('bonus_credit_eligibility', 'boolean'),\n",
|
|
||||||
" ('borrowers',\n",
|
|
||||||
" 'array<struct<first_name:string,gender:string,last_name:string,pictured:boolean>>'),\n",
|
|
||||||
" ('currency_exchange_loss_amount', 'double'),\n",
|
|
||||||
" ('delinquent', 'string'),\n",
|
|
||||||
" ('description',\n",
|
|
||||||
" 'struct<languages:array<string>,texts:struct<ar:string,en:string,es:string,fr:string,id:string,mn:string,pt:string,ru:string,vi:string>>'),\n",
|
|
||||||
" ('funded_amount', 'bigint'),\n",
|
|
||||||
" ('funded_date', 'string'),\n",
|
|
||||||
" ('id', 'bigint'),\n",
|
|
||||||
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
|
|
||||||
" ('journal_totals', 'struct<bulkEntries:bigint,entries:bigint>'),\n",
|
|
||||||
" ('lender_count', 'bigint'),\n",
|
|
||||||
" ('loan_amount', 'bigint'),\n",
|
|
||||||
" ('location',\n",
|
|
||||||
" 'struct<country:string,country_code:string,geo:struct<level:string,pairs:string,type:string>,town:string>'),\n",
|
|
||||||
" ('name', 'string'),\n",
|
|
||||||
" ('paid_amount', 'string'),\n",
|
|
||||||
" ('paid_date', 'string'),\n",
|
|
||||||
" ('partner_id', 'bigint'),\n",
|
|
||||||
" ('payments', 'array<string>'),\n",
|
|
||||||
" ('planned_expiration_date', 'string'),\n",
|
|
||||||
" ('posted_date', 'string'),\n",
|
|
||||||
" ('sector', 'string'),\n",
|
|
||||||
" ('status', 'string'),\n",
|
|
||||||
" ('tags', 'array<struct<name:string>>'),\n",
|
|
||||||
" ('terms',\n",
|
|
||||||
" 'struct<disbursal_amount:double,disbursal_currency:string,disbursal_date:string,loan_amount:bigint,local_payments:array<struct<amount:double,due_date:string>>,loss_liability:struct<currency_exchange:string,currency_exchange_coverage_rate:double,nonpayment:string>,repayment_interval:string,repayment_term:bigint,scheduled_payments:array<struct<amount:double,due_date:string>>>'),\n",
|
|
||||||
" ('themes', 'array<string>'),\n",
|
|
||||||
" ('translator', 'struct<byline:string,image:bigint>'),\n",
|
|
||||||
" ('use', 'string'),\n",
|
|
||||||
" ('video',\n",
|
|
||||||
" 'struct<id:bigint,thumbnailImageId:bigint,title:string,youtubeId:string>')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"loans.dtypes"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('country_code', 'string'),\n",
|
|
||||||
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
|
|
||||||
" ('invitee_count', 'bigint'),\n",
|
|
||||||
" ('inviter_id', 'string'),\n",
|
|
||||||
" ('lender_id', 'string'),\n",
|
|
||||||
" ('loan_because', 'string'),\n",
|
|
||||||
" ('loan_count', 'bigint'),\n",
|
|
||||||
" ('member_since', 'string'),\n",
|
|
||||||
" ('name', 'string'),\n",
|
|
||||||
" ('occupation', 'string'),\n",
|
|
||||||
" ('occupational_info', 'string'),\n",
|
|
||||||
" ('personal_url', 'string'),\n",
|
|
||||||
" ('uid', 'string'),\n",
|
|
||||||
" ('whereabouts', 'string')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"lenders.dtypes"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('id', 'bigint'), ('lender_ids', 'array<string>')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"loans_lenders.dtypes"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 2",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python2"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 2
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython2",
|
|
||||||
"version": "2.7.12"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 1
|
|
||||||
}
|
|
@ -1,157 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"sparkSql = (SparkSession.builder\n",
|
|
||||||
" .master(\"local\")\n",
|
|
||||||
" .appName(\"Kiva Exploration\")\n",
|
|
||||||
" .getOrCreate())\n",
|
|
||||||
"\n",
|
|
||||||
"loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
|
|
||||||
"lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
|
|
||||||
"loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('activity', 'string'),\n",
|
|
||||||
" ('arrears_amount', 'string'),\n",
|
|
||||||
" ('basket_amount', 'bigint'),\n",
|
|
||||||
" ('bonus_credit_eligibility', 'boolean'),\n",
|
|
||||||
" ('borrowers',\n",
|
|
||||||
" 'array<struct<first_name:string,gender:string,last_name:string,pictured:boolean>>'),\n",
|
|
||||||
" ('currency_exchange_loss_amount', 'double'),\n",
|
|
||||||
" ('delinquent', 'string'),\n",
|
|
||||||
" ('description',\n",
|
|
||||||
" 'struct<languages:array<string>,texts:struct<ar:string,en:string,es:string,fr:string,id:string,mn:string,pt:string,ru:string,vi:string>>'),\n",
|
|
||||||
" ('funded_amount', 'bigint'),\n",
|
|
||||||
" ('funded_date', 'string'),\n",
|
|
||||||
" ('id', 'bigint'),\n",
|
|
||||||
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
|
|
||||||
" ('journal_totals', 'struct<bulkEntries:bigint,entries:bigint>'),\n",
|
|
||||||
" ('lender_count', 'bigint'),\n",
|
|
||||||
" ('loan_amount', 'bigint'),\n",
|
|
||||||
" ('location',\n",
|
|
||||||
" 'struct<country:string,country_code:string,geo:struct<level:string,pairs:string,type:string>,town:string>'),\n",
|
|
||||||
" ('name', 'string'),\n",
|
|
||||||
" ('paid_amount', 'string'),\n",
|
|
||||||
" ('paid_date', 'string'),\n",
|
|
||||||
" ('partner_id', 'bigint'),\n",
|
|
||||||
" ('payments', 'array<string>'),\n",
|
|
||||||
" ('planned_expiration_date', 'string'),\n",
|
|
||||||
" ('posted_date', 'string'),\n",
|
|
||||||
" ('sector', 'string'),\n",
|
|
||||||
" ('status', 'string'),\n",
|
|
||||||
" ('tags', 'array<struct<name:string>>'),\n",
|
|
||||||
" ('terms',\n",
|
|
||||||
" 'struct<disbursal_amount:double,disbursal_currency:string,disbursal_date:string,loan_amount:bigint,local_payments:array<struct<amount:double,due_date:string>>,loss_liability:struct<currency_exchange:string,currency_exchange_coverage_rate:double,nonpayment:string>,repayment_interval:string,repayment_term:bigint,scheduled_payments:array<struct<amount:double,due_date:string>>>'),\n",
|
|
||||||
" ('themes', 'array<string>'),\n",
|
|
||||||
" ('translator', 'struct<byline:string,image:bigint>'),\n",
|
|
||||||
" ('use', 'string'),\n",
|
|
||||||
" ('video',\n",
|
|
||||||
" 'struct<id:bigint,thumbnailImageId:bigint,title:string,youtubeId:string>')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"loans.dtypes"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('country_code', 'string'),\n",
|
|
||||||
" ('image', 'struct<id:bigint,template_id:bigint>'),\n",
|
|
||||||
" ('invitee_count', 'bigint'),\n",
|
|
||||||
" ('inviter_id', 'string'),\n",
|
|
||||||
" ('lender_id', 'string'),\n",
|
|
||||||
" ('loan_because', 'string'),\n",
|
|
||||||
" ('loan_count', 'bigint'),\n",
|
|
||||||
" ('member_since', 'string'),\n",
|
|
||||||
" ('name', 'string'),\n",
|
|
||||||
" ('occupation', 'string'),\n",
|
|
||||||
" ('occupational_info', 'string'),\n",
|
|
||||||
" ('personal_url', 'string'),\n",
|
|
||||||
" ('uid', 'string'),\n",
|
|
||||||
" ('whereabouts', 'string')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"lenders.dtypes"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('id', 'bigint'), ('lender_ids', 'array<string>')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"loans_lenders.dtypes"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 2",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python2"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 2
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython2",
|
|
||||||
"version": "2.7.12"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 1
|
|
||||||
}
|
|
274
Kiva Exploration.ipynb
Normal file
274
Kiva Exploration.ipynb
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Understanding the Kiva Dataset\n",
|
||||||
|
"\n",
|
||||||
|
"Before we actually get into the work of predicting anything based on the data Kiva makes public, we first want to get a better picture of what the dataset actually looks like.\n",
|
||||||
|
"\n",
|
||||||
|
"Our first step: What is the schema of the data? Spark SQL will make it easy to query data in the future, but we need to know first what is available."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sparkSql = (SparkSession.builder\n",
|
||||||
|
" .master(\"local\")\n",
|
||||||
|
" .appName(\"Kiva Exploration\")\n",
|
||||||
|
" .getOrCreate())\n",
|
||||||
|
"\n",
|
||||||
|
"loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
|
||||||
|
"lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
|
||||||
|
"loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"root\n",
|
||||||
|
" |-- activity: string (nullable = true)\n",
|
||||||
|
" |-- basket_amount: long (nullable = true)\n",
|
||||||
|
" |-- bonus_credit_eligibility: boolean (nullable = true)\n",
|
||||||
|
" |-- borrowers: array (nullable = true)\n",
|
||||||
|
" | |-- element: struct (containsNull = true)\n",
|
||||||
|
" | | |-- first_name: string (nullable = true)\n",
|
||||||
|
" | | |-- gender: string (nullable = true)\n",
|
||||||
|
" | | |-- last_name: string (nullable = true)\n",
|
||||||
|
" | | |-- pictured: boolean (nullable = true)\n",
|
||||||
|
" |-- currency_exchange_loss_amount: double (nullable = true)\n",
|
||||||
|
" |-- delinquent: boolean (nullable = true)\n",
|
||||||
|
" |-- description: struct (nullable = true)\n",
|
||||||
|
" | |-- languages: array (nullable = true)\n",
|
||||||
|
" | | |-- element: string (containsNull = true)\n",
|
||||||
|
" | |-- texts: struct (nullable = true)\n",
|
||||||
|
" | | |-- ar: string (nullable = true)\n",
|
||||||
|
" | | |-- en: string (nullable = true)\n",
|
||||||
|
" | | |-- es: string (nullable = true)\n",
|
||||||
|
" | | |-- fr: string (nullable = true)\n",
|
||||||
|
" | | |-- id: string (nullable = true)\n",
|
||||||
|
" | | |-- mn: string (nullable = true)\n",
|
||||||
|
" | | |-- pt: string (nullable = true)\n",
|
||||||
|
" | | |-- ru: string (nullable = true)\n",
|
||||||
|
" | | |-- vi: string (nullable = true)\n",
|
||||||
|
" |-- funded_amount: long (nullable = true)\n",
|
||||||
|
" |-- funded_date: string (nullable = true)\n",
|
||||||
|
" |-- id: long (nullable = true)\n",
|
||||||
|
" |-- image: struct (nullable = true)\n",
|
||||||
|
" | |-- id: long (nullable = true)\n",
|
||||||
|
" | |-- template_id: long (nullable = true)\n",
|
||||||
|
" |-- journal_totals: struct (nullable = true)\n",
|
||||||
|
" | |-- bulkEntries: long (nullable = true)\n",
|
||||||
|
" | |-- entries: long (nullable = true)\n",
|
||||||
|
" |-- lender_count: long (nullable = true)\n",
|
||||||
|
" |-- loan_amount: long (nullable = true)\n",
|
||||||
|
" |-- location: struct (nullable = true)\n",
|
||||||
|
" | |-- country: string (nullable = true)\n",
|
||||||
|
" | |-- country_code: string (nullable = true)\n",
|
||||||
|
" | |-- geo: struct (nullable = true)\n",
|
||||||
|
" | | |-- level: string (nullable = true)\n",
|
||||||
|
" | | |-- pairs: string (nullable = true)\n",
|
||||||
|
" | | |-- type: string (nullable = true)\n",
|
||||||
|
" | |-- town: string (nullable = true)\n",
|
||||||
|
" |-- name: string (nullable = true)\n",
|
||||||
|
" |-- paid_amount: double (nullable = true)\n",
|
||||||
|
" |-- paid_date: string (nullable = true)\n",
|
||||||
|
" |-- partner_id: long (nullable = true)\n",
|
||||||
|
" |-- payments: array (nullable = true)\n",
|
||||||
|
" | |-- element: struct (containsNull = true)\n",
|
||||||
|
" | | |-- amount: double (nullable = true)\n",
|
||||||
|
" | | |-- currency_exchange_loss_amount: double (nullable = true)\n",
|
||||||
|
" | | |-- local_amount: double (nullable = true)\n",
|
||||||
|
" | | |-- payment_id: long (nullable = true)\n",
|
||||||
|
" | | |-- processed_date: string (nullable = true)\n",
|
||||||
|
" | | |-- rounded_local_amount: double (nullable = true)\n",
|
||||||
|
" | | |-- settlement_date: string (nullable = true)\n",
|
||||||
|
" |-- planned_expiration_date: string (nullable = true)\n",
|
||||||
|
" |-- posted_date: string (nullable = true)\n",
|
||||||
|
" |-- sector: string (nullable = true)\n",
|
||||||
|
" |-- status: string (nullable = true)\n",
|
||||||
|
" |-- tags: array (nullable = true)\n",
|
||||||
|
" | |-- element: struct (containsNull = true)\n",
|
||||||
|
" | | |-- name: string (nullable = true)\n",
|
||||||
|
" |-- terms: struct (nullable = true)\n",
|
||||||
|
" | |-- disbursal_amount: double (nullable = true)\n",
|
||||||
|
" | |-- disbursal_currency: string (nullable = true)\n",
|
||||||
|
" | |-- disbursal_date: string (nullable = true)\n",
|
||||||
|
" | |-- loan_amount: long (nullable = true)\n",
|
||||||
|
" | |-- local_payments: array (nullable = true)\n",
|
||||||
|
" | | |-- element: struct (containsNull = true)\n",
|
||||||
|
" | | | |-- amount: double (nullable = true)\n",
|
||||||
|
" | | | |-- due_date: string (nullable = true)\n",
|
||||||
|
" | |-- loss_liability: struct (nullable = true)\n",
|
||||||
|
" | | |-- currency_exchange: string (nullable = true)\n",
|
||||||
|
" | | |-- currency_exchange_coverage_rate: double (nullable = true)\n",
|
||||||
|
" | | |-- nonpayment: string (nullable = true)\n",
|
||||||
|
" | |-- repayment_interval: string (nullable = true)\n",
|
||||||
|
" | |-- repayment_term: long (nullable = true)\n",
|
||||||
|
" | |-- scheduled_payments: array (nullable = true)\n",
|
||||||
|
" | | |-- element: struct (containsNull = true)\n",
|
||||||
|
" | | | |-- amount: double (nullable = true)\n",
|
||||||
|
" | | | |-- due_date: string (nullable = true)\n",
|
||||||
|
" |-- themes: array (nullable = true)\n",
|
||||||
|
" | |-- element: string (containsNull = true)\n",
|
||||||
|
" |-- translator: struct (nullable = true)\n",
|
||||||
|
" | |-- byline: string (nullable = true)\n",
|
||||||
|
" | |-- image: long (nullable = true)\n",
|
||||||
|
" |-- use: string (nullable = true)\n",
|
||||||
|
" |-- video: struct (nullable = true)\n",
|
||||||
|
" | |-- id: long (nullable = true)\n",
|
||||||
|
" | |-- thumbnailImageId: long (nullable = true)\n",
|
||||||
|
" | |-- title: string (nullable = true)\n",
|
||||||
|
" | |-- youtubeId: string (nullable = true)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loans.printSchema()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Row(status=u'refunded', count=5504),\n",
|
||||||
|
" Row(status=u'defaulted', count=21776),\n",
|
||||||
|
" Row(status=u'in_repayment', count=155749),\n",
|
||||||
|
" Row(status=u'reviewed', count=3),\n",
|
||||||
|
" Row(status=u'deleted', count=2721),\n",
|
||||||
|
" Row(status=u'paid', count=775330),\n",
|
||||||
|
" Row(status=u'issue', count=199),\n",
|
||||||
|
" Row(status=u'inactive_expired', count=12421),\n",
|
||||||
|
" Row(status=u'fundraising', count=3986),\n",
|
||||||
|
" Row(status=u'expired', count=33773),\n",
|
||||||
|
" Row(status=u'inactive', count=2493),\n",
|
||||||
|
" Row(status=u'funded', count=173),\n",
|
||||||
|
" Row(status=u'', count=2)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loans.groupby(loans.status).count().collect()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Row(delinquent=None, count=970465), Row(delinquent=True, count=43665)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loans.groupby(loans.delinquent).count().collect()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Row(status=u'refunded', count=156),\n",
|
||||||
|
" Row(status=u'defaulted', count=20116),\n",
|
||||||
|
" Row(status=u'in_repayment', count=23393)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loans.where(loans.delinquent == True).groupby(loans.status).count().collect()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Row(status=u'in_repayment')]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loans.registerTempTable('loans')\n",
|
||||||
|
"sparkSql.sql('''\n",
|
||||||
|
"SELECT loans.status\n",
|
||||||
|
"FROM loans\n",
|
||||||
|
"LIMIT 1\n",
|
||||||
|
"''').collect()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 2",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python2"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 2
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython2",
|
||||||
|
"version": "2.7.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 1
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user