mirror of
				https://github.com/bspeice/kiva-dig
				synced 2025-11-04 02:10:25 -05:00 
			
		
		
		
	Add a basic SQL example
This commit is contained in:
		@ -1,157 +0,0 @@
 | 
			
		||||
{
 | 
			
		||||
 "cells": [
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 1,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": true
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "sparkSql = (SparkSession.builder\n",
 | 
			
		||||
    "         .master(\"local\")\n",
 | 
			
		||||
    "         .appName(\"Kiva Exploration\")\n",
 | 
			
		||||
    "         .getOrCreate())\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
 | 
			
		||||
    "lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
 | 
			
		||||
    "loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 2,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[('activity', 'string'),\n",
 | 
			
		||||
       " ('arrears_amount', 'string'),\n",
 | 
			
		||||
       " ('basket_amount', 'bigint'),\n",
 | 
			
		||||
       " ('bonus_credit_eligibility', 'boolean'),\n",
 | 
			
		||||
       " ('borrowers',\n",
 | 
			
		||||
       "  'array<struct<first_name:string,gender:string,last_name:string,pictured:boolean>>'),\n",
 | 
			
		||||
       " ('currency_exchange_loss_amount', 'double'),\n",
 | 
			
		||||
       " ('delinquent', 'string'),\n",
 | 
			
		||||
       " ('description',\n",
 | 
			
		||||
       "  'struct<languages:array<string>,texts:struct<ar:string,en:string,es:string,fr:string,id:string,mn:string,pt:string,ru:string,vi:string>>'),\n",
 | 
			
		||||
       " ('funded_amount', 'bigint'),\n",
 | 
			
		||||
       " ('funded_date', 'string'),\n",
 | 
			
		||||
       " ('id', 'bigint'),\n",
 | 
			
		||||
       " ('image', 'struct<id:bigint,template_id:bigint>'),\n",
 | 
			
		||||
       " ('journal_totals', 'struct<bulkEntries:bigint,entries:bigint>'),\n",
 | 
			
		||||
       " ('lender_count', 'bigint'),\n",
 | 
			
		||||
       " ('loan_amount', 'bigint'),\n",
 | 
			
		||||
       " ('location',\n",
 | 
			
		||||
       "  'struct<country:string,country_code:string,geo:struct<level:string,pairs:string,type:string>,town:string>'),\n",
 | 
			
		||||
       " ('name', 'string'),\n",
 | 
			
		||||
       " ('paid_amount', 'string'),\n",
 | 
			
		||||
       " ('paid_date', 'string'),\n",
 | 
			
		||||
       " ('partner_id', 'bigint'),\n",
 | 
			
		||||
       " ('payments', 'array<string>'),\n",
 | 
			
		||||
       " ('planned_expiration_date', 'string'),\n",
 | 
			
		||||
       " ('posted_date', 'string'),\n",
 | 
			
		||||
       " ('sector', 'string'),\n",
 | 
			
		||||
       " ('status', 'string'),\n",
 | 
			
		||||
       " ('tags', 'array<struct<name:string>>'),\n",
 | 
			
		||||
       " ('terms',\n",
 | 
			
		||||
       "  'struct<disbursal_amount:double,disbursal_currency:string,disbursal_date:string,loan_amount:bigint,local_payments:array<struct<amount:double,due_date:string>>,loss_liability:struct<currency_exchange:string,currency_exchange_coverage_rate:double,nonpayment:string>,repayment_interval:string,repayment_term:bigint,scheduled_payments:array<struct<amount:double,due_date:string>>>'),\n",
 | 
			
		||||
       " ('themes', 'array<string>'),\n",
 | 
			
		||||
       " ('translator', 'struct<byline:string,image:bigint>'),\n",
 | 
			
		||||
       " ('use', 'string'),\n",
 | 
			
		||||
       " ('video',\n",
 | 
			
		||||
       "  'struct<id:bigint,thumbnailImageId:bigint,title:string,youtubeId:string>')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 2,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.dtypes"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 3,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[('country_code', 'string'),\n",
 | 
			
		||||
       " ('image', 'struct<id:bigint,template_id:bigint>'),\n",
 | 
			
		||||
       " ('invitee_count', 'bigint'),\n",
 | 
			
		||||
       " ('inviter_id', 'string'),\n",
 | 
			
		||||
       " ('lender_id', 'string'),\n",
 | 
			
		||||
       " ('loan_because', 'string'),\n",
 | 
			
		||||
       " ('loan_count', 'bigint'),\n",
 | 
			
		||||
       " ('member_since', 'string'),\n",
 | 
			
		||||
       " ('name', 'string'),\n",
 | 
			
		||||
       " ('occupation', 'string'),\n",
 | 
			
		||||
       " ('occupational_info', 'string'),\n",
 | 
			
		||||
       " ('personal_url', 'string'),\n",
 | 
			
		||||
       " ('uid', 'string'),\n",
 | 
			
		||||
       " ('whereabouts', 'string')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 3,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "lenders.dtypes"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 4,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[('id', 'bigint'), ('lender_ids', 'array<string>')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 4,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans_lenders.dtypes"
 | 
			
		||||
   ]
 | 
			
		||||
  }
 | 
			
		||||
 ],
 | 
			
		||||
 "metadata": {
 | 
			
		||||
  "kernelspec": {
 | 
			
		||||
   "display_name": "Python 2",
 | 
			
		||||
   "language": "python",
 | 
			
		||||
   "name": "python2"
 | 
			
		||||
  },
 | 
			
		||||
  "language_info": {
 | 
			
		||||
   "codemirror_mode": {
 | 
			
		||||
    "name": "ipython",
 | 
			
		||||
    "version": 2
 | 
			
		||||
   },
 | 
			
		||||
   "file_extension": ".py",
 | 
			
		||||
   "mimetype": "text/x-python",
 | 
			
		||||
   "name": "python",
 | 
			
		||||
   "nbconvert_exporter": "python",
 | 
			
		||||
   "pygments_lexer": "ipython2",
 | 
			
		||||
   "version": "2.7.12"
 | 
			
		||||
  }
 | 
			
		||||
 },
 | 
			
		||||
 "nbformat": 4,
 | 
			
		||||
 "nbformat_minor": 1
 | 
			
		||||
}
 | 
			
		||||
@ -1,157 +0,0 @@
 | 
			
		||||
{
 | 
			
		||||
 "cells": [
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 1,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": true
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "sparkSql = (SparkSession.builder\n",
 | 
			
		||||
    "         .master(\"local\")\n",
 | 
			
		||||
    "         .appName(\"Kiva Exploration\")\n",
 | 
			
		||||
    "         .getOrCreate())\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
 | 
			
		||||
    "lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
 | 
			
		||||
    "loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 2,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[('activity', 'string'),\n",
 | 
			
		||||
       " ('arrears_amount', 'string'),\n",
 | 
			
		||||
       " ('basket_amount', 'bigint'),\n",
 | 
			
		||||
       " ('bonus_credit_eligibility', 'boolean'),\n",
 | 
			
		||||
       " ('borrowers',\n",
 | 
			
		||||
       "  'array<struct<first_name:string,gender:string,last_name:string,pictured:boolean>>'),\n",
 | 
			
		||||
       " ('currency_exchange_loss_amount', 'double'),\n",
 | 
			
		||||
       " ('delinquent', 'string'),\n",
 | 
			
		||||
       " ('description',\n",
 | 
			
		||||
       "  'struct<languages:array<string>,texts:struct<ar:string,en:string,es:string,fr:string,id:string,mn:string,pt:string,ru:string,vi:string>>'),\n",
 | 
			
		||||
       " ('funded_amount', 'bigint'),\n",
 | 
			
		||||
       " ('funded_date', 'string'),\n",
 | 
			
		||||
       " ('id', 'bigint'),\n",
 | 
			
		||||
       " ('image', 'struct<id:bigint,template_id:bigint>'),\n",
 | 
			
		||||
       " ('journal_totals', 'struct<bulkEntries:bigint,entries:bigint>'),\n",
 | 
			
		||||
       " ('lender_count', 'bigint'),\n",
 | 
			
		||||
       " ('loan_amount', 'bigint'),\n",
 | 
			
		||||
       " ('location',\n",
 | 
			
		||||
       "  'struct<country:string,country_code:string,geo:struct<level:string,pairs:string,type:string>,town:string>'),\n",
 | 
			
		||||
       " ('name', 'string'),\n",
 | 
			
		||||
       " ('paid_amount', 'string'),\n",
 | 
			
		||||
       " ('paid_date', 'string'),\n",
 | 
			
		||||
       " ('partner_id', 'bigint'),\n",
 | 
			
		||||
       " ('payments', 'array<string>'),\n",
 | 
			
		||||
       " ('planned_expiration_date', 'string'),\n",
 | 
			
		||||
       " ('posted_date', 'string'),\n",
 | 
			
		||||
       " ('sector', 'string'),\n",
 | 
			
		||||
       " ('status', 'string'),\n",
 | 
			
		||||
       " ('tags', 'array<struct<name:string>>'),\n",
 | 
			
		||||
       " ('terms',\n",
 | 
			
		||||
       "  'struct<disbursal_amount:double,disbursal_currency:string,disbursal_date:string,loan_amount:bigint,local_payments:array<struct<amount:double,due_date:string>>,loss_liability:struct<currency_exchange:string,currency_exchange_coverage_rate:double,nonpayment:string>,repayment_interval:string,repayment_term:bigint,scheduled_payments:array<struct<amount:double,due_date:string>>>'),\n",
 | 
			
		||||
       " ('themes', 'array<string>'),\n",
 | 
			
		||||
       " ('translator', 'struct<byline:string,image:bigint>'),\n",
 | 
			
		||||
       " ('use', 'string'),\n",
 | 
			
		||||
       " ('video',\n",
 | 
			
		||||
       "  'struct<id:bigint,thumbnailImageId:bigint,title:string,youtubeId:string>')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 2,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.dtypes"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 3,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[('country_code', 'string'),\n",
 | 
			
		||||
       " ('image', 'struct<id:bigint,template_id:bigint>'),\n",
 | 
			
		||||
       " ('invitee_count', 'bigint'),\n",
 | 
			
		||||
       " ('inviter_id', 'string'),\n",
 | 
			
		||||
       " ('lender_id', 'string'),\n",
 | 
			
		||||
       " ('loan_because', 'string'),\n",
 | 
			
		||||
       " ('loan_count', 'bigint'),\n",
 | 
			
		||||
       " ('member_since', 'string'),\n",
 | 
			
		||||
       " ('name', 'string'),\n",
 | 
			
		||||
       " ('occupation', 'string'),\n",
 | 
			
		||||
       " ('occupational_info', 'string'),\n",
 | 
			
		||||
       " ('personal_url', 'string'),\n",
 | 
			
		||||
       " ('uid', 'string'),\n",
 | 
			
		||||
       " ('whereabouts', 'string')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 3,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "lenders.dtypes"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 4,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[('id', 'bigint'), ('lender_ids', 'array<string>')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 4,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans_lenders.dtypes"
 | 
			
		||||
   ]
 | 
			
		||||
  }
 | 
			
		||||
 ],
 | 
			
		||||
 "metadata": {
 | 
			
		||||
  "kernelspec": {
 | 
			
		||||
   "display_name": "Python 2",
 | 
			
		||||
   "language": "python",
 | 
			
		||||
   "name": "python2"
 | 
			
		||||
  },
 | 
			
		||||
  "language_info": {
 | 
			
		||||
   "codemirror_mode": {
 | 
			
		||||
    "name": "ipython",
 | 
			
		||||
    "version": 2
 | 
			
		||||
   },
 | 
			
		||||
   "file_extension": ".py",
 | 
			
		||||
   "mimetype": "text/x-python",
 | 
			
		||||
   "name": "python",
 | 
			
		||||
   "nbconvert_exporter": "python",
 | 
			
		||||
   "pygments_lexer": "ipython2",
 | 
			
		||||
   "version": "2.7.12"
 | 
			
		||||
  }
 | 
			
		||||
 },
 | 
			
		||||
 "nbformat": 4,
 | 
			
		||||
 "nbformat_minor": 1
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										274
									
								
								Kiva Exploration.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										274
									
								
								Kiva Exploration.ipynb
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,274 @@
 | 
			
		||||
{
 | 
			
		||||
 "cells": [
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "markdown",
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "source": [
 | 
			
		||||
    "# Understanding the Kiva Dataset\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "Before we actually get into the work of predicting anything based on the data Kiva makes public, we first want to get a better picture of what the dataset actually looks like.\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "Our first step: What is the schema of the data? Spark SQL will make it easy to query data in the future, but we need to know first what is available."
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 1,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": true
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "sparkSql = (SparkSession.builder\n",
 | 
			
		||||
    "         .master(\"local\")\n",
 | 
			
		||||
    "         .appName(\"Kiva Exploration\")\n",
 | 
			
		||||
    "         .getOrCreate())\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
 | 
			
		||||
    "lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
 | 
			
		||||
    "loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 2,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "name": "stdout",
 | 
			
		||||
     "output_type": "stream",
 | 
			
		||||
     "text": [
 | 
			
		||||
      "root\n",
 | 
			
		||||
      " |-- activity: string (nullable = true)\n",
 | 
			
		||||
      " |-- basket_amount: long (nullable = true)\n",
 | 
			
		||||
      " |-- bonus_credit_eligibility: boolean (nullable = true)\n",
 | 
			
		||||
      " |-- borrowers: array (nullable = true)\n",
 | 
			
		||||
      " |    |-- element: struct (containsNull = true)\n",
 | 
			
		||||
      " |    |    |-- first_name: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- gender: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- last_name: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- pictured: boolean (nullable = true)\n",
 | 
			
		||||
      " |-- currency_exchange_loss_amount: double (nullable = true)\n",
 | 
			
		||||
      " |-- delinquent: boolean (nullable = true)\n",
 | 
			
		||||
      " |-- description: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- languages: array (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- element: string (containsNull = true)\n",
 | 
			
		||||
      " |    |-- texts: struct (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- ar: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- en: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- es: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- fr: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- id: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- mn: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- pt: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- ru: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- vi: string (nullable = true)\n",
 | 
			
		||||
      " |-- funded_amount: long (nullable = true)\n",
 | 
			
		||||
      " |-- funded_date: string (nullable = true)\n",
 | 
			
		||||
      " |-- id: long (nullable = true)\n",
 | 
			
		||||
      " |-- image: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- id: long (nullable = true)\n",
 | 
			
		||||
      " |    |-- template_id: long (nullable = true)\n",
 | 
			
		||||
      " |-- journal_totals: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- bulkEntries: long (nullable = true)\n",
 | 
			
		||||
      " |    |-- entries: long (nullable = true)\n",
 | 
			
		||||
      " |-- lender_count: long (nullable = true)\n",
 | 
			
		||||
      " |-- loan_amount: long (nullable = true)\n",
 | 
			
		||||
      " |-- location: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- country: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- country_code: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- geo: struct (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- level: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- pairs: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- type: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- town: string (nullable = true)\n",
 | 
			
		||||
      " |-- name: string (nullable = true)\n",
 | 
			
		||||
      " |-- paid_amount: double (nullable = true)\n",
 | 
			
		||||
      " |-- paid_date: string (nullable = true)\n",
 | 
			
		||||
      " |-- partner_id: long (nullable = true)\n",
 | 
			
		||||
      " |-- payments: array (nullable = true)\n",
 | 
			
		||||
      " |    |-- element: struct (containsNull = true)\n",
 | 
			
		||||
      " |    |    |-- amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- currency_exchange_loss_amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- local_amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- payment_id: long (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- processed_date: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- rounded_local_amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- settlement_date: string (nullable = true)\n",
 | 
			
		||||
      " |-- planned_expiration_date: string (nullable = true)\n",
 | 
			
		||||
      " |-- posted_date: string (nullable = true)\n",
 | 
			
		||||
      " |-- sector: string (nullable = true)\n",
 | 
			
		||||
      " |-- status: string (nullable = true)\n",
 | 
			
		||||
      " |-- tags: array (nullable = true)\n",
 | 
			
		||||
      " |    |-- element: struct (containsNull = true)\n",
 | 
			
		||||
      " |    |    |-- name: string (nullable = true)\n",
 | 
			
		||||
      " |-- terms: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- disbursal_amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |-- disbursal_currency: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- disbursal_date: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- loan_amount: long (nullable = true)\n",
 | 
			
		||||
      " |    |-- local_payments: array (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- element: struct (containsNull = true)\n",
 | 
			
		||||
      " |    |    |    |-- amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |    |-- due_date: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- loss_liability: struct (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- currency_exchange: string (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- currency_exchange_coverage_rate: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- nonpayment: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- repayment_interval: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- repayment_term: long (nullable = true)\n",
 | 
			
		||||
      " |    |-- scheduled_payments: array (nullable = true)\n",
 | 
			
		||||
      " |    |    |-- element: struct (containsNull = true)\n",
 | 
			
		||||
      " |    |    |    |-- amount: double (nullable = true)\n",
 | 
			
		||||
      " |    |    |    |-- due_date: string (nullable = true)\n",
 | 
			
		||||
      " |-- themes: array (nullable = true)\n",
 | 
			
		||||
      " |    |-- element: string (containsNull = true)\n",
 | 
			
		||||
      " |-- translator: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- byline: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- image: long (nullable = true)\n",
 | 
			
		||||
      " |-- use: string (nullable = true)\n",
 | 
			
		||||
      " |-- video: struct (nullable = true)\n",
 | 
			
		||||
      " |    |-- id: long (nullable = true)\n",
 | 
			
		||||
      " |    |-- thumbnailImageId: long (nullable = true)\n",
 | 
			
		||||
      " |    |-- title: string (nullable = true)\n",
 | 
			
		||||
      " |    |-- youtubeId: string (nullable = true)\n",
 | 
			
		||||
      "\n"
 | 
			
		||||
     ]
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.printSchema()"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 3,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[Row(status=u'refunded', count=5504),\n",
 | 
			
		||||
       " Row(status=u'defaulted', count=21776),\n",
 | 
			
		||||
       " Row(status=u'in_repayment', count=155749),\n",
 | 
			
		||||
       " Row(status=u'reviewed', count=3),\n",
 | 
			
		||||
       " Row(status=u'deleted', count=2721),\n",
 | 
			
		||||
       " Row(status=u'paid', count=775330),\n",
 | 
			
		||||
       " Row(status=u'issue', count=199),\n",
 | 
			
		||||
       " Row(status=u'inactive_expired', count=12421),\n",
 | 
			
		||||
       " Row(status=u'fundraising', count=3986),\n",
 | 
			
		||||
       " Row(status=u'expired', count=33773),\n",
 | 
			
		||||
       " Row(status=u'inactive', count=2493),\n",
 | 
			
		||||
       " Row(status=u'funded', count=173),\n",
 | 
			
		||||
       " Row(status=u'', count=2)]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 3,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.groupby(loans.status).count().collect()"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 4,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[Row(delinquent=None, count=970465), Row(delinquent=True, count=43665)]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 4,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.groupby(loans.delinquent).count().collect()"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 6,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[Row(status=u'refunded', count=156),\n",
 | 
			
		||||
       " Row(status=u'defaulted', count=20116),\n",
 | 
			
		||||
       " Row(status=u'in_repayment', count=23393)]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 6,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.where(loans.delinquent == True).groupby(loans.status).count().collect()"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 19,
 | 
			
		||||
   "metadata": {
 | 
			
		||||
    "collapsed": false
 | 
			
		||||
   },
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "data": {
 | 
			
		||||
      "text/plain": [
 | 
			
		||||
       "[Row(status=u'in_repayment')]"
 | 
			
		||||
      ]
 | 
			
		||||
     },
 | 
			
		||||
     "execution_count": 19,
 | 
			
		||||
     "metadata": {},
 | 
			
		||||
     "output_type": "execute_result"
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "loans.registerTempTable('loans')\n",
 | 
			
		||||
    "sparkSql.sql('''\n",
 | 
			
		||||
    "SELECT loans.status\n",
 | 
			
		||||
    "FROM loans\n",
 | 
			
		||||
    "LIMIT 1\n",
 | 
			
		||||
    "''').collect()"
 | 
			
		||||
   ]
 | 
			
		||||
  }
 | 
			
		||||
 ],
 | 
			
		||||
 "metadata": {
 | 
			
		||||
  "kernelspec": {
 | 
			
		||||
   "display_name": "Python 2",
 | 
			
		||||
   "language": "python",
 | 
			
		||||
   "name": "python2"
 | 
			
		||||
  },
 | 
			
		||||
  "language_info": {
 | 
			
		||||
   "codemirror_mode": {
 | 
			
		||||
    "name": "ipython",
 | 
			
		||||
    "version": 2
 | 
			
		||||
   },
 | 
			
		||||
   "file_extension": ".py",
 | 
			
		||||
   "mimetype": "text/x-python",
 | 
			
		||||
   "name": "python",
 | 
			
		||||
   "nbconvert_exporter": "python",
 | 
			
		||||
   "pygments_lexer": "ipython2",
 | 
			
		||||
   "version": "2.7.12"
 | 
			
		||||
  }
 | 
			
		||||
 },
 | 
			
		||||
 "nbformat": 4,
 | 
			
		||||
 "nbformat_minor": 1
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user