You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

893 lines
60KB

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "collapsed": true
  8. },
  9. "outputs": [],
  10. "source": [
  11. "from pyspark.sql import SparkSession\n",
  12. "\n",
  13. "sparkSql = SparkSession.builder.master(\"local\").appName(\"Kiva Exploration\").getOrCreate()\n",
  14. "\n",
  15. "# Load each Kiva JSON dump and expose it to Spark SQL under a view of the same name.\n",
  16. "loans = sparkSql.read.format('json').load('kiva-data/loans.json')\n",
  17. "loans.createOrReplaceTempView('loans')\n",
  18. "lenders = sparkSql.read.format('json').load('kiva-data/lenders.json')\n",
  19. "lenders.createOrReplaceTempView('lenders')\n",
  20. "loans_lenders = sparkSql.read.format('json').load('kiva-data/loans_lenders.json')\n",
  21. "loans_lenders.createOrReplaceTempView('loans_lenders')"
  22. ]
  23. },
  24. {
  25. "cell_type": "markdown",
  26. "metadata": {},
  27. "source": [
  28. "# Custom Functions\n",
  29. "\n",
  30. "## Gender Ratio\n",
  31. "\n",
  32. "0 = All female\n",
  33. "\n",
  34. "1 = All male"
  35. ]
  36. },
  37. {
  38. "cell_type": "code",
  39. "execution_count": 2,
  40. "metadata": {
  41. "collapsed": false
  42. },
  43. "outputs": [],
  44. "source": [
  45. "import pyspark\n",
  46. "\n",
  47. "def gender_ratio(array):\n",
  48. "    \"\"\"Return the share of borrowers whose gender is 'M' (0 = all female, 1 = all male); None if there are no borrowers.\"\"\"\n",
  49. "    if not array:\n",
  50. "        # No borrowers: the ratio is undefined, so return None instead of dividing by zero.\n",
  51. "        return None\n",
  52. "    num_males = sum(1 for item in array if item.gender == 'M')\n",
  53. "    return float(num_males) / len(array)\n",
  54. "\n",
  55. "sparkSql.udf.register('gender_ratio',  # name used inside the SQL query\n",
  56. " gender_ratio,  # the Python function defined above\n",
  57. " pyspark.sql.types.FloatType())  # declared return type of the UDF"
  58. ]
  59. },
  60. {
  61. "cell_type": "markdown",
  62. "metadata": {},
  63. "source": [
  64. "## Fetch GDP"
  65. ]
  66. },
  67. {
  68. "cell_type": "code",
  69. "execution_count": 3,
  70. "metadata": {
  71. "collapsed": false
  72. },
  73. "outputs": [],
  74. "source": [
  75. "import pandas as pd\n",
  76. "from datetime import datetime\n",
  77. "import numpy as np\n",
  78. "\n",
  79. "\n",
  80. "# Load the raw inputs: the ISO country-code lookup table and the per-country yearly GDP export.\n",
  81. "country_codes_raw = pd.read_csv('economic-data/country-codes.csv')\n",
  82. "country_gdp_raw = pd.read_csv('economic-data/country-gdp.csv')\n",
  83. "\n",
  84. "# Keep only the English name, the ISO alpha-2/alpha-3 codes and the ISO 4217 currency code.\n",
  85. "country_codes = country_codes_raw[['official_name_en', 'ISO3166-1-Alpha-2', \n",
  86. " 'ISO3166-1-Alpha-3', 'ISO4217-currency_alphabetic_code']]\n",
  87. "\n",
  88. "# Drop the first two columns of the GDP export and give the remaining ones short names (one column per year).\n",
  89. "country_gdp = country_gdp_raw.drop(country_gdp_raw.columns[[0, 1]], axis=1)\n",
  90. "country_gdp.columns = ['name', 'country_code_3', '2002', '2003', '2004', '2005', '2006',\n",
  91. " '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016']\n",
  92. "\n",
  93. "# Left-join on the alpha-3 code to attach the alpha-2 country code and currency code, then turn the '..' placeholders into NaN.\n",
  94. "country_gdp = pd.merge(country_gdp, country_codes, left_on='country_code_3', right_on='ISO3166-1-Alpha-3', how='left')\n",
  95. "country_gdp.drop(['official_name_en', 'ISO3166-1-Alpha-3', 'country_code_3'], axis=1, inplace=True)\n",
  96. "country_gdp = country_gdp.rename(columns = {'ISO3166-1-Alpha-2':'country_code',\n",
  97. " 'ISO4217-currency_alphabetic_code':'currency_code'})\n",
  98. "country_gdp.replace('..', np.nan, inplace=True)\n",
  99. "\n",
  100. "# Move country_code and currency_code to positions 1 and 2, so the year columns start at index 3.\n",
  101. "cols = list(country_gdp.columns)\n",
  102. "cols.insert(1, cols.pop(cols.index('country_code')))\n",
  103. "cols.insert(2, cols.pop(cols.index('currency_code')))\n",
  104. "country_gdp = country_gdp.reindex(columns= cols)\n",
  105. "\n",
  106. "def gdp(country_code, disbursal_date):\n",
  107. "    \"\"\"Look up a country's GDP figure for the year a loan was disbursed.\n",
  108. "\n",
  109. "    Returns the country_gdp value for that year; the country's mean over all\n",
  110. "    available years when the date is None, outside the table's year columns or has\n",
  111. "    no value; and 0.0 when the country code is missing, numeric or unknown.\n",
  112. "    \"\"\"\n",
  113. "    def historical_gdp(values):\n",
  114. "        # Mean of the non-NaN yearly figures; 0.0 when there are none.\n",
  115. "        values = np.array([float(v) for v in values], dtype=np.float64)\n",
  116. "        values = values[~np.isnan(values)]\n",
  117. "        return float(values.mean()) if len(values) else 0.0\n",
  118. "\n",
  119. "    # TODO: the WorldBank data has wrong alpha-3 codes (e.g. Andorra), so some countries never resolve.\n",
  120. "    try:\n",
  121. "        float(country_code)\n",
  122. "    except (TypeError, ValueError):\n",
  123. "        rows = country_gdp[country_gdp.country_code == country_code]  # a real, non-numeric code\n",
  124. "    else:\n",
  125. "        return 0.0  # numeric/NaN code: no usable country (float, not int, to match the declared FloatType)\n",
  126. "    if rows.empty:\n",
  127. "        return 0.0  # TODO: bad solution? unknown countries look like zero GDP\n",
  128. "    all_gdp = rows.values[0][3:]  # year columns start at index 3\n",
  129. "    year = None if disbursal_date is None else str(datetime.strptime(disbursal_date, '%Y-%m-%dT%H:%M:%SZ').year)\n",
  130. "    if year not in rows.columns or pd.isnull(rows[year].values[0]):\n",
  131. "        return historical_gdp(all_gdp)  # no usable figure for that year: fall back to the average\n",
  132. "    return float(rows[year].values[0])\n",
  133. "\n",
  134. "sparkSql.udf.register('gdp', gdp, pyspark.sql.types.FloatType())"
  135. ]
  136. },
  137. {
  138. "cell_type": "markdown",
  139. "metadata": {},
  140. "source": [
  141. "## Fetch Exchange Rates"
  142. ]
  143. },
  144. {
  145. "cell_type": "code",
  146. "execution_count": 4,
  147. "metadata": {
  148. "collapsed": true
  149. },
  150. "outputs": [],
  151. "source": [
  152. "currencies_raw = pd.read_csv('economic-data/currencies.csv')\n",
  153. "# Drop the first two columns of the export, indexed from this frame rather than from country_gdp_raw.\n",
  154. "currencies = currencies_raw.drop(currencies_raw.columns[[0, 1]], axis=1)\n",
  155. "currencies.columns = ['country_name', 'country_code_3', '2002', '2003', '2004', '2005', '2006',\n",
  156. "                      '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016']\n",
  157. "\n",
  158. "# Attach the ISO alpha-2 country code and the currency code via the alpha-3 code.\n",
  159. "currencies = pd.merge(currencies, country_codes, left_on='country_code_3', right_on='ISO3166-1-Alpha-3', how='left')\n",
  160. "currencies = currencies.drop(['official_name_en', 'ISO3166-1-Alpha-3', 'country_code_3'], axis=1)\n",
  161. "currencies = currencies.rename(columns={'ISO3166-1-Alpha-2': 'country_code',\n",
  162. "                                        'ISO4217-currency_alphabetic_code': 'currency_code'})\n",
  163. "currencies = currencies.replace('..', np.nan)\n",
  164. "\n",
  165. "# Add code for European Union. NOTE(review): row label 217 is hard-coded - confirm it is the euro-area row of currencies.csv.\n",
  166. "currencies.loc[217, 'country_code'] = 'EU'\n",
  167. "currencies.loc[217, 'currency_code'] = 'EMU'\n",
  168. "\n",
  169. "# Move country_code and currency_code to positions 1 and 2, so the year columns start at index 3.\n",
  170. "cols = list(currencies.columns)\n",
  171. "cols.insert(1, cols.pop(cols.index('country_code')))\n",
  172. "cols.insert(2, cols.pop(cols.index('currency_code')))\n",
  173. "currencies = currencies.reindex(columns=cols)\n",
  174. "\n",
  175. "def xchange_rate(country_code, disbursal_date):\n",
  176. "    \"\"\"Look up a country's exchange rate for the year a loan was disbursed.\n",
  177. "\n",
  178. "    Codes in the eu list are looked up under 'EU' and US territories under 'US'.\n",
  179. "    Returns the currencies value for that year; the country's mean over all\n",
  180. "    available years when the date is None, outside the table's year columns or has\n",
  181. "    no value; and 1.0 when the country code is missing, numeric or unknown.\n",
  182. "    \"\"\"\n",
  183. "    def historical_rates(values):\n",
  184. "        # Mean of the non-NaN yearly rates; 1.0 when there are none.\n",
  185. "        values = np.array([float(v) for v in values], dtype=np.float64)\n",
  186. "        values = values[~np.isnan(values)]\n",
  187. "        return float(values.mean()) if len(values) else 1.0\n",
  188. "    # NOTE(review): eu lists EU members, including non-euro countries (e.g. GB, PL, SE) - confirm they should share the 'EU' row.\n",
  189. "    eu = ['AT','BE','BG','HR','CY','CZ','DK','EE','FI','FR','DE','GR','HU','IE',\n",
  190. "          'IT','LV','LT','LU','MT','NL','PL','PT','RO','SK','SI','ES','SE','GB']\n",
  191. "    us = ['AS','GU','MP','PR','UM','VI']\n",
  192. "\n",
  193. "    # TODO: the WorldBank data has wrong alpha-3 codes (e.g. Andorra), so some countries never resolve.\n",
  194. "    try:\n",
  195. "        float(country_code)\n",
  196. "    except (TypeError, ValueError):\n",
  197. "        pass  # a real, non-numeric code\n",
  198. "    else:\n",
  199. "        # Numeric/NaN code: no usable country. A duplicated check used to return 1 for NaN but 0\n",
  200. "        # for other numbers; use the neutral rate 1.0 for both (float, to match the declared FloatType).\n",
  201. "        return 1.0\n",
  202. "\n",
  203. "    if country_code in eu:\n",
  204. "        country_code = 'EU'\n",
  205. "    elif country_code in us:\n",
  206. "        country_code = 'US'\n",
  207. "\n",
  208. "    rows = currencies[currencies.country_code == country_code]\n",
  209. "    if rows.empty:\n",
  210. "        return 1.0  # TODO: bad solution? unknown countries look like a 1:1 rate\n",
  211. "\n",
  212. "    all_rates = rows.values[0][3:]  # year columns start at index 3\n",
  213. "    year = None if disbursal_date is None else str(datetime.strptime(disbursal_date, '%Y-%m-%dT%H:%M:%SZ').year)\n",
  214. "    # Fall back to the historical average when the year is unknown or has no rate.\n",
  215. "    if year not in rows.columns or pd.isnull(rows[year].values[0]):\n",
  216. "        return historical_rates(all_rates)\n",
  217. "    return float(rows[year].values[0])\n",
  218. "\n",
  219. "sparkSql.udf.register('xchange_rate', xchange_rate, pyspark.sql.types.FloatType())"
  220. ]
  221. },
  222. {
  223. "cell_type": "markdown",
  224. "metadata": {},
  225. "source": [
  226. "# Fetch actual data\n",
  227. "\n",
  228. "Get all data that we are going to use, get dummies, then split into train/validation/test."
  229. ]
  230. },
  231. {
  232. "cell_type": "markdown",
  233. "metadata": {},
  234. "source": [
  235. "Query our datasets to train on."
  236. ]
  237. },
  238. {
  239. "cell_type": "code",
  240. "execution_count": 6,
  241. "metadata": {
  242. "collapsed": false
  243. },
  244. "outputs": [],
  245. "source": [
  246. "query = '''\n",
  247. "SELECT\n",
  248. " id,\n",
  249. " activity,\n",
  250. " size(borrowers) as num_borrowers,\n",
  251. " gender_ratio(borrowers) as gender_ratio,\n",
  252. " lender_count,\n",
  253. " location.country,\n",
  254. " location.country_code,\n",
  255. " partner_id,\n",
  256. " sector,\n",
  257. " tags,\n",
  258. " DATEDIFF(terms.disbursal_date, planned_expiration_date) as loan_length,\n",
  259. " terms.disbursal_amount,\n",
  260. " terms.disbursal_currency,\n",
  261. " terms.disbursal_date,\n",
  262. " size(terms.scheduled_payments) as num_repayments,\n",
  263. " terms.repayment_interval,\n",
  264. " CASE WHEN\n",
  265. " (status = 'refunded') OR\n",
  266. " (status = 'defaulted') OR\n",
  267. " (status = 'deleted') OR\n",
  268. " (status = 'issue') OR\n",
  269. " (status = 'inactive_expired') OR\n",
  270. " (status = 'expired') OR\n",
  271. " (status = 'inactive') OR\n",
  272. " (delinquent = True) THEN 1 ELSE 0 END AS bad_loan,\n",
  273. " gdp(location.country_code, terms.disbursal_date) as gdp,\n",
  274. " xchange_rate(location.country_code, terms.disbursal_date) as xchange_rate,\n",
  275. " status,\n",
  276. " delinquent\n",
  277. " \n",
  278. "FROM loans\n",
  279. "WHERE\n",
  280. " status != 'fundraising' AND\n",
  281. " status != 'funded'\n",
  282. "'''\n",
  283. "# NOTE(review): Spark's DATEDIFF(end, start) is end - start, so loan_length above is disbursal_date minus planned_expiration_date - confirm that sign is intended.\n",
  284. "dataset = sparkSql.sql(query).toPandas()  # run the query and collect the whole result into a pandas DataFrame"
  285. ]
  286. },
  287. {
  288. "cell_type": "markdown",
  289. "metadata": {},
  290. "source": [
  291. "# Data Splits"
  292. ]
  293. },
  294. {
  295. "cell_type": "code",
  296. "execution_count": 8,
  297. "metadata": {
  298. "collapsed": false
  299. },
  300. "outputs": [],
  301. "source": [
  302. "X_columns = [\n",
  303. " 'activity', 'num_borrowers', 'gender_ratio',\n",
  304. " 'lender_count', 'country', 'partner_id', 'sector',\n",
  305. " 'loan_length', 'disbursal_amount', 'disbursal_currency',\n",
  306. " 'num_repayments', 'repayment_interval', 'gdp', 'xchange_rate'\n",
  307. "]\n",
  308. "# Target: the bad_loan flag computed by the CASE expression in the query (1 = bad, 0 = otherwise).\n",
  309. "y_column = ['bad_loan']\n",
  310. "# One-hot encode the string-valued columns (pandas get_dummies default) and save the frame so later cells can run after a kernel restart.\n",
  311. "dummy_set = pd.get_dummies(dataset[X_columns + y_column])\n",
  312. "dummy_set.to_csv('processed_dummy.csv')"
  313. ]
  314. },
  315. {
  316. "cell_type": "markdown",
  317. "metadata": {},
  318. "source": [
  319. "Now we can restart the kernel to clear memory, and start processing."
  320. ]
  321. },
  322. {
  323. "cell_type": "code",
  324. "execution_count": 1,
  325. "metadata": {
  326. "collapsed": false
  327. },
  328. "outputs": [],
  329. "source": [
  330. "import pandas as pd\n",
  331. "\n",
  332. "processed_dummy = pd.read_csv('processed_dummy.csv', index_col=0)  # first CSV column is the index that to_csv wrote"
  333. ]
  334. },
  335. {
  336. "cell_type": "code",
  337. "execution_count": 3,
  338. "metadata": {
  339. "collapsed": true
  340. },
  341. "outputs": [],
  342. "source": [
  343. "import numpy as np\n",
  344. "# Shuffle all rows with a fixed seed, then cut at 60% and 80% into train / validation / test.\n",
  345. "train, validate, test = np.split(processed_dummy.sample(frac=1, random_state=0),\n",
  346. " [int(.6*len(processed_dummy)),\n",
  347. " int(.8*len(processed_dummy))])\n",
  348. "# Save each split so the modelling cells can reload it after a kernel restart.\n",
  349. "train.to_csv('processed_train.csv')\n",
  350. "validate.to_csv('processed_validate.csv')\n",
  351. "test.to_csv('processed_test.csv')"
  352. ]
  353. },
  354. {
  355. "cell_type": "markdown",
  356. "metadata": {},
  357. "source": [
  358. "# Testing all the models"
  359. ]
  360. },
  361. {
  362. "cell_type": "code",
  363. "execution_count": 1,
  364. "metadata": {
  365. "collapsed": false
  366. },
  367. "outputs": [],
  368. "source": [
  369. "import pandas as pd\n",
  370. "train = pd.read_csv('processed_train.csv', index_col=0).dropna(axis=1)  # NOTE(review): drops every column that has a NaN in this split only; the validation split is filtered separately - confirm both keep the same columns"
  371. ]
  372. },
  373. {
  374. "cell_type": "markdown",
  375. "metadata": {},
  376. "source": [
  377. "Naive guess:"
  378. ]
  379. },
  380. {
  381. "cell_type": "code",
  382. "execution_count": 2,
  383. "metadata": {
  384. "collapsed": false
  385. },
  386. "outputs": [
  387. {
  388. "data": {
  389. "text/plain": [
  390. "0.89836166750827584"
  391. ]
  392. },
  393. "execution_count": 2,
  394. "metadata": {},
  395. "output_type": "execute_result"
  396. }
  397. ],
  398. "source": [
  399. "train_x = train.drop('bad_loan', axis=1)\n",
  400. "train_y = train['bad_loan']\n",
  401. "# Baseline: accuracy of always predicting 0 (not a bad loan), i.e. the share of training rows with bad_loan == 0.\n",
  402. "1 - train_y.mean()"
  403. ]
  404. },
  405. {
  406. "cell_type": "code",
  407. "execution_count": 3,
  408. "metadata": {
  409. "collapsed": false
  410. },
  411. "outputs": [
  412. {
  413. "name": "stderr",
  414. "output_type": "stream",
  415. "text": [
  416. "/usr/lib/python2.7/dist-packages/sklearn/linear_model/sag.py:267: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
  417. " \"the coef_ did not converge\", ConvergenceWarning)\n"
  418. ]
  419. },
  420. {
  421. "name": "stdout",
  422. "output_type": "stream",
  423. "text": [
  424. "Finished training 1\n",
  425. "Finished training 0.1\n",
  426. "Finished training 0.01\n",
  427. "Finished training 0.001\n",
  428. "Finished training 0.0001\n"
  429. ]
  430. }
  431. ],
  432. "source": [
  433. "from itertools import product\n",
  434. "import pickle\n",
  435. "from sklearn.linear_model import LogisticRegression\n",
  436. "\n",
  437. "for C in [1, .1, .01, .001, .0001]:\n",
  438. "    lr = LogisticRegression(C=C, solver='sag', max_iter=100)\n",
  439. "    lr.fit(train_x, train_y)\n",
  440. "    # Pickle output is binary data: open the file with 'wb' so the dump is valid on every platform and Python version.\n",
  441. "    with open('lr_{}.pickle'.format(C), 'wb') as handle:\n",
  442. "        pickle.dump(lr, handle)\n",
  443. "\n",
  444. "    del lr  # release the fitted model before training the next one\n",
  445. "    print(\"Finished training {}\".format(C))"
  446. ]
  447. },
  448. {
  449. "cell_type": "code",
  450. "execution_count": 4,
  451. "metadata": {
  452. "collapsed": false
  453. },
  454. "outputs": [
  455. {
  456. "name": "stdout",
  457. "output_type": "stream",
  458. "text": [
  459. "Finished training 342\n",
  460. "Finished training 150\n",
  461. "Finished training 20\n",
  462. "Finished training 5\n",
  463. "Finished training 1\n"
  464. ]
  465. }
  466. ],
  467. "source": [
  468. "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
  469. "import pickle\n",
  470. "\n",
  471. "# Number of columns is 342. NOTE(review): with a binary target LDA has at most one discriminant and solver='lsqr' offers no transform, so n_components probably does not change predictions - confirm.\n",
  472. "for n_components in [342, 150, 20, 5, 1]:\n",
  473. "    lda = LinearDiscriminantAnalysis(n_components=n_components,\n",
  474. "                                     solver='lsqr')\n",
  475. "    lda.fit(train_x, train_y)\n",
  476. "    with open('lda_{}.pickle'.format(n_components), 'wb') as handle:  # pickles are binary: write with 'wb'\n",
  477. "        pickle.dump(lda, handle)\n",
  478. "\n",
  479. "    del lda  # release the fitted model before training the next one\n",
  480. "    print(\"Finished training {}\".format(n_components))"
  481. ]
  482. },
  483. {
  484. "cell_type": "code",
  485. "execution_count": 5,
  486. "metadata": {
  487. "collapsed": false
  488. },
  489. "outputs": [
  490. {
  491. "name": "stdout",
  492. "output_type": "stream",
  493. "text": [
  494. "Finished training 10\n",
  495. "Finished training 5\n",
  496. "Finished training 3\n",
  497. "Finished training 1\n"
  498. ]
  499. }
  500. ],
  501. "source": [
  502. "from sklearn.ensemble import RandomForestClassifier\n",
  503. "\n",
  504. "for n_estimators in [10, 5, 3, 1]:\n",
  505. "    rf = RandomForestClassifier(n_estimators=n_estimators, random_state=0)  # fixed seed so a rerun rebuilds the same forest\n",
  506. "    rf.fit(train_x, train_y)\n",
  507. "    with open('rf_{}.pickle'.format(n_estimators), 'wb') as handle:  # pickles are binary: write with 'wb'\n",
  508. "        pickle.dump(rf, handle)\n",
  509. "\n",
  510. "    del rf  # release the fitted model before training the next one\n",
  511. "    print(\"Finished training {}\".format(n_estimators))"
  512. ]
  513. },
  514. {
  515. "cell_type": "markdown",
  516. "metadata": {},
  517. "source": [
  518. "# Evaluating the Results"
  519. ]
  520. },
  521. {
  522. "cell_type": "code",
  523. "execution_count": 1,
  524. "metadata": {
  525. "collapsed": false
  526. },
  527. "outputs": [],
  528. "source": [
  529. "import pandas as pd\n",
  530. "valid = pd.read_csv('processed_validate.csv', index_col=0).dropna(axis=1)  # NOTE(review): drops columns with a NaN in this split only; the training split was filtered separately - confirm the columns match what the models were fitted on\n",
  531. "\n",
  532. "valid_x = valid.drop('bad_loan', axis=1)  # features\n",
  533. "valid_y = valid['bad_loan']  # target flag"
  534. ]
  535. },
  536. {
  537. "cell_type": "code",
  538. "execution_count": 2,
  539. "metadata": {
  540. "collapsed": false
  541. },
  542. "outputs": [
  543. {
  544. "name": "stdout",
  545. "output_type": "stream",
  546. "text": [
  547. "Logistic Regression, C=1; Score: 0.89904155569\n",
  548. "Logistic Regression, C=0.1; Score: 0.89904155569\n",
  549. "Logistic Regression, C=0.01; Score: 0.89904155569\n",
  550. "Logistic Regression, C=0.001; Score: 0.89904155569\n",
  551. "Logistic Regression, C=0.0001; Score: 0.89904155569\n"
  552. ]
  553. }
  554. ],
  555. "source": [
  556. "import pickle\n",
  557. "lr_params = [1, .1, .01, .001, .0001]\n",
  558. "\n",
  559. "for C in lr_params:\n",
  560. "    # Pickles are binary data: read with 'rb'. Only load pickle files that this notebook wrote itself.\n",
  561. "    with open('lr_{}.pickle'.format(C), 'rb') as handle:\n",
  562. "        model = pickle.load(handle)\n",
  563. "\n",
  564. "    score = model.score(valid_x, valid_y)  # accuracy on the validation split\n",
  565. "    print('Logistic Regression, C={}; Score: {}'.format(\n",
  566. "        C, score\n",
  567. "    ))"
  568. ]
  569. },
  570. {
  571. "cell_type": "code",
  572. "execution_count": 3,
  573. "metadata": {
  574. "collapsed": false
  575. },
  576. "outputs": [
  577. {
  578. "name": "stdout",
  579. "output_type": "stream",
  580. "text": [
  581. "Linear Discriminant Analysis, components=342; Score: 0.896526629504\n",
  582. "Linear Discriminant Analysis, components=150; Score: 0.896526629504\n",
  583. "Linear Discriminant Analysis, components=20; Score: 0.896526629504\n",
  584. "Linear Discriminant Analysis, components=5; Score: 0.896526629504\n",
  585. "Linear Discriminant Analysis, components=1; Score: 0.896526629504\n"
  586. ]
  587. }
  588. ],
  589. "source": [
  590. "lda_components = [342, 150, 20, 5, 1]\n",
  591. "\n",
  592. "for n_components in lda_components:\n",
  593. "    # Pickles are binary data: read with 'rb'. Only load pickle files that this notebook wrote itself.\n",
  594. "    with open('lda_{}.pickle'.format(n_components), 'rb') as handle:\n",
  595. "        model = pickle.load(handle)\n",
  596. "\n",
  597. "    score = model.score(valid_x, valid_y)  # accuracy on the validation split\n",
  598. "    print('Linear Discriminant Analysis, components={}; Score: {}'.format(\n",
  599. "        n_components, score\n",
  600. "    ))"
  601. ]
  602. },
  603. {
  604. "cell_type": "code",
  605. "execution_count": 4,
  606. "metadata": {
  607. "collapsed": false
  608. },
  609. "outputs": [
  610. {
  611. "name": "stdout",
  612. "output_type": "stream",
  613. "text": [
  614. "Random Forests, estimators: 10; Score: 0.934577264671\n",
  615. "Random Forests, estimators: 5; Score: 0.93108211135\n",
  616. "Random Forests, estimators: 3; Score: 0.926601780251\n",
  617. "Random Forests, estimators: 1; Score: 0.915037080309\n"
  618. ]
  619. }
  620. ],
  621. "source": [
  622. "rf_estimators = [10, 5, 3, 1]\n",
  623. "\n",
  624. "for estimators in rf_estimators:\n",
  625. "    # Pickles are binary data: read with 'rb'. Only load pickle files that this notebook wrote itself.\n",
  626. "    with open('rf_{}.pickle'.format(estimators), 'rb') as handle:\n",
  627. "        model = pickle.load(handle)\n",
  628. "\n",
  629. "    score = model.score(valid_x, valid_y)  # accuracy on the validation split\n",
  630. "    print('Random Forests, estimators: {}; Score: {}'.format(\n",
  631. "        estimators, score\n",
  632. "    ))"
  633. ]
  634. },
  635. {
  636. "cell_type": "markdown",
  637. "metadata": {},
  638. "source": [
  639. "# Column Significance"
  640. ]
  641. },
  642. {
  643. "cell_type": "code",
  644. "execution_count": 1,
  645. "metadata": {
  646. "collapsed": true
  647. },
  648. "outputs": [],
  649. "source": [
  650. "import pandas as pd\n",
  651. "\n",
  652. "train = pd.read_csv('processed_train.csv', index_col=0).dropna(axis=1)  # NOTE(review): NaN columns are dropped per split - confirm train and valid keep the same columns\n",
  653. "valid = pd.read_csv('processed_validate.csv', index_col=0).dropna(axis=1)  # same per-split filter as the line above"
  654. ]
  655. },
  656. {
  657. "cell_type": "code",
  658. "execution_count": 7,
  659. "metadata": {
  660. "collapsed": false
  661. },
  662. "outputs": [],
  663. "source": [
  664. "# Each entry pairs a display name with the training columns that belong to that feature group.\n",
  665. "# Columns are collected by substring match on the column name.\n",
  666. "activity_columns = ('Activity',\n",
  667. "                    [col for col in train.columns if 'activity' in col])\n",
  668. "dcurrency_columns = ('Disbursal Currency',\n",
  669. "                     [col for col in train.columns if 'disbursal_currency' in col])\n",
  670. "country_columns = ('Country',\n",
  671. "                   [col for col in train.columns if 'country' in col])\n",
  672. "sector_columns = ('Sector',\n",
  673. "                  [col for col in train.columns if 'sector' in col])\n",
  674. "# 'repayment' is a substring of both num_repayments and repayment_interval, so either kind of column lands here when present.\n",
  675. "repayment_columns = ('Repayment',\n",
  676. "                     [col for col in train.columns if 'repayment' in col])\n",
  677. "\n",
  678. "# Single-column groups are listed by name.\n",
  679. "nborrowers_columns = ('Number of Borrowers', ['num_borrowers'])\n",
  680. "gratio_columns = ('Gender Ratio', ['gender_ratio'])\n",
  681. "nlenders_columns = ('Lender Count', ['lender_count'])\n",
  682. "damount_columns = ('Disbursal Amount', ['disbursal_amount'])\n",
  683. "\n",
  684. "colgroups = [\n",
  685. "    activity_columns, dcurrency_columns, country_columns,\n",
  686. "    sector_columns, repayment_columns, nborrowers_columns,\n",
  687. "    gratio_columns, nlenders_columns, damount_columns\n",
  688. "]"
  689. ]
  690. },
  691. {
  692. "cell_type": "code",
  693. "execution_count": 9,
  694. "metadata": {
  695. "collapsed": false
  696. },
  697. "outputs": [
  698. {
  699. "name": "stdout",
  700. "output_type": "stream",
  701. "text": [
  702. "Removing columns: Activity\n",
  703. "Score: 93.569\n",
  704. "\n",
  705. "Removing columns: Disbursal Currency\n",
  706. "Score: 93.458\n",
  707. "\n",
  708. "Removing columns: Country\n",
  709. "Score: 93.381\n",
  710. "\n",
  711. "Removing columns: Sector\n",
  712. "Score: 93.628\n",
  713. "\n",
  714. "Removing columns: Repayment\n",
  715. "Score: 93.180\n",
  716. "\n",
  717. "Removing columns: Number of Borrowers\n",
  718. "Score: 93.450\n",
  719. "\n",
  720. "Removing columns: Gender Ratio\n",
  721. "Score: 93.422\n",
  722. "\n",
  723. "Removing columns: Lender Count\n",
  724. "Score: 90.606\n",
  725. "\n",
  726. "Removing columns: Disbursal Amount\n",
  727. "Score: 92.342\n",
  728. "\n"
  729. ]
  730. }
  731. ],
  732. "source": [
  733. "from sklearn.ensemble import RandomForestClassifier\n",
  734. "\n",
  735. "def validate_column_group(train, valid, colgroup, n_estimators=10, random_state=0):\n",
  736. "    \"\"\"Return the validation accuracy of a random forest trained without the columns in colgroup.\n",
  737. "\n",
  738. "    The forest is seeded (random_state) so that score differences between column\n",
  739. "    groups are not caused by the forest's own randomness.\n",
  740. "    \"\"\"\n",
  741. "    def split_xy(frame):\n",
  742. "        # Drop the ablated columns, then separate the features from the bad_loan target.\n",
  743. "        kept = frame.drop(colgroup, axis=1)\n",
  744. "        return kept.drop('bad_loan', axis=1), kept['bad_loan']\n",
  745. "\n",
  746. "    rf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)\n",
  747. "    rf.fit(*split_xy(train))\n",
  748. "    return rf.score(*split_xy(valid))\n",
  749. " \n",
  750. "for name, colgroup in colgroups:\n",
  751. "    score = validate_column_group(train, valid, colgroup)\n",
  752. "    # print(...) calls with one argument behave the same on Python 2 and 3 and match the other cells.\n",
  753. "    print('Removing columns: {}'.format(name))\n",
  754. "    print('Score: {:.3f}'.format(score*100))\n",
  755. "    print('')"
  756. ]
  757. },
  758. {
  759. "cell_type": "code",
  760. "execution_count": 16,
  761. "metadata": {
  762. "collapsed": false
  763. },
  764. "outputs": [
  765. {
  766. "data": {
  767. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgAAAAF5CAYAAADpvZJuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XmYVNWd//H3t7obsKFYlAQRQZgYgaCi3W4EjAvKogE1\nJJMhGrPML4YokiBoVBxxARMjgsagcSYzGmNkJpEYWSUuMRpEiN3uQkyiRgUlGoEuFqHpOr8/zi36\ndlHVXVX0Us39vJ6nnq4699y6p05X9/3ec89izjlEREQkWmJtXQARERFpfQoAREREIkgBgIiISAQp\nABAREYkgBQAiIiIRpABAREQkghQAiIiIRJACABERkQhSACAiIhJBCgBEREQiKO8AwMxONrNFZrbe\nzJJmNj6HfU41syoz+9jMXjezrxVWXBEREWkOhbQAdAZeAC4GmlxIwMz6A0uAx4GhwO3Az8zszAKO\nLSIiIs3A9mUxIDNLAuc65xY1kudmYKxz7uhQ2gKgm3PurIIPLiIiIgVrjT4AJwGPpaWtAIa1wrFF\nREQkg9JWOMbBwMa0tI1AVzPr6Jzbmb6DmR0EjAbeAj5u8RKKiIjsPzoB/YEVzrl/ZsvUGgFAIUYD\nv2zrQoiIiLRj5wMPZNvYGgHA+0CvtLReQE2mq//AW/7H/cBgwNG793dYsuSnLVTExn3+85N47727\nAAMmAannYZOCn+nbsqXnur3+sx9//JdIJn+VNR+cDDzd5Ptk0/Bz5r9/Y+rf+zvk8llb09SpU5k3\nb16rHnN/kEu95f63U1zfiZak71thVG+5W7t2LRdccAHsOZdm1hoBwCpgbFraqCA9m6DZfzBQQSy2\nnC996SwqKipapIBN+eIXxzJ//gckk2PwH+UDYExarrH4uk7fNhb4e5Z9AAYFPzNvN1uy57MfeeTh\nvPTSRiBT38mllJWVUleXKmdDudRhw8+Z//6NqX/vbPW378coVLdu3drsu9We5VJvuf/tZP5et/Xf\nfkvQ960wqreCNH4L3TmX1wM/DHAocAyQBL4XvO4bbP8B8PNQ/v5AArgZGIgfPrgLOKORY1QADp5z\nsdgyN2TIma6mpsa1lZqaGjdkyJkuFlvmYIuDMx0sdZB04BwkndmDrkOHTzk4KW3bFgfHZ0hPOljs\njjjiFHfEEadk3T548Mg9n339+vWuY8dPO1iSlm+J69jx0+60004LlbN+e6512PBz5r9/Lu9t9mBQ\nf+nHWNpmv+dx48a1+jH3B7nUW65/Ox07ftrFYkub/XtXjPR9K4zqLXdVVVXOn0OpcI2czwsZBXAc\n8DxQFRzgVqAauD7YfjDQNxRgvAWcDZyBnz9gKvDvzrn0kQF76d37YiZPXs2qVQuJx+MFFLV5xONx\nVq1ayOTJq+nffwK9e5cRj19JPH48vXuPo3//UVx66cu8+eZTTJp0KvH4lZSUDKWkZDjx+Ol885sn\n881vfjYt/XgmTXqG555bzHPPLc6wn9++evVDez77IYccwhtvPMnQoXdQWno0sdgISkuPZujQO3jj\njSfp0qVLqJyj6NPnHPr3H5VzHTb8nPnvn8t7X3rpy/TrV0vnzjMoLT2azp1P47DDzmDy5DVt/nuW\n5pfr384bbzzJ5Mlrmv17JyLZ7dM8AC3FzCqAqqqqqqJs8nHOYWZ7PU/PA+y1LVt6rtvDkskksVh9\nDDd+/HgWLaqfkiFb2XK1r/vn8t4teYxcpdeb5KaQesv1b6etvxMtSd+3wqjeclddXU1lZSVApXOu\nOlu+Yh0FUNTC/5yy/aPKNz3X7WHhkz/AxIkTC36vfS1Loe9dDP/o0+tNclNIve3L387+Qt+3wqje\nmp9aAERERPYjubYAaDVAERGRCFIAICIiEkEKAERERCJIAYCIiEgEKQAQERGJIAUAIiIiEaQAQERE\nJIIUAIiIiESQAgAREZEIUgAgIiISQ
QoAREREIkgBgIiISAQpABAREYkgBQAiIiIRpABAREQkghQA\niIiIRJACABERkQhSACAiIhJBCgBEREQiSAGAiIhIBCkAEBERiSAFACIiIhGkAEBERCSCFACIiIhE\nkAIAERGRCFIAICIiEkEKAERERCJIAYCIiEgEKQAQERGJIAUAIiIiEaQAQEREJIIUAIiIiESQAgAR\nEZEIUgAgIiISQQoAREREIkgBgIiISAQpABAREYkgBQAiIiIRpABAREQkghQAiIiIRJACABERkQhS\nACAiIhJBCgBEREQiSAGAiIhIBCkAEBERiSAFACIiIhGkAEBERCSCFACIiIhEkAIAERGRCCooADCz\nS8zsTTPbYWbPmtnxTeT/npmtM7PtZva2mc01s46FFVlERET2Vd4BgJl9GbgVmAkcC7wIrDCznlny\nfwX4QZB/EPBN4MvA7ALLLCIiIvuokBaAqcDdzrn7nHPrgEnAdvyJPZNhwB+dc//nnHvbOfcYsAA4\noaASi4iIyD7LKwAwszKgEng8leacc8Bj+BN9Js8AlanbBGb2L8BZwNJCCiwiIiL7rjTP/D2BEmBj\nWvpGYGCmHZxzC4LbA380Mwv2/6lz7uZ8CysiIiLNI98AIG9mdipwNf5WwRrgcODHZvaec25WY/tO\nnTqVbt26NUibOHEiEydObKHSioiItB8LFixgwYIFDdK2bNmS077mW/BzE9wC2A5McM4tCqXfC3Rz\nzp2XYZ+ngFXOue+H0s7H9yPokuU4FUBVVVUVFRUVOZdPREQk6qqrq6msrASodM5VZ8uXVx8A51wt\nUAWMTKUFzfoj8ff6MykHkmlpydC+IiIi0soKuQUwF7jXzKrwTfpT8Sf5ewHM7D7gXefc1UH+xcBU\nM3sBWA18GrgBWOTyaX4QERGRZpN3AOCc+1XQqe8GoBfwAjDaOfdBkOVQYHdolxvxV/w3An2AD4BF\nwDX7UG4RERHZBwV1AnTO3QncmWXb6WmvUyf/Gws5loiIiDQ/rQUgIiISQQoAREREIkgBgIiISAQp\nABAREYkgBQAiIiIRpABAREQkghQAiIiIRJACABERkQhSACAiIhJBCgBEREQiSAGAiIhIBCkAEBER\niSAFACIiIhGkAEBERCSCFACIiIhEkAIAERGRCFIAICIiEkEKAERERCJIAYCIiEgEKQAQERGJIAUA\nIiIiEaQAQEREJIIUAIiIiESQAgAREZEIUgAgIiISQQoAREREIkgBgIiISAQpABAREYkgBQAiIiIR\npABAREQkghQAiIiIRJACgFbknGvrIoiIiAAKAFpcIpFgypSZDBhwBn37nsuAAWcwZcpMEolEWxdN\nREQirLStC7A/SyQSDBs2gbVrLyOZvA4wwDF//gqeeGICq1YtJB6Pt3EpRUQkitQC0IJmzJgTnPzH\n4E/+AEYyOYa1a6dyzTW3tmXxREQkwhQAtKDFi1eSTI7OuC2ZHMOiRStbuUQiIiKeAoAW4pyjtrYz\n9Vf+6Yza2nJ1DBQRkTahAKCFmBllZduAbCd4R1nZNsyyBQgiIiItRwFACxo3bjix2IqM22KxRxg/\nfkQrl0hERMRTANCCZs+ezuDBc4nFllPfEuCIxZYzePA8Zs2a1pbFExGRCFMA0ILi8TirVi1k8uTV\n9O8/ij59zqF//1FMnrxaQwBFRKRNaR6AFhaPx7n99uu4/XbfMVD3/EVEpBioBaAV6eQvIiLFQgGA\niIhIBCkAEBERiSAFACIiIhGkAEBERCSCFACIiIhEkAIAERGRCFIAICIiEkEKAERERCKooADAzC4x\nszfNbIeZPWtmxzeRv5uZzTezDWb2sZmtM7MxhRVZRERE9lXeUwGb2ZeBW4GLgDXAVGCFmR3hnPsw\nQ/4y4DHgfeALwAbgMGDzPpRbRERE9kEhawFMBe52zt0HYGaTgLOBbwI/ypD/34HuwEnOubog7e0C\nj
isiIiLNJK9bAMHVfCXweCrNOefwV/jDsuw2DlgF3Glm75vZy2Z2lZmp/4GIiEgbybcFoCdQAmxM\nS98IDMyyz78ApwP3A2OBw4G7gmPfmOfxRUREpBm0xnLAMXyAcFHQWvC8mR0KTKeJAGDq1Kl069at\nQdrEiROZOHFiS5VVRESk3ViwYAELFixokLZly5ac9jV/Ts5NcAtgOzDBObcolH4v0M05d16GfZ4E\ndjnnRoXSxgBLgY7Oud0Z9qkAqqqqqqioqMi5fCIiIlFXXV1NZWUlQKVzrjpbvrzuwzvnaoEqYGQq\nzfwi9yOBZ7LsthLf7B82EHgv08lfREREWl4hHfHmAt8yswvNbBDwU6AcuBfAzO4zs5tC+e8CDjSz\nH5vZp83sbOAq4Cf7VnQREREpVN59AJxzvzKznsANQC/gBWC0c+6DIMuhwO5Q/nfNbDQwD3gRWB88\nzzRkUERERFpBQZ0AnXN3Andm2XZ6hrTVwGcLOZaIiIg0P43FFxERiSAFACIiIhGkAEBERCSCFACI\niIhEkAIAERGRCFIAICIiEkEKAERERCJIAYCIiEgEKQAQERGJIAUAIiIiEaQAQEREJIIUAIiIiESQ\nAgAREZEIUgAgIiISQQoAREREIkgBgIiISAQpABAREYkgBQAiIiIRpABAREQkghQAiIiIRJACgBw4\n53JKa2x7U/nzzSciIrIvStu6AMUqkUgwY8YcFi9eSW1tZ8rKtjF69HGAsWLFn/akjRs3nNmzpxOP\nx/fap6RkC927d2Tz5lrq6uJ75W/sWJnyiYiINBvnXNE9gArAVVVVubZQU1Pjhgw508Viyx0kHTgH\nWxyc5GBJKC3pYrHlbsiQM9369evT9qlxcKaDpRnz19TUNHKsvfOJiIjkoqqqygEOqHCNnGt1CyCD\nGTPmsHbtZSSTYwALUm8FZgJnh9KMZHIMa9dO5ayzvpG2zxzgMuCsjPmvuebWRo61dz4REZHmpAAg\ng8WLV5JMjk5LXQmkp3nJ5BheeWV92j6N51+0aGUjx9o7n4iISHNSAJDGOUdtbWfqr8bBt6Skp6Xv\n1z20van8Rm1tOclkMsOx9s7n1DFQRESamQKANGZGWdk2/El8TyqQnpa+3+bQ9qbyO8rKthGLxTIc\na+98ZtkDDxERkUIoAMhg3LjhxGIr0lKHA+lpXiz2CEce2Sdtn8bzjx8/opFj7Z1PRESkOSkAyGD2\n7OkMHjyXWGw59Vfn04DrgSWhNEcstpzBg+exbNk9aftMB+YCSzPmnzVrWiPH2jufiIhIc7JivL9s\nZhVAVVVVFRUVFW1ShkQiwTXX3MqiRSuprS2nrGw7Y8b4eQAeeeRPe9LGjx/OrFnT9swDEN6npKSG\nHj38PAC7d3fZK39jx8qUT0REpCnV1dVUVlYCVDrnqrPlUwCQA+fcXvfhM6U1tr2p/PnmExERySTX\nAEC3AHKQ6YTc1Ek6fXuuJ3Wd/EVEpDUoABAREYkgBQAiIiIRpABAREQkghQAiIiIRJACABERkQhS\nACAiIhJBCgBEREQiSAGAiIhIBCkAEBERiSAFACIiIhGkAEBERCSCFACIiIhEkAIAERGRCFIAICIi\nEkEKAERERCJIAYCIiEgEKQAQERGJIAUAIiIiEaQAQEREJIIKCgDM7BIze9PMdpjZs2Z2fI77/ZuZ\nJc3sN4UcV0RERJpH3gGAmX0ZuBWYCRwLvAisMLOeTezXH7gFeCrvUoqIiEizKqQFYCpwt3PuPufc\nOmASsB34ZrYdzCwG3A9cC7xZSEFFRESk+eQVAJhZGVAJPJ5Kc8454DFgWCO7zgQ2OufuKaSQIiIi\n0rxK88zfEygBNqalbwQGZtrBzEYA3wCG5l06ERERaRH5BgB5MbMuwH3At5xzm/Ldf+rUqXTr1q1B\n2sSJE5k4cWIzlVBERKT9WrBgAQsWLGiQtmXLlpz2Nd+Cn5vgFsB
2YIJzblEo/V6gm3PuvLT8Q4Fq\noA6wIDl126EOGOic26tPgJlVAFVVVVVUVFTkXD4REZGoq66uprKyEqDSOVedLV9efQCcc7VAFTAy\nlWZmFrx+JsMua4GjgGPwtwCGAouAJ4Ln7+RzfBEREWkehdwCmAvca2ZVwBr8qIBy4F4AM7sPeNc5\nd7VzbhfwWnhnM9uM7zu4dl8KLiIiIoXLOwBwzv0qGPN/A9ALeAEY7Zz7IMhyKLC7+YooIiIiza2g\nToDOuTuBO7NsO72Jfb9RyDFFRESk+WgtABERkQhSACAiIhJBCgBEREQiSAGAiIhIBCkAEBERiSAF\nACIiIhGkAEBERCSCFACIiIhEkAIAERGRCFIAICIiEkEKAHKUbdnkZDLZaH7nXNZ9RURE2ooCgEYk\nEgmmTJnJgAFn0LfvuQwYcAZTpszk9ddfZ+jQ0ZSWHklZ2ecoLT2SoUNH8/rrrzNlykwOO+xUunQ5\nBrNPUVJyFGVlI+ja9TgmTbqKRCLR1h9LRESksMWAoiCRSDBs2ATWrr2MZPI6wADHHXc8wE9+Mhrn\n5gNj96S/9NJCBg0aDczBuafxKyT/EOfGUFdnJBKOu+9exlNPncfq1Q8Rj8fb7LOJiIioBSCLGTPm\nBCf/MfiTPMHP+4KT/1lp6S/j3HycewnoB8ykPkBI5Tmbdeu+xzXX3Npqn0NERCQTBQBZLF68kmRy\ndIYt6/En9nQrg/SVwLtApn3BubNZtGhlcxVTRESkILoFkIFzjtraztRfvackge4Z0h3QOXheHmxP\nz5Ni1NaW45zDLFseERGRlqUAIAMzo6xsG/7EHj5Jx4DNGdIN2BY83x78TM/DnvSysm06+YuISJvS\nLYAsxo0bTiy2IsOWPsDyDOnDg/ThwKFApn3BbCnjx49ormKKiIgURAFAFrNnT2fw4LnEYsvxV/ME\nP7+K2SXA0rT0IzG7BLOjgLeB64FlaXmWMGjQbcyaNa3VPoeIiEgmCgCyiMfjrFq1kMmTV9O//yj6\n9DmH/v1HMWXKX1m3bgVDh95BaenRxGIjKC09mqFD/5t161Zw6aWv0K+fo7x8O2ZTMDuKkpLPEo8f\nz6RJz2gIoIiIFAUrxlnqzKwCqKqqqqKioqKtiwOQtdNeMpkkFts7jkrlT9Wv7vmLiEhrqK6uprKy\nEqDSOVedLZ86AeYo2wk808k/nF8nfhERKUa6BSAiIhJBCgBEREQiSAGAiIhIBCkAKFAxdp4UERHJ\nlQKAPGRbHlhL/IqISHujUQA5yrY88Pz5K3jiiQmsWrVQ4/tFRKTdUAtAjrItD5xMjmHt2qla4ldE\nRNoVBQA5yr48MCSTY7TEr4iItCsKAHKQfXnglPolfkVERNoDBQA5aLg8cCZa4ldERNoXBQA5yr48\nMMRij2iJXxERaVcUAOQo2/LAsdhyBg+epyV+RUSkXVEAkKNsywNPnrxaQwBFRKTd0TwAeYjH49x+\n+3Xcfnv25YFFRETaA7UA5CBT7/7wyd851yBPen6NDhARkWKjFoAsEokEM2bMYfHildTWdqasbBvj\nxg1n9uzpxONxEokEl18+mwceWMb27eBcOSUlm+jYsQvx+CGUlSXo3r0jmzfXUlcX32t/ERGRtqQA\nIIOmpv393e/uZeTIC1i3bgdwMzAc+CLJ5Exqa8ewdetWYAJvv/1dYOxe+6vPgIiItDXdAsigqWl/\nzzrrG6xb1w+YiT/B3wpcRv3Jfk7w+qyM+2vaYBERaWsKADJoatrfV15ZD7wLpPKsDD3P9Lrh/po2\nWERE2poCgDRNT/sLyWQ3IJXHhZ6T4XU6TRssIiJtTwFAmqan/YVYbAuQymOh52R4nU7TBouISNtT\nAJDGOdfktL9HHtkHOBRI5Rkeep7pdcP9NW2wiIi0NY0CYO8hfyUlW+je/WE2b06STNb34o/FHmHw\n4HksW5YaBXA9kASmAV8Mno8
FpgMTgDrqOwLW7z9r1sI2+ZwiIiIpkQ8Asg35M3uIHj2uIh6/ld27\nu1BWtp3x44cza5YfwrdmzcNcccVN/PKXV4bmAZhKp07X0qVLb8rKaunR4w42b56bcX8REZG2FPkA\noOGQvxTDuS+wefMBXHDBam67beZe9+zj8Th33fUD7rrrB3s69KXypE8TrGmDRUSk2ES+D0BTQ/4W\nLVrZ5MnbzBrkSc+vk7+IiBSbSAcATQ/505A9ERHZP0U6AGh6yJ+G7ImIyP4p0gEA0OSQPw3ZExGR\n/VFBAYCZXWJmb5rZDjN71syObyTv/zOzp8zso+DxaGP5W9vs2dMZPHgusdhy6lsCHLHY8mDI3rQG\ny/1muh0Q3qbbBSIi0h7kPQrAzL6MX/3mImANMBVYYWZHOOc+zLDLKcADwDPAx8CVwO/M7DPOufcK\nLnkzicfjrFq1kGuuuZVFi+ZSW1tOWdl2xow5jp07j6RPn+Fs21ZHMukwgwMO6EXPnsbYsScAxvLl\nq/jww01s374VswOIxbpRXr6Tr3zlTG655WoN+RMRkaJk+V6xmtmzwGrn3HeD1wa8A/zYOfejHPaP\nAZuAS5xz92fJUwFUVVVVUVFRkVf59pVzjq1bt3LCCecEy/1OB+7Gr+43Gt9hsCZ4fhlwF7ADuBZI\nrR7ogGUMHjyP1asfUhAgIiKtprq6msrKSoBK51x1tnx53QIwszKgEng8leZ8BPEYMCzHt+kMlAEf\n5XPs1mJmzJgxJ7Tc70v4E314aeBbg22vAOFlgeuX/oWzWbfue1r6V0REilK+fQB6AiXAxrT0jcDB\nOb7HzcB6fNBQlBYvXkn9cr+ZlvZdGdr2bobtnnNna+lfEREpSq06E6CZXQn8K3CKc25XU/mnTp1K\nt27dGqRNnDiRiRMntlAJ/S2AXbvKqb+aT58nILXcL0AqX9PzCGgooYiINLcFCxawYMGCBmlbtmzJ\nad98A4AP8Svc9EpL7wW839iOZjYduAIY6Zx7NZeDzZs3r9X7AJgZHTpsD6WEl/2F+uV+AVL5wtvD\nNI+AiIi0nEwXxaE+AI3K6xaAc64WqAJGptKCToAj8b38MzKzK4AZwGjn3PP5HLMtjBs3nPrlfjMt\n7Ts8tO3QDNs9s6WaR0BERIpSIfMAzAW+ZWYXmtkg4Kf4tvB7AczsPjO7KZXZzL4P3AB8E3jbzHoF\nj857v3VxmD17OoMGvQ1cDxyF/8jheQKmBduGAKl8ywjPIwBLGDToNmbNmtaaRRcREclJ3n0AnHO/\nMrOe+JN6L+AF/JX9B0GWQ4HdoV0m4Xv9P5j2VtcH71F0Gi73e0MwD8A0zKZTXv5JevaMMXbsqcAL\nLFvm+PDD7ezYMQXoRCzWlfLyXZx//ih+9CMNARQRkeKU9zwAraEt5wHIJLzcb6YOfam09GWBRURE\nWluu8wC06iiA9qqxpX7DaTrxi4hIexH5xYBERESiSAGAiIhIBEU+ACjGPhAiIiItLZIBQCKRYMqU\nmQwYcAZ9+57LgAFnMGXKTBKJxF55FSCIiMj+KHKdABOJBMOGTWDt2stIJq8jtXrf/PkreOKJCaxa\ntRCAGTPmsHjxSmprO1NWto1x44Yze/Z0DesTEZH9QuQCgBkz5gQn/zGhVCOZHMNrryW5/PLZ/PGP\n1Y0GCAoCRESkvYvcLYDFi1eSTIZX70sAVwJH49yV3H33b3n11e8GAUL9/P8+QNDyviIisn+IVAuA\nc47a2vDqfgngHGAHfpXi4cDpwFmhvRLAHGAlznVm/vy/4ZzT7QAREWnXItUCYGaUlaVW9wN/Yu8H\nzARGABOAg2gYIEwAhgGPAg9TV/cy8+efxLBhEzJ2GhQREWkPIhUAgF/pLxZLrd63EngXGI0PBqbh\nVzsOBwiXAem3A8aydu1U3Q4QEZF2K3IBwOzZ0xk8eC5+baLdQOqWwEp8IBBe/jeVtrdkcgyLF
q1s\n8fKKiIi0hEj1AQC/0t/vfncvRx11Fh991BnYBiSpDwSm45v9w2mZGLW15RkXBxIRESl2kWsBAPjh\nD+9m8+YfAmfgVy/+HT4QcEAcWAisAf5G/e0A9npeVrZNJ38REWmXItcCAPDww08HY/yH40cBXA8c\nATwCjMUHAdfhT/i/AV7C3w5ItRgMx+xoxo8f0eplFxERaQ6RawGoqalhw4ad+Kb9OHA/vhoeBy4G\nllB/pX8R8H3gRFKjAPzPE+nQ4Sq+//2LWrn0IiIizSNyAcDUqdezezf4k3wC+DdgI/ADYADwXeAo\n4CT80MDb8fMC1I8CgLOorb2Nm2/+z9YtvIiISDOJVACQSCS4555lwEh8T/85QC1+HoAb8R0A/wq8\nAjwDHELDSYHABw6OZHKsRgGIiEi7Fak+ANOnz8K5bsAk/Im9HN8K8AvgNhqe7A3oGfxMALOBZcG2\nOLCTf/yjjpqaGrp27dpKn0BERKR5FHULQGNL8Trn8lqqN5FI8D//8xCwBfg68B/4+Kc7sAHf+S/M\n8B3+avAdBf+Any74RXyHwD+xffuNnHTSFzQjoIiItDtFHQCMHXsZAwacwZQpM0kkEiQSCSZNupKu\nXY+mrGwoZWUj6Nr1OCZNuirjSTgVICQSCU466Qvs3t0B3+T/PWAU8FHw6E7m8f7DgUupny54LA37\nApzNunVaIEhERNofy+cqurWYWQVQBVXAscRiKxgw4Abeeecf7Nr1CeBa/PS8KcsYPHgeq1c/BPgl\nfxcvXkltbWfKyrbRtWsJL730Lfw9/13+rbkK+C3QA3+V/woN1wCYg7/qfx8/V8Cj1N8O8IsDpYYF\nxuP/YP36lVocSERE2lx1dTWVlZUAlc656mz52kEfACOZPJq//W0DcAq+Of5Z4FbC4/LXrr2IK664\niaefrmLt2suCcf4GrMdf7a8CtgMHBulLgKvx4/13A8vxfQBSCwBdhr/qHw+UUH/yT21Lvb8jkVjG\nsGETWLVqoYIAERFpF4r6FkD9ePwLgT7A34E7gRPwV+S/DX4eCczipz/9Fa+++l2SydTiPRuAU/FX\n8CvwJ/gP8NP8JvE9/m/E9/b/DrAUuIX6BYBiwMfUzxKYeXEgOFuLA4mISLtS5AHAZcDJwDv4+/Rb\ngG/jp+k9EzgXf4Kfjb8t0JWGPfm/gR/Hvxvogr/id8BDQd6ngJ/jr+afAe4Afk3DBYCGUx9AaHEg\nERHZPxR5ALAWeAM4CHgLfyX+X8BQoBJ4E9/EPwOYD3Skvql+Jr7FYCzw2SAtjh/adz3+vn8N9Vf0\nffC3AY4ZuzKZAAAYxUlEQVSgYYfA6cDb+CChhFwWBxIRESl2RR4AlAB98T31a/DFnYQfs78YP3tf\nP+Bl4DD8Sb4Gf5/+RHwHP8M372/Hz/V/Gr7vQAmwk4ZX9Kmhf+GTeBw/BfBp+EAg2wleiwOJiEj7\nUeQBwCT8vfqtQCf8CXo1/j7+HOAY/In9GfzVfi1+2N5l+FsBm/En7DvwrQg34Kf5jeFnA+zC3lf0\nw/HN/SmpXv9/AjpQPxlQQ7HYI1ocSERE2o0iHwXwP/hm/QOAbvir9uX4nvwjgGH4E/yB+P4BJfgh\nfvcG6SX4+/2L8Sf1U4LnqbwW5Etv8p+ADzxGAF8EpuJvAWwNtkH9+gCOWOwRBg+ex6xZC5v344uI\niLSQIm8B2Bk8DsLfBkhd0ffAN/874Hh8XwAHfBLojz8xb8XHNzPx9/e7AuPwHfm2A4/hhwc+Ejpe\nasrf9fiWhGPxiwOlJgCKAwvxrQEj6Nz5NPr3H8Xkyas1BFBERNqVIm8B6IC/+t8c/NyJbxH4K36I\nXylwE3A6cAb+Xv1WfD+Az+NP9KXBfufhr+Tn4k/wXwQup/6Kfjh+VMF2/NX+y/iTffpiQPFg+0x6\n9jyTN998tFk/sYiISGso8haAbsHPg/Fj9Q8G/oG/Mi/Dn
4x/jG8RuBx/8t8B/Ds+QDgG34mwFD98\n8DHgU/hRAVvxfQAW4ocDVgTvP436kQbZpggGMHbv7qxe/yIi0i4VeQCwFX8SfgA/F8Dp+B78/fD3\n8T/Cd8rbGqQfjm/qXwP8EHgN3zHwQ3xHwIeBXvgTfQLfnyD1sxfwZ/yV/2XAS0EZ1OtfRET2P0Ue\nAOzCN+f3Bs4GhuADgj/gm+qPwZ+gY/jOfu/iRwuU41ft64O/gt+JDwROxnfy24oPFObgRwPMxvcz\n6IQfUTAa31dgJA1HBIQtVa9/ERFpt4o8AKjDn+Afwd93/xmwCd+8fwC+g18X/Il6Cv5q/j18kLAK\nf+JfH7xXInj+p+B1f/xcAQfi7/NvCd67c7C9M/62wlx8C0GqJcAByyktncaNN17WvB9XRESklRR5\nANABPxxvHvBH/DS9O/DF7g5cgu8gCP4+/wh8R7/UiTwJfALfVyCO70C4GN/cvwN/qyC1OFBZ8D4f\nBj+3Ud9HYDV+xMA5wc9nOeSQ3nTt2rUFPrOIiEjLK/JRACcDTwPfwp+E5+Ln9e+GvzXwIXAcvh9A\nOb5lYD7+yn9DkKcMf+JP4Jv+D8XfDhiGb+5PDSHchB8muAh/xZ+aEGgMvvUBUnMGxGLLOfdc3fsX\nEZH2q8hbAC7DL+BzOX7u/9/h5+rvgr890Anfoc+oDwpi+J7/G/BX9wAD8f0JYsF+I/D9B94K3veR\n4L2uwbcYTMf3N0hv/odYbFkw6c+0FvnEIiIiraHIA4CLgWeBfpSXX0v//mfSq1cH/P16h2/WfwJ/\nO+AjfMfAcvzH6oJvxq8D3sdf9Zfjr+yHUr940B3Aj/C3ErrglwQ+B5iF7zMwDb/c8In063cGkyev\n0aQ/IiLS7hV5AAC+s94SevbswZtvPsZ77z3ChAkn40/6m/An+H8CH+NX+tuCDwx64IOEDsBkfK//\nA4CLgO9TfzugK34OgCS+JSCOn2XwJeAV4FXgFrp02c7f//44t99+nU7+IiLS7hV5AHAX/v770/To\n0QEAM+Oee+YxYEAJ/qTv8P0CuuBbAHbhA4LN+N79O/FX+T3wvf4vB04I3jcZ7P8Efm2AG0hv8vdB\nwY1ccMHnW+5jioiItLIiDwD8kDu4nU2bdu5JjcfjvPjiowwc2I/6tQF64QOBEvztgJ34Ez34DoAf\n4Xv+V+NnExyLvx3wSJD3BnwLwf/RsMf//wIbuPnmq1ryg4qIiLSqIg8ALsb3/l9IXV3XBtPuxuNx\n1qxZiu/81ydI7Yvv9V+On9jnbnyT/hH4zn4r8LMIdsZ3HJyOnwzI4dcKeBh/ywHqWwH6A53U7C8i\nIvuVIh8GeBf+/nzmaXe7du3KoYeW8+67O/BDBh/Hn+RPD3I8gV8t8CX8zH6fxTf7b6P+BL8L3w8g\ntdrfdUF6/TLBZtlmAxQREWmfirwFwIvFHsk67e55530O3wJwDL5j3yp87/5jgCfxHzG1HHB3fLP/\nofhAYQ5+caBUX4KwVLDh6Ny5VnP+i4jIfqXIAwBHLLa80XH3s2dPZ9Cgd4Bb8E36V+FvAXyAXyoY\n/EgB8Pf6pwFvA9fjVwd8F98fYFmWMizhggtGN8NnERERKR5FHQD07n0xkyevbnTcve8L8DCTJp1K\nPH4LvhPgLuAK/MRBqav+FcCZ+JkFHwZOwQcGnYEZ+OmGl9Bwzv8llJZO5+abr2ypjygiItImrBjX\nszezCqCqqqqKioqKnPdzztG377msX/83/LK+BpyGP+F/Efg2vl/B9/CrC54Z7Pko/vbBrfi+AuX4\nIYWfpV+/P/D3vz/ZHB9LRESkxVVXV1NZWQlQ6ZyrzpavyDsB5sfMKC3dip/ON3XP/nP4k/pC/Am+\nDrgWf6sgSf3ogL3n/Ddbojn/RURkv1TUtwAK4TsLhjv1Tad+NcGZwO+B54jFfsTAgZ/kiCNS/QGW\n0bAj4BIGDbpNc/6Li
Mh+ab8LAGbPns6BB27HTyAEfmhfeEnf0cTjJzB58mr+9KdFPPfc4qD/wJWU\nlAylpGQ48fjxTJr0DKtXP9Suxv8vWLCgrYvQLqneCqN6K4zqrTCqt+ZXUABgZpeY2ZtmtsPMnjWz\n45vI/yUzWxvkf9HMxhZW3KbF43Fefnk5Bx54NfWd+uLATGKxqXzmM0nWr//9njn94/E4d955EzU1\nL1Fb+yK1tX9k8+Y1zJ8/iwMOOIDdu3dTV1e352dtbS27d+9m9+7d1NbW7klLf6S2p+dPJpN73iuZ\nTOKco66ubs8D2JMOvl9D+HlqeyYPPPBAgzypfOH3SAm/bmxbuDyN7dOcksnknrpIl0sZ8k1vqX8s\nhdZR+u+7Mdm+C9n2b87fW2v9Qy7Gfkr7Qieywqjeml/efQDM7Mv4m+kXAWuAqcAKMzvCOfdhhvyf\nBR7Ar8CzFDgf+K2ZHeuce21fCp/NIYccwltvPc0119zKokW3U1tbTlnZdsaPH86sWb8hHo+TSCSY\nMWMOixevpLa2M2b/ZNu2f7Jp00f4vgGx0M/d1MdKSXy17c5w5NQ+pOXvgO97YEF6SbBtN/ULFnUM\nXlvwPBm87hDsUxt6rx6UlNQwZEgf7rnnB1x44eW8+upbwEZisX7Be3QKjuFXQYzFulFevoP+/Q+i\npiZJbe0BbN36DtCBLl16U1aWoHv3jmzeXEtdXRyzf7J9+2Y++mhbcPxOQDmlpVvo1ClOPH4IHTvu\nYNy44cyePX2fW0o2bNjAqad+ib/85R18J8wewCZ69Cjjscd+zr33PszixSvZubNjg3KnynDlld/m\nhz+8e8/vs6xsW6PpzVHmTNK/V7keL7Xfww//gX/+s4aPP95Fp06f4KCDjHPOObnB/hs2bGDs2G/w\n6qvrca47ZpsZMqQPy5ffQzwe3+v4o0cfBxgrVvypVeqgORRajyJRlvq7efDB5U1nhvqrw1wf+PV5\nbw+9Nvxg+iuy5P9fYFFa2irgzkaOUQG4qqoq1xySyWSD1zU1NW7IkDNdLLbcQdLBegcDHPRx0MtB\n79DPTwTPewXPU2npj15pz1Ov+wSP3sHPAQ6GBq97OzjMwX0OPuXgJAcPOjgt9PxMB/c7+LSDpUF5\nXfDzPgd9g7y/cFAevM+vQ++xLMhbE7zXUgdbgueZtqXq41MOjg+9R/o+vgyx2HI3ZMiZrqampuDf\nz/r1611p6WFBXaR/xl87OMzFYtnLYLbQdez46SBP0+npZR43btw+fb+yf69yq6PUfmYLg8+Xvv+y\nPfuvX7/edeyY6buw1HXo8Cl3xBGnph1/S/A7XNLsv7fmqrd0hdZje9FS9ba/U701ruHfzXMOf3VZ\n4Ro7nze2ca/MfqL9WmB8Wvq9wENZ9vk7MCUt7Trg+UaO06wBQLpLL702qCQXPEYF/yQHBI9BoZ/9\nQun9QmnpjwFpz1OvTwq910kOLgyepx5Lg+NfGPzjv9bB10LPlwfbl4bKGy73haE8PYJ84fdI5b02\n9PraRraF3/drjexT/4jFlrkpU2YW/Ps4+uhRobpIf/9rQ+nZynBtDvtmL3Nz/WPZ+3uVWx3V79d0\nHfu6yvyZ4KvOn+jT66Blfm8t9Q+50HpsL3QiK4zqrXEN/26qXC4BQL63AHri2683pqVvBAZm2efg\nLPkPbuQ4nQDWrl2bZ/Fy8+CDy0kmx+NXBgT4a/Az1XyfGkXwMb6BI5VuobR0yVB6OP8/gtcfB8/r\ngucpvYLjb8IPX0w13aSejw+29wqVNyW831/xMx32SnuP1D6p96pOe56+Lfy+4fdIzxP65MlP8utf\nL+drXxu/d7Xk4JVX/oqvo0yfsbFyZ8qTS3rDMm/ZsoXq6qxDZXO29/cq8/Gy79d0eTdu/JDM9QSw\nDv9nFd7Wcr+35qq3dIXWY3vRUvW2v1O9Na7h382ec2en7HuQdwtAb/x/6hPT0m8GVmX
ZZyfw5bS0\n7wDvNXKcr+DPwHrooYceeuihR2GPrzRnC8CH+EvYXmnpvYD3s+zzfp75wc/Mcz7wFg0vl0VERKRx\nnfBr2Te6lG3eUwGb2bPAaufcd4PXhl9d58fOuVsy5P9f4ADn3DmhtJXAi865i/M6uIiIiDSLQqYC\nngvca2ZV1A8DLMd3BMTM7gPedc5dHeS/HXjSzC7DDwOciJ9/91v7VnQREREpVN4BgHPuV2bWE7gB\n35T/AjDaOfdBkOVQQoPknXOrzOwrwOzg8RfgHNdCcwCIiIhI04pyNUARERFpWfvdWgAiIiLSNAUA\nIiIiEVR0AUC+Cw3tz8xsppkl0x6vhbZ3NLP5ZvahmSXM7EEz+2Tae/Q1s6Vmts3M3jezH5lZ0f3e\n94WZnWxmi8xsfVBHe80SY2Y3mNkGM9tuZo+a2eFp23uY2S/NbIuZbTKzn5lZ57Q8R5vZU8F38+9m\ndnlLf7aW1FS9mdk9Gb5/y9LyRLHerjKzNWZWY2YbzewhMzsiLU+z/G2a2almVmVmH5vZ62b2tdb4\njM0txzp7Mu27Vmdmd6bliUydtYaiOhGEFhqaCRwLvIhfaKhnmxasbb2C72x5cPAYEdp2G3A2MAH4\nHHAIfu1jAII/jGX4zp4nAV8Dvo7vwLk/6YzvjHoxfvKLBszs+8Bk/AJWJwDb8N+rDqFsDwCDgZH4\nOv0ccHfoPeL4MbVv4qeqvhy4zsz+Xwt8ntbSaL0FltPw+zcxbXsU6+1k4A7gROAM/BTpvzOzA0J5\n9vlv08z645c0fRwYih9R9TMzO7NFPlXLyqXOHPCf1H/fegNXpDZGsM5aXj4zAbb0gzwXGtrfH/hA\nqDrLtq74WRbPC6UNxM/UeELweix+7YaeoTzfxs/zW9rWn6+F6izJ3mtVbACmptXdDuBfg9eDg/2O\nDeUZjR/NcnDw+jv4ibBKQ3l+ALzW1p+5BevtHuA3jewzKOr1FnyenkE9jAh9v/b5bxM/w+pLacda\nACxr68/c3HUWpP0emNvIPpGus5Z4FE0LgJmV4ecHeDyV5vxv7zFgWFuVqwh8Omii/ZuZ3W9mfYP0\nSnwkHK6vP+MnZUrV10nAy67hMs0rgG7AkJYvetszswH4q4lwPdUAq2lYT5ucc8+Hdn0Mf0VyYijP\nU8658DrQK4CBZtathYpfDE4NmmzXmdmdZnZgaNswVG8A3fGf+aPgdXP9bZ6Er0/S8uwP/w/T6yzl\nfDP7wMxeNrOb0loIol5nza5oAgAaX2iosYWD9mfP4pu4RgOTgAHAU8E91oOBXcHJLCxcX9kWYoLo\n1OnB+H80jX2vDsav1LSHc64O/88pynW5HLgQOB3fFHsKsMzMUqteRb7egrq4Dfijq5/bpLn+NrPl\n6WpmHfe17G0lS50B/BK4ADgVuAn4KvCL0PbI1llLKWQmQGklzrnwPM6vmNka/PLK/4rWSJAW5pz7\nVejlq2b2MvA3/D/o37dJoYrPncBnaNg3RxqXqrPh4UTn3M9CL181s/eBx81sgHPuzdYsYFQUUwtA\nIQsNRYpzbgvwOnA4vk46mFnXtGzh+sq2EBNEp07fx/claex79T6Q3kO7BDgQeC+UJ9J1GfwT/hD/\n/YOI15uZ/QQ4CzjVObchtGlf/zabqrsa59zOfSl7W0mrs/eayL46+Bn+vkWuzlpS0QQAzrlaoArf\nmxjY01Q0EnimrcpVTMysC/ApfKe2Knxnq3B9DQT6UV9fq4Cj0kZRjAK2AJGYijk4ab1Pw3rqir9H\nHa6n7mZ2bGjXkfjAYU0oz+eCE1zKKODPQWC23zOzQ4GDqP9nG9l6C05k5wCnOefeTtu8r3+ba0N5\nRtLQqCC93WmizjI5Fn/7Lvx9i1Sdtbi27oUYfuCbtrfj7zsOwg8n+ifwibYuWxvVxy34IUSHAZ8F\nHsXfzzoo2H4nfnjVqfiORyuBp0P7x/BDKZcDR+P
7EmwEbmzrz9bM9dQZP+TnGHzP4u8Fr/sG268I\nvkfjgKOA3+LXpOgQeo9lwHPA8fimyT8Dvwht74oPvH6Ob778MrAV+Pe2/vwtUW/Bth/hA6XD8P9U\nn8P/oy2LeL3die95fjL+6jL16JSWZ5/+NvHLuSbwPdsH4odr7gLOaOs6aO46A/4FuAY/VPQwYDzw\nV+CJqNZZq/xe2roAGb4oFwNv4YdprQKOa+sytWFdLMAPg9yB70H8ADAgtL0jfmzth8GX/tfAJ9Pe\noy9+XOzW4I/lZiDW1p+tmevplOAEVpf2+J9QnuuCE9F2fK/gw9PeoztwP/5qYhPwX0B5Wp4jgT8E\n7/E2ML2tP3tL1Rt+PfFH8K0nHwNvAHeRFoxHtN4y1VkdcGEoT7P8beIvAKqC/wF/Ab7a1p+/JeoM\nv4jck8AHwffkz/jhol2iWmet8dBiQCIiIhFUNH0AREREpPUoABAREYkgBQAiIiIRpABAREQkghQA\niIiIRJACABERkQhSACAiIhJBCgBEREQiSAGAiIhIBCkAECkCZvZ7M5vbwse4x8x+05LHEJH2QwGA\niOx3zOxNM5vS1uUQKWYKAEQkZ2nL+opIO6YAQKTImFkHM5tjZu+a2VYzW2Vmp4S2f83MNpnZKDN7\nzcwSZrbczHqF8sTMbG6Q7wMzuxmwtOOYmV1lZm+Y2XYze97MJoS2n2JmSTMbY2bPmdnH+CV/myr/\nODNbY2Y7gmMvDG3rbmb3mdlHZrbNzJaZ2eGh7TPN7Pm09/uumb0Zen2PmT1kZtPMbIOZfWhmP0kF\nJ2b2e/ySsvOC8tflVvMi0aIAQKT4zAdOBP4VOAq/lOxyM/tUKE85MA04H7/Gej9gTmj7dOBC4OvA\nCOBA4Ly041wNXABcBHwGmAf8wsxOTsv3A+D7wGDgpcYKbmZnA7/BL9l6DHA6sCaU5ef4Nd8/D5yE\nD0qWpbUsZFqiND3tNPwa8qeGPufXg21fwC+j/R/AwUDvxsosElWlbV0AEalnZn3xJ7K+zrn3g+S5\nZjYW+AZwTZBWCnzbOfdWsN9P8Ce8lO8CNznnHg62TwJGh47TAbgKGOmcWx0kvxWc/L8NPB16r/9w\nzj2e40e4GnjAOXdDKO3l4JiHA+OAYaljmtn5wDvAucBCcvcRMNn59cxfN7OlwEjgv51zm4Kr/q3O\nuX/k8Z4ikaIAQKS4HAWU4E9q4Sb7DsCHodfbUyf/wHvAJwHMrCv+qnfPlbdzrs7MngvlPxzfivBo\n2nHKgOrQawdU5VH+Y4D/zLJtMFCbVq6PzOzPwbZ8vBqc/FPeA47M8z1EIk0BgEhx6QLsxjeTJ9O2\nbQ09r03b5ki7x5/DcQDOAjakbduZ9npbHu+7I4+8mSTZ+3OUZciX6fPrlqZIHvQHI1JcnscH5r2c\nc2+kPXJqznbO1eCviE9MpQX32CtD2V7Dn+gPy3Cc9ftQ/pfwTfGZrMV/tnC5DgIGAq8GSR/g79uH\nHVtAOXbhW1JEJAsFACJFxDn3F+CXwH1mdp6Z9TezE8zsyqAfQK5uB640s3PMbCBwJ9A9dJyt+E6D\n88zsQjP7FzM71swmm9lXQ++TT6sCwPXARDO7zswGmdlRZnZFcMy/AouA/zKz4WY2FLgf3wdgUbD/\nk8AnzOyKoEyXAGPyLAPAW8DnzOyQIMgQkTQKAESKQ/h+9teB+/An6HX4XvXHAW/n8X63Ar8A7gWe\nAWqC96k/oHP/AdwIXIlvEViOvyXwZjhbHsfEOfcH4Ev4zn7PA48Bx4eyfB3fp2AxsBLf5H+2c64u\n2H8dcHHweAH/uW/JpwyBa4H+wN8AdQQUycAa9qMRERGRKFALgIiISAQpABCRnJnZK8HMg+mPGjOb\n2NblE5Hc6RaAiOQsmKgo07A8gI3OuXyGDIpIG1IAICIiEkG6BSAiIhJBCgBEREQiSAGAiIhIBCkA\nEBERiSAFACI
iIhGkAEBERCSCFACIiIhE0P8H1E6XE3i3H2oAAAAASUVORK5CYII=\n",
  768. "text/plain": [
  769. "<matplotlib.figure.Figure at 0x7f12b780c810>"
  770. ]
  771. },
  772. "metadata": {},
  773. "output_type": "display_data"
  774. }
  775. ],
  776. "source": [
  777. "%matplotlib inline\n",
  778. "train.groupby('lender_count')['bad_loan'].mean().plot(style='o');"
  779. ]
  780. },
  781. {
  782. "cell_type": "code",
  783. "execution_count": 19,
  784. "metadata": {
  785. "collapsed": false
  786. },
  787. "outputs": [
  788. {
  789. "data": {
  790. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkUAAAFkCAYAAAAuZk/PAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XmYXXWd5/H3JwJB0EBjJMEWRKSNcaOhBEK3RJwIYWvo\nGVEspRu3oVFcJo7KtGM3ND5NCz4sokZBVGSxZhDa1gYkCiqgbJogDRKCo8HIkkgUEwQSlvzmj3Nu\nc3KtVJKbqrq3Uu/X89RTdc/53nO+98et1IffWW5KKUiSJI13E7rdgCRJUi8wFEmSJGEokiRJAgxF\nkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgQYiiRJkgBDkSRJEtADoSjJ8UluT7Ki/roxycGN9ROT\nfC7J8iSPJLksyY5t29g5yZVJHk2yNMnpSSa01RyQZH6SVUnuSXLsIL2ckGRxkseT3Jxk77b16+1F\nkiSNTV0PRcCvgROBvYA+4HvAN5NMr9efDRwGvBGYCbwAuLz15Dr8XAVsAcwAjgXeDpzSqNkVuAK4\nFtgD+DRwfpIDGzVHA2cAJwF7ArcD85JMbvQ6ZC+SJGnsSi9+IGyS3wIfpgocDwFvKaV8o143DVgI\nzCil3JrkEOBbwE6llOV1zd8BnwSeX0p5KslpwCGllFc39jEAbFdKObR+fDNwSynlg/XjUAW2c0op\npyeZtL5eRnhYJEnSCOqFmaL/lGRCkrcA2wA3Uc0cbUE1wwNAKWURsATYr140A7ijFYhq84DtgFc0\naq5p29281jaSbFnvq7mfUj+ntZ/XbEAvkiRpjNqi2w0AJHklVQjaGngE+K+llLuT7Ak8UUpZ2faU\nZcDU+uep9eP29a11tw9RMynJRGAH4FnrqJlW/zxlA3oZ7LU9D5gN3AusWledJEn6I1sDuwLzSim/\nHemd9UQoAu6mOtdnO+Ao4MIkM7vb0rCZDVzS7SYkSRrD3gZ8baR30hOhqJTyFPDL+uFtSfYBPghc\nCmyVZFLbDM0UYGn981JgravE6vUADzZqpgxSs7KUsjrJcuDpddQ097O+XgZzL8DFF1/M9OnThyhT\nuzlz5nDWWWd1u40xxTHrjOO28RyzzjhuG2fhwoUcc8wxUP8tHWk9EYoGMQGYCMwHngJmAc2Tm3cB\nbqxrbwI+lmRy47yig4AVVCdBt2oOadvHQfVySilPJplf7+db9X5SPz6nrh+ql5uGeC2rAKZPn85e\ne+21wQMg2G677RyzjeSYdcZx23iOWWcct46NyuknXQ9FSU4Fvk11wvJzqabIXgccVEpZmeRLwJlJ\nHqY63+gc4EellB/Xm/gOcBdwUZITgZ2ATwCfLaU8Wdd8ATihvgrty1TB5ijg0EYrZwIX1OHoVmAO\n1QnfFwCspxevPJMkaYzreigCdgS+ShVmVgD/QRWIvlevn0N1aOsyqtmjq4ETWk8upaxJcjjwearZ\no0epgsxJjZp7kxwGnAV8ALgPeFcp5ZpGzaX1PYlOoTok9lNgdinloUavQ/YiSZLGrq6HolLKu9ez\nfjXw/vprXTW/Bg5fz3aup7rsfqiaucDcTelFkiSNTT11nyKpqb+/v9stjDmOWWcct43nmHXGcett\nPXlH681Jkr2A+fPnz/fkOkmSNsKCBQvo6+sD6CulLBjp/TlTJEmShKFIkiQJMBRJkiQBhiJJkiTA\nUCRJkgQYiiRJkgBDkSRJEmAokiRJAgxFkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgQYiiRJkgBD\nkSRJEmAokiRJAgxFkiRJgKFIkiQJMBSNmiOPPIojjngjq1ev7nYrkiRpEIaiUXLffbvx7//+r9x/\n//3dbkWSJA3CUDRq3tTtBiRJ0hAMRZIkSRiKJEmSAEORJEkSYCiSJEkCDEWSJEmAoUiSJAkwFEmS\nJAGGIkmSJMBQJEmSBBiKJEmSAEORJ
EkSYCiSJEkCDEWSJEmAoUiSJAkwFEmSJAGGIkmSJMBQJEmS\nBBiKJEmSgB4IRUn+PsmtSVYmWZbkG0le2lbzgyRrGl9PJ5nbVrNzkiuTPJpkaZLTk0xoqzkgyfwk\nq5Lck+TYQfo5IcniJI8nuTnJ3m3rJyb5XJLlSR5JclmSHYdzTCRJ0ujreigC9gc+A+wLvAHYEvhO\nkmc3agpwHjAFmArsBHy0tbIOP1cBWwAzgGOBtwOnNGp2Ba4ArgX2AD4NnJ/kwEbN0cAZwEnAnsDt\nwLwkkxu9nA0cBrwRmAm8ALh8UwZAkiR13xbdbqCUcmjzcZK3A78B+oAfNlY9Vkp5aB2bmQ28DHh9\nKWU5cEeSfwA+meTkUspTwHuAX5ZSWmFqUZLXAnOA79bL5gDnllIurHs5nioAvRM4Pcmk+ue3lFKu\nq2veASxMsk8p5daOB0KSJHVVL8wUtdueambod23L35bkoSR3JDm1bSZpBnBHHYha5gHbAa9o1FzT\nts15wH4ASbakCmLXtlaWUkr9nP3qRa+hCpLNmkXAkkaNJEkag7o+U9SUJFSHp35YSrmrseoS4FfA\nA8CrgdOBlwJH1eunAsvaNresse72IWomJZkI7AA8ax010+qfpwBPlFJWDlIzdQNeoiRJ6lE9FYqA\nucDLgb9sLiylnN94+LMkS4Frk7y4lLJ4NBvsXHVe+HHHHcc222wDQH9/P/39/d1sSpKknjAwMMDA\nwMBay1asWDGqPfRMKEryWeBQYP9SyoPrKb+l/r47sBhYCuzdVjOl/t7a1tLGsmbNylLK6iTLgafX\nUbO0sY2tkkxqmy1q1qzDe4HjOe+889htt92GLpUkaZwZbKJgwYIF9PX1jVoPPXFOUR2IjqQ6UXrJ\nBjxlT6rzjlqB5ybgVW1XiR0ErAAWNmpmtW3noHo5pZQngfnNmvpw3izgxnrRfOCptpppwC6t7UiS\npLGp6zNF9f2G+oEjgEeTtGZqVpRSViXZDXgr1SX3v6W6nP5M4LpSyp117XeAu4CLkpxIdcn+J4DP\n1mEH4AvACUlOA75MFWyOopqdajkTuCDJfOBWqqvRtgEuACilrEzyJeDMJA8DjwDnAD/yyjNJksa2\nroci4HiqWZ8ftC1/B3Ah8ATV/Ys+CGwL/Br4OvDPrcJSypokhwOfp5rVeZQqyJzUqLk3yWHAWcAH\ngPuAd5VSrmnUXFrPNp1CdUjsp8DstlsBzKE6zHYZMBG4GjhhUwZAkiR1X9dDUSllyEN4pZT7gAM2\nYDu/Bg5fT831VJfdD1Uzl9ZZ0YOvXw28v/6SJEmbiZ44p0iSJKnbDEWSJEkYiiRJkgBDkSRJEmAo\nkiRJAgxFkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgQYiiRJkgBDkSRJEmAokiRJAgxFkiRJgKFI\nkiQJMBRJkiQBhiJJkiTAUCRJkgQYiiRJkgBDkSRJEmAokiRJAgxFkiRJgKFIkiQJMBRJkiQBhiJJ\nkiTAUCRJkgQYiiRJkgBDkSRJEmAokiRJAgxFkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgQYiiRJ\nkgBDkSRJEmAokiRJAgxFkiRJgKFIkiQJMBRJkiQBPRCKkvx9kluTrEyyLMk3kry0rWZiks8lWZ7k\nkSSXJdmxrWbnJFcmeTTJ0iSnJ5nQVnNAkvlJViW5J8mxg/RzQpLFSR5PcnOSvTe2F0mSNPZ0PRQB\n+wOfAfYF3gBsCXwnybMbNWcDhwFvBGYCLwAub62sw89VwBbADOBY4O3AKY2aXYErgGuBPYBPA+cn\nObBRczRwBnASsCdwOzAvyeQN7UWSJI1NW3S7gVLKoc3HSd4O/AboA36YZBLwTuAtpZTr6pp3AAuT\n7FNKuRWYDbwMeH0pZTlwR5J/AD6Z5ORSylPAe4BfllI+Wu9qUZLXAnOA79bL5gDnllIurPdzPFUA\ne
idw+gb2IkmSxqBemClqtz1QgN/Vj/uowtu1rYJSyiJgCbBfvWgGcEcdiFrmAdsBr2jUXNO2r3mt\nbSTZst5Xcz+lfk5rP6/ZgF4kSdIY1FOhKEmoDk/9sJRyV714KvBEKWVlW/myel2rZtkg69mAmklJ\nJgKTgWeto6a1jSkb0IskSRqDun74rM1c4OXAa7vdiCRJGl96JhQl+SxwKLB/KeWBxqqlwFZJJrXN\n0Eyp17Vq1rpKrF4P8GCjZsogNStLKauTLAeeXkdNcz/r62Ud5gJw3HHHsc022wDQ399Pf3//0E+T\nJGkcGBgYYGBgYK1lK1asGNUeeiIU1YHoSOB1pZQlbavnA08Bs4Bv1PXTgF2AG+uam4CPJZncOK/o\nIGAFsLBRc0jbtg+ql1NKeTLJ/Ho/36r3k/rxORvQy01Dv8r3Asdz3nnnsdtuuw1dKknSODPYRMGC\nBQvo6+sbtR66HoqSzAX6gSOAR5O0ZmpWlFJWlVJWJvkScGaSh4FHqELKj0opP65rvwPcBVyU5ERg\nJ+ATwGdLKU/WNV8ATkhyGvBlqmBzFNXsVMuZwAV1OLqV6mq0bYALANbTi1eeSZI0hnU9FAHHU11t\n9oO25e8ALqx/nkN1aOsyYCJwNXBCq7CUsibJ4cDnqWaPHqUKMic1au5NchhwFvAB4D7gXaWUaxo1\nl9b3JDqF6pDYT4HZpZSHGn0N2YskSRqbuh6KSinrvQKulLIaeH/9ta6aXwOHr2c711Nddj9UzVxa\nJwB12IskSRp7euqSfEmSpG4xFEmSJGEokiRJAgxFkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgQY\niiRJkoAeuKP1eHPnnXfy+9//nsmTJ7PLLrt0ux1JklQzFI2a3wETOPLIIwHYeuttWLRoocFIkqQe\n4eGzUfMHYA1wMXAxq1Y9xvLly7vckyRJanGmaNRN73YDkiRpEB3NFCX5myRbD3czkiRJ3dLp4bOz\ngKVJzk2yz3A2JEmS1A2dhqIXAP8deCHwoyR3JvmfSZ4/fK1JkiSNno5CUSnliVLK10sphwG7ABcB\n7wLuS/KvSQ5LkuFsVJIkaSRt8tVnpZQHgWuA7wMFeA0wAPw8yf6bun1JkqTR0HEoSjI5yf9Icjvw\nI2BH4K+BFwF/CvwbcOGwdClJkjTCOrokP8k3gEOBxcD5wFdLKQ81Sh5JcjrwoU1vUZIkaeR1ep+i\nlcAbSik3DFHzEPBnHW5fkiRpVHUUikopx25ATQF+0cn2JUmSRlunN288K8n7Bll+QpIzNr0tSZKk\n0dXpidZvAm4eZPnNwNGdtyNJktQdnYaiycDDgyxfUa+TJEkaUzoNRb8AZg+yfDbVFWmSJEljSqdX\nn50NnJ3kecD36mWzgI8CHx6OxiRJkkZTp1effTHJ1sDHgH+qF98HfKCU8uXhak6SJGm0dDpTRCnl\nM8BnkuwEPF5K+f3wtSVJkjS6Og5FLfVnn0mSJI1pnd6n6PlJvpJkSZJVSZ5ofg13k5IkSSOt05mi\nC4CXAJ8CHgTKcDUkSZLUDZ2GopnAzFLKbcPZjCRJUrd0ep+i+3B2SJIkbUY6DUVzgH9J8sLhbEaS\nJKlbOj18dhHwXOBXSVYCTzZXllJ23NTGJEmSRlOnoeh/DWsXkiRJXdbpHa2/NNyNSJIkdVOn5xSR\nZNckJye5KMmO9bKDkkwfvvYkSZJGR6c3b9wf+BnwOuDNwHPqVX3AKcPTmiRJ0ujpdKboNODkUsrr\ngeYdrK8FZmxyV5IkSaOs01D0auCyQZb/Bnj+xm4syf5JvpXk/iRrkhzRtv4r9fLm11VtNX+S5JIk\nK5I8nOT8JNu21bw6yfVJHk/yqyQfGaSXNyVZWNfcnuSQQWpOSfJAkseSfDfJ7hv7miVJUm/pNBSt\nAKYOsnwP4P4Otrct8FPgvaz7ppDfBqbU+50K9Let/xowHZgFHEZ
11+1zWyuTPBeYBywG9gI+Apyc\n5N2Nmr+ot/NF4M+BbwL/luTljZoTgfcBxwH7AI8C85Js1cHrliRJPaLTS/L/L/DJJEdRh5gk+wJn\nABdv7MZKKVcDV9fbyTrKVpdSHhpsRZKXAbOBvtZHjyR5P3Blkg+XUpYCxwBbAu8qpTwFLEyyJ/Ah\n4Px6Ux8Avl1KObN+/I9JDqQKQe+tl30Q+EQp5Yp6P38LLAP+Grh0Y1+7JEnqDZ3OFP098EvgAaqT\nrO8CbgR+DHxieFr7IwckWZbk7iRzk+zQWLcf8HDbZ7FdQxXY9q0fzwCurwNRyzxgWpLtGtu5pm2/\n8+rlJNmNapbq2tbKUspK4JZWjSRJGps6vU/RauAdSU4BXkUVjBaUUu4ezuYavg1cTnXo6yXAvwBX\nJdmvlFKogspv2np8OsnveOYw31SqINe0rLGudUhw2SA1rW1MoQpaQ9VIkqQxqNPDZwCUUhZTBZUR\nVUppHpb6WZI7gF8ABwDf38TNr+tw3TD7ev19zn8uufrqq9lrr71GZ/eSJPWwgYEBBgYG1lq2YsWK\nUe2ho1CU5Lyh1pdSjuusnQ1TSlmcZDmwO1UoWgqs9XlrSZ4F7AA8WC9aSjXT09Sa+Vm6nprm+tTL\nlrXV3MaQ3gScCpxVP+7j4IMPHvopkiSNE/39/fT3r30N1YIFC+jr6xu1Hjo9p2intq9dgEOobuQ4\n4oeRkrwQeB7PBJ6bgO3rE6dbZlEFmFsbNTPrsNRyELColLKiUTOrbXcH1stbM2NLmzVJJlGdt3Tj\nJr4sSZLURZ2eU/RX7cuSbAF8geqk641S309od545lLVbkj2A39VfJ1GdU7S0rjsNuIfqJGhKKXcn\nmQd8Mcl7gK2AzwAD9ZVnUF1q/4/Al5OcRnUu1AeoriZr+TTwgyQfAq6kuuy/D/jvjZqzgY8n+X/A\nvVQnlt9Hdfm+JEkaozr+7LN29VVdn6K6/8/Geg3V4af5VIezzgAWAP8EPE11s8hvAouo7iH0Y2Bm\nKeXJxjbeCtxNdfXYFcD1wN81+ltJNTO0K/CTuteTmx9uW0q5qd7OcVT3TfpvwJGllLsaNadTBa5z\nqa46ezZwSCmleWdvSZI0xmzSidaDeDHVvYA2SinlOoYOaOs9+aaU8nuqexENVXMn1ee1DVVzOdWs\n1FA1JwMnr68nSZI0dnR6ovXp7Yuozi06gg5u3ihJktRtnc4Utd+ocA3wEPC/qA5vSZIkjSmdnmi9\n/3A3IkmS1E3DdqK1JEnSWNbpOUU/Zt2fZr+WUso+nexDkiRpNHV6TtH3qS53v4f6xoZUH7g6jepS\n9dWb3pokSdLo6TQUbQ98rpTysebCJP8MTCmlvHuTO5MkSRpFnZ5T9GbgK4Msv4DqQ74kSZLGlE5D\n0Wqqw2XtZuChM0mSNAZ1evjsHODc+gNYWx+4ui/VZ4T9y3A0JkmSNJo6vU/RPydZTPVhqq3zhxYC\nx5VSvjZczUmSJI2Wjj/7rA4/BiBJkrRZ6PjmjUkmJXl7klOS/Em9bI8kOw1fe5IkSaOj05s3vhK4\nBngM2JnqqrOHgaOBPwWOHab+JEmSRkWnM0VnUR06ewmwqrH8SmDmpjYlSZI02joNRXsDc0sp7R/1\ncT/g4TNJkjTmdBqKngSeM8jy3YHlnbcjSZLUHZ2Gon8H/iFJ65ykkuRPgU8C/zosnUmSJI2iTkPR\n/wR2AJYCzwa+B/yS6vyijw3xPEmSpJ7U6c0bHwZen+R1wB5Uh9IWAPMGOc9IkiSp5210KEqyJXAF\n8L5SynXAdcPelSRJ0ijb6MNnpZQngT7AGSFJkrTZ6PScokuAdwxnI5IkSd3U6WefFeB9Sd4A/AR4\ndK2VpXx0UxuTJEkaTZ2Goj7gP+qfX922zsNqkiRpzNmoUJRkN2BxKWX/EepHkiSpKzb2nKKfA89v\nPUjyf5NMGd6WJEmSRt/GhqK
0PT4U2HaYepEkSeqaTq8+kyRJ2qxsbCgq/PGJ1J5YLUmSxryNvfos\nwAVJVtePtwa+kKT9kvz/NhzNSZIkjZaNDUVfbXt88XA1IkmS1E0bFYpKKd7FWpIkbZY80VqSJAlD\nkSRJEmAokiRJAgxFkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgT0SChKsn+SbyW5P8maJEcMUnNK\nkgeSPJbku0l2b1v/J0kuSbIiycNJzk+ybVvNq5Ncn+TxJL9K8pFB9vOmJAvrmtuTHLKxvUiSpLGn\nJ0IRsC3wU+C9DPIBs0lOBN4HHAfsAzwKzEuyVaPsa8B0YBZwGDATOLexjecC84DFwF7AR4CTk7y7\nUfMX9Xa+CPw58E3g35K8fCN7kSRJY8zGfvbZiCilXA1cDZAkg5R8EPhEKeWKuuZvgWXAXwOXJpkO\nzAb6Sim31TXvB65M8uFSylLgGGBL4F2llKeAhUn2BD4EnF/v5wPAt0spZ9aP/zHJgVQh6L0b0suw\nDIgkSRp1vTJTtE5JXgxMBa5tLSulrARuAfarF80AHm4Foto1VLNO+zZqrq8DUcs8YFqS7erH+9XP\no61mv7qX3TagF0mSNAb1fCiiCiGFajamaVm9rlXzm+bKUsrTwO/aagbbBhtQ01o/ZQN6kSRJY1BP\nHD7rssEO142Ar9ff5/znkquvvpq99tprdHYvSVIPGxgYYGBgYK1lK1asGNUexkIoWkoVXKaw9gzN\nFOC2Rs2OzScleRawA/Bgo2ZK27ZbMz9L11PTXL++XtbhTcCpwFn14z4OPvjgoZ8iSdI40d/fT39/\n/1rLFixYQF9f36j10POHz0opi6nCyKzWsiSTqM4VurFedBOwfX3idMssqgBza6NmZh2WWg4CFpVS\nVjRqZrG2A+vlG9qLJEkag3oiFCXZNskeSf68XrRb/Xjn+vHZwMeT/FWSVwEXAvdRXTJPKeVuqhOi\nv5hk7yR/CXwGGKivPIPqUvsngC8neXmSo6muNjuj0cqngYOTfCjJtCQnA33AZxs1Q/YiSZLGpl45\nfPYa4PtUh7IKzwSVrwLvLKWcnmQbqvsObQ/cABxSSnmisY23UoWXa4A1wGVUl88D1VViSQ4CPgf8\nBFgOnFxK+VKj5qYkbwX+uf76OXBkKeWuRs2G9CJJksaYnghFpZTrWM+sVSnlZODkIdb/nupeRENt\n407gdeupuRy4fFN6kSRJY09PHD6TJEnqNkORJEkShiJJkiTAUCRJkgQYiiRJkgBDkSRJEmAokiRJ\nAgxFkiRJgKFIkiQJMBRJkiQBhiJJkiTAUCRJkgQYiiRJkgBDkSRJEmAo6qqFCxeyZMmSbrchSZIw\nFHXJg8AEjjnmGKZNm24wkiSpBxiKuuL3wBrg46xa9RjLly/vdkOSJI17hqKuelG3G5AkSTVDkSRJ\nEoYiSZIkwFAkSZIEGIokSZIAQ5EkSRJgKJIkSQIMRZIkSYChSJIkCTAUSZIkAYYiSZIkwFAkSZIE\nGIokSZIAQ5EkSRJgKJIkSQIMRZIkSYChSJIkCTAUSZIkAYYiSZIkwFAkSZIEGIokSZIAQ5EkSRJg\nKJIkSQLGSChKclKSNW1fdzXWT0zyuSTLkzyS5LIkO7ZtY+ckVyZ5NMnSJKcnmdBWc0CS+UlWJbkn\nybGD9HJCksVJHk9yc5K9R+6VS5Kk0TImQlHtTmAKMLX+em1j3dnAYcAbgZnAC4DLWyvr8HMVsAUw\nAzgWeDtwSqNmV+AK4FpgD+DTwPlJDmzUHA2cAZwE7AncDsxLMnkYX6ckSeqCsRSKniqlPFRK+U39\n9TuAJJOAdwJzSinXlVJuA94B/GWSfernzgZeBrytlHJHKWUe8A/ACUm2qGveA/yylPLRUsqiUsrn\ngMuAOY0e5gDnllIuLKXcDRwPPFbvX5IkjWFjKRT9WZL7k/wiycVJdq6X91HNAF3bKiylLAKWA
PvV\ni2YAd5RSlje2Nw/YDnhFo+aatn3Oa20jyZb1vpr7KfVz9kOSJI1pYyUU3Ux1uGs21ezMi4Hrk2xL\ndSjtiVLKyrbnLKvXUX9fNsh6NqBmUpKJwGTgWeuomYokSRrTtlh/SffVh7ta7kxyK/Ar4M3Aqu50\nJUmSNidjIhS1K6WsSHIPsDvV4autkkxqmy2aAiytf14KtF8lNqX+/mCjZsogNStLKauTLAeeXkfN\nUtbr6/X3OcAj9c+3rP9pkiSNAwMDAwwMDKy1bMWKFaPaw5gMRUmeA7wE+CowH3gKmAV8o14/DdgF\nuLF+yk3Ax5JMbpxXdBCwAljYqDmkbVcH1csppTyZZH69n2/V+0n9+Jz1d/0m4FTgrHqXxwD7Audv\n8OuWJGlz1d/fT39//1rLFixYQF9f36j1MCZCUZJPAf9OdcjsT4F/ogpC/6eUsjLJl4AzkzxMNQ1z\nDvCjUsqP6018B7gLuCjJicBOwCeAz5ZSnqxrvkB1NdppwJepws5RwKGNVs4ELqjD0a1U0z7bABeM\nyAuXJEmjZkyEIuCFwNeA5wEPAT8EZpRSfluvn0N1aOsyYCJwNXBC68mllDVJDgc+TzV79ChVkDmp\nUXNvksOopnI+ANwHvKuUck2j5tL6nkSnUB02+ykwu5Ty0Ai8ZkmSNIrGRCgqpfSvZ/1q4P3117pq\nfg0cvp7tXE912f1QNXOBuUPVSJKksWesXJIvSZI0ogxFkiRJGIokSZIAQ5EkSRJgKJIkSQIMRZIk\nSYChSJIkCTAUSZIkAYYiSZIkwFAkSZIEGIokSZIAQ5EkSRJgKJIkSQIMRZIkSYChSJIkCTAUSZIk\nAYYiSZIkwFAkSZIEGIp6wsKFC1myZEm325AkaVwzFHXVb4EJHHPMMUybNt1gJElSFxmKuuoPwBrg\n46xa9RjLly/vdkOSJI1bhqKe8KJuNyBJ0rhnKJIkScJQJEmSBBiKJEmSAEORJEkSYCiSJEkCDEWS\nJEmAoUiSJAkwFEmSJAGGIkmSJMBQJEmSBMAW3W5Az1i4cCEAkydPZpdddulyN5IkjS+Gop7wW2AC\nxxxzDABbb70NixYtNBhJkjSKPHzWE/4ArAEuBi5m1arHWL58eZd7kiRpfHGmqKdM73YDkiSNW84U\n9aiFCxeyZMmSbrchSdK4YSjqOQ/SOr9o2rTpBiNJkkaJoajn/J7q/KKPe26RJEmjyFDUs14EeBhN\nkqTRYijqWc9cpj9eD6MNDAx0u4UxxzHrjOO28Ryzzjhuvc1Q1KEkJyRZnOTxJDcn2Xt499C6TL86\njHbDDTerBcB0AAAI3UlEQVSwYMGCcRWO/Mdj4zlmnXHcNp5j1hnHrbd5SX4HkhwNnAEcB9wKzAHm\nJXlpKWWYTwJ6Dt7YUZKkkedMUWfmAOeWUi4spdwNHA88Brxz+Hf1xzd2vOGGG8bVjJEkSaPBmaKN\nlGRLoA84tbWslFKSXAPsN3J7nk7zcv2JE7fm8ssvY4cddmDixImAn5kmSdKmMBRtvMnAs4BlbcuX\nAdMGqd+6+nZb/fAq4IH65zvavm/IujXA61m9+gccfvjhVJN9awDYcsuJfOpTp7HjjjuyZs0aJkyY\nwJo11brWz+v6PlTNaK9rfb/vvvsYGBjoiZ7Gyhjef//9XHLJJT3V01gYw7HyXuulMWy913qpp7Ew\nhoO917rdUy+O05o1a5g8eXLztjRbMwpSShmN/Ww2kuwE3A/sV0q5pbH8NGBmKWW/tvq3ApeMbpeS\nJG1W3lZK+dpI78SZoo23HHgamNK2fAqwdJD6ecDbgHuBVSPamSRJm5etgV2p/paOOGeKOpDkZuCW\nUsoH68cBlgDnlFI+1dXmJElSR5wp6syZwAVJ5vPMJfnbABd0sylJktQ5Q1EHSimXJpkMnEJ12Oyn\nwOxSykPd7UySJHXKw2eSJEl480ZJkiTAUCRJkgQYikbcy
H9w7NiR5KQka9q+7mqsn5jkc0mWJ3kk\nyWVJdmzbxs5JrkzyaJKlSU5Pstm8j5Psn+RbSe6vx+eIQWpOSfJAkseSfDfJ7m3r/yTJJUlWJHk4\nyflJtm2reXWS6+v35a+SfGSkX9tIWt+4JfnKIO+9q9pqxtW4Jfn7JLcmWZlkWZJvJHlpW82w/E4m\nOSDJ/CSrktyT5NjReI3DbQPH7Adt77Onk8xtqxk3YwaQ5Pgkt9e/WyuS3Jjk4Mb6nnmfbTZ/THpR\nnvng2JOAPYHbqT44dnJXG+uuO6lOTp9af722se5s4DDgjcBM4AXA5a2V9S/AVVQXCMwAjgXeTnXC\n++ZiW6oT998L/NEJf0lOBN5H9WHE+wCPUr2ntmqUfY3qc2FmUY3nTODcxjaeS3XPj8XAXsBHgJOT\nvHsEXs9oGXLcat9m7fdef9v68TZu+wOfAfYF3gBsCXwnybMbNZv8O5lkV+AK4FpgD+DTwPlJDhyR\nVzWyNmTMCnAez7zXdgI+2lo5DscM4NfAiVS/N33A94BvJpler++d91kpxa8R+gJuBj7deBzgPuCj\n3e6tS+NxErBgHesmAauB/9pYNo3qM0z2qR8fAjwJTG7U/B3wMLBFt1/fCIzXGuCItmUPAHPaxu1x\n4M314+n18/Zs1MwGngKm1o/fQ3UT0i0aNf8C3NXt1zyC4/YV4F+HeM7LHDcm12Pw2sZ7a5N/J4HT\ngP9o29cAcFW3X/Nwj1m97PvAmUM8Z1yPWeP1/BZ4R6+9z5wpGiF55oNjr20tK9V/pRH+4Nie92f1\nIY5fJLk4yc718j6q/wtojtciqptitsZrBnBHKWV5Y3vzgO2AV4x8692V5MVU/+fZHKOVwC2sPUYP\nl1Juazz1Gqr/e923UXN9KeWpRs08YFqS7Uao/V5wQH3I4+4kc5Ps0Fi3H47b9lSv93f14+H6nZxB\nNZa01WwO/w62j1nL25I8lOSOJKe2zSSN6zFLMiHJW6ju7XcTPfY+MxSNnKE+OHbq6LfTE26mmvKc\nDRwPvBi4vj5vYyrwRP1Hvqk5XlMZfDxhfIzpVKp/gId6T00FftNcWUp5muof7fE8jt8G/hb4L1SH\nMl4HXJUk9fpxPW71OJwN/LCU0jrPb7h+J9dVMynJxE3tvVvWMWZQfdblMcABwKnA3wAXNdaPyzFL\n8sokj1DNCs2lmhm6mx57n3nzRo2aUkrzs2vuTHIr8Cvgzfi5cBpBpZRLGw9/luQO4BdUf7i+35Wm\nestc4OWsfY6fhtYas79sLiylnN94+LMkS4Frk7y4lLJ4NBvsMXdTneuzHXAUcGGSmd1t6Y85UzRy\nNvaDY8edUsoK4B5gd6ox2SrJpLay5ngtZfDxhPExpkupzksb6j21FGi/auNZwA7Ag42a8TyO1H+c\nllO992Acj1uSzwKHAgeUUh5orNrU38n1jdvKUsrqTem9W9rG7MH1lN9Sf2++18bdmJVSniql/LKU\nclsp5X9TXXj0QXrsfWYoGiGllCeB+VRXsgD/Od06C7ixW331kiTPAV5CdfLwfKqTWpvjNQ3YhWfG\n6ybgVW1X7x0ErACa09ebpfoP+VLWHqNJVOe8NMdo+yR7Np46iypM3dqomVn/0W85CFhUB9XNXpIX\nAs/jmX9Qx+W41X/cjwReX0pZ0rZ6U38nFzZqZrG2g+rlY856xmwwe1Id9m6+18bVmK3DBGAivfY+\n6/YZ6JvzF9VhoceozmV4GdXlvb8Fnt/t3ro0Hp+iutzyRcBfAN+lOub7vHr9XKrLnQ+gOvnuR8AN\njedPoPq/i28Dr6Y6N2kZ8Iluv7ZhHKNtqaaY/5zq6ov/UT/euV7/0fo99FfAq4B/A34ObNXYxlXA\nT4C9qab2FwEXNdZPogqiX6Wa/j8a+APwrm6//pEYt3rd6VTh8UVU/3D+hOof0y3H67jVv28PU11m\nPqXxtXVbzSb9TgK7A
o9QXR00jeq2CU8Ab+j2GAz3mAG7AR+nuvT8RcARwP8Dvjdex6x+PafWY/Yi\n4JVUV20+BfyXXnufdX2wNvev+j/MvVSXTd8EvKbbPXVxLAaobknwONWVBV8DXtxYP5HqHiDL6zf3\n14Ed27axM9W9KP5Q/1KcBkzo9msbxjF6HdUf9afbvr7cqDmZ6o/zY1RXV+zeto3tgYup/i/qYeCL\nwDZtNa8Erqu3sQT4cLdf+0iNG7A1cDXVLNsq4JfA52n7n5PxNm7rGK+ngb9t1AzL7yTV/wzNr3/3\nfw78Tbdf/0iMGfBC4AfAQ/V7ZBFVAHjOeB2z+rWcX//ePV7/Hn6HOhD12vvMD4SVJEnCc4okSZIA\nQ5EkSRJgKJIkSQIMRZIkSYChSJIkCTAUSZIkAYYiSZIkwFAkSZIEGIokSZIAQ5EkSRJgKJIkSQLg\n/wP64vtuq3uIGQAAAABJRU5ErkJggg==\n",
  791. "text/plain": [
  792. "<matplotlib.figure.Figure at 0x7f129146ea90>"
  793. ]
  794. },
  795. "metadata": {},
  796. "output_type": "display_data"
  797. }
  798. ],
  799. "source": [
  800. "train['lender_count'].plot(kind='hist', bins=200);"
  801. ]
  802. },
  803. {
  804. "cell_type": "markdown",
  805. "metadata": {},
  806. "source": [
  807. "Finally, we want to get an idea of how well this model will perform in production:"
  808. ]
  809. },
  810. {
  811. "cell_type": "code",
  812. "execution_count": 1,
  813. "metadata": {
  814. "collapsed": false
  815. },
  816. "outputs": [],
  817. "source": [
  818. "import pandas as pd\n",
  819. "from sklearn.ensemble import RandomForestClassifier\n",
  820. "import pickle\n",
  821. "\n",
  822. "train = pd.read_csv('processed_train.csv', index_col=0).dropna(axis=1)\n",
  823. "valid = pd.read_csv('processed_validate.csv', index_col=0).dropna(axis=1)\n",
  824. "\n",
  825. "full = pd.concat((train, valid))\n",
  826. "full_x = full.drop('bad_loan', axis=1)\n",
  827. "full_y = full['bad_loan']\n",
  828. "rf = RandomForestClassifier(n_estimators=10)\n",
  829. "\n",
  830. "rf.fit(full_x, full_y)\n",
  831. "\n",
  832. "with open('rf_validated.pickle', 'w') as handle:\n",
  833. " pickle.dump(rf, handle)"
  834. ]
  835. },
  836. {
  837. "cell_type": "code",
  838. "execution_count": 1,
  839. "metadata": {
  840. "collapsed": false
  841. },
  842. "outputs": [
  843. {
  844. "name": "stdout",
  845. "output_type": "stream",
  846. "text": [
  847. "Model test accuracy: 93.651%\n",
  848. "Naive test accuracy: 89.952%\n"
  849. ]
  850. }
  851. ],
  852. "source": [
  853. "import pandas as pd\n",
  854. "import pickle\n",
  855. "\n",
  856. "test = pd.read_csv('processed_test.csv', index_col=0).dropna(axis=1)\n",
  857. "test_x = test.drop('bad_loan', axis=1)\n",
  858. "test_y = test['bad_loan']\n",
  859. "with open('rf_validated.pickle') as handle:\n",
  860. " rf = pickle.load(handle)\n",
  861. " score = rf.score(test_x, test_y)\n",
  862. " \n",
  863. " print 'Model test accuracy: {:.3f}%'.format(score*100)\n",
  864. " \n",
  865. "print 'Naive test accuracy: {:.3f}%'.format(\n",
  866. " (1 - test_y.mean())*100\n",
  867. ")"
  868. ]
  869. }
  870. ],
  871. "metadata": {
  872. "kernelspec": {
  873. "display_name": "Python 2",
  874. "language": "python",
  875. "name": "python2"
  876. },
  877. "language_info": {
  878. "codemirror_mode": {
  879. "name": "ipython",
  880. "version": 2
  881. },
  882. "file_extension": ".py",
  883. "mimetype": "text/x-python",
  884. "name": "python",
  885. "nbconvert_exporter": "python",
  886. "pygments_lexer": "ipython2",
  887. "version": "2.7.12"
  888. }
  889. },
  890. "nbformat": 4,
  891. "nbformat_minor": 0
  892. }