mirror of
https://github.com/bspeice/betterwithdata_cleaning_4
synced 2025-05-16 02:51:29 -04:00
Add emergency and outpatient data
Using the new scraper!
This commit is contained in:
parent
ff88efe156
commit
42873d2bf3
28548
data/dental2013.csv
28548
data/dental2013.csv
File diff suppressed because it is too large
Load Diff
6443
data/emergency visits/h102e.csv
Normal file
6443
data/emergency visits/h102e.csv
Normal file
File diff suppressed because it is too large
Load Diff
5759
data/emergency visits/h110e.csv
Normal file
5759
data/emergency visits/h110e.csv
Normal file
File diff suppressed because it is too large
Load Diff
6116
data/emergency visits/h118e.csv
Normal file
6116
data/emergency visits/h118e.csv
Normal file
File diff suppressed because it is too large
Load Diff
6956
data/emergency visits/h126e.csv
Normal file
6956
data/emergency visits/h126e.csv
Normal file
File diff suppressed because it is too large
Load Diff
5517
data/emergency visits/h135e.csv
Normal file
5517
data/emergency visits/h135e.csv
Normal file
File diff suppressed because it is too large
Load Diff
6196
data/emergency visits/h144e.csv
Normal file
6196
data/emergency visits/h144e.csv
Normal file
File diff suppressed because it is too large
Load Diff
6863
data/emergency visits/h152e.csv
Normal file
6863
data/emergency visits/h152e.csv
Normal file
File diff suppressed because it is too large
Load Diff
37337
data/emergency visits/h159.csv
Normal file
37337
data/emergency visits/h159.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
"","duid","pid","dupersid","evntidx","eventrn","erhevidx","ffeeidx","panel","mpcdata","erdateyr","erdatemm","vstctgry","vstrelcn","labtest","sonogram","xrays","mammog","mri","ekg","eeg","rcvvac","anesth","thrtswab","othsvce","surgproc","medpresc","erccc1x","erccc2x","erccc3x","ffertype","erxp13x","ertc13x","erfsf13x","erfmr13x","erfmd13x","erfpv13x","erfva13x","erftr13x","erfof13x","erfsl13x","erfwc13x","erfor13x","erfou13x","erfot13x","erfxp13x","erftc13x","erdsf13x","erdmr13x","erdmd13x","erdpv13x","erdva13x","erdtr13x","erdof13x","erdsl13x","erdwc13x","erdor13x","erdou13x","erdot13x","erdxp13x","erdtc13x","impflag","perwt13f","varstr","varpsu"
|
||||
"DUID","PID","DUPERSID","EVNTIDX","EVENTRN","ERHEVIDX","FFEEIDX","PANEL","MPCDATA","ERDATEYR","ERDATEMM","VSTCTGRY","VSTRELCN","LABTEST","SONOGRAM","XRAYS","MAMMOG","MRI","EKG","EEG","RCVVAC","ANESTH","THRTSWAB","OTHSVCE","SURGPROC","MEDPRESC","ERCCC1X","ERCCC2X","ERCCC3X","FFERTYPE","ERXP13X","ERTC13X","ERFSF13X","ERFMR13X","ERFMD13X","ERFPV13X","ERFVA13X","ERFTR13X","ERFOF13X","ERFSL13X","ERFWC13X","ERFOR13X","ERFOU13X","ERFOT13X","ERFXP13X","ERFTC13X","ERDSF13X","ERDMR13X","ERDMD13X","ERDPV13X","ERDVA13X","ERDTR13X","ERDOF13X","ERDSL13X","ERDWC13X","ERDOR13X","ERDOU13X","ERDOT13X","ERDXP13X","ERDTC13X","IMPFLAG","PERWT13F","VARSTR","VARPSU"
|
||||
"1",20012,101,"20012101","200121010011",4,"-1","-1",17,1,2013,4,1,2,95,95,95,95,95,95,95,95,95,95,95,2,1,"-1","-1","-1",-1,270.82,1053,0,0,94.46,0,0,0,0,0,0,0,0,0,94.46,353,0,176.36,0,0,0,0,0,0,0,0,0,0,176.36,700,2,3151.347991,1057,1
|
||||
"2",20016,102,"20016102","200161020101",4,"-1","-1",17,2,2013,6,2,1,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,"230","-1","-1",-1,847.41,1852,208.54,0,0,334.16,0,0,0,0,0,0,0,0,542.7,1464,0,0,0,304.71,0,0,0,0,0,0,0,0,304.71,388,3,7921.950896,1162,2
|
||||
"3",20023,101,"20023101","200231010011",4,"-1","-1",17,1,2013,5,1,1,95,95,95,95,95,95,95,95,95,95,95,2,1,"253","-1","-1",-1,454.45,563.75,0,0,0,0,0,0,0,0,0,0,0,159.7,159.7,269,294.75,0,0,0,0,0,0,0,0,0,0,0,294.75,294.75,3,7674.126079,1138,2
|
Can't render this file because it is too large.
|
6846
data/emergency visits/h77e.csv
Normal file
6846
data/emergency visits/h77e.csv
Normal file
File diff suppressed because it is too large
Load Diff
6828
data/emergency visits/h85e.csv
Normal file
6828
data/emergency visits/h85e.csv
Normal file
File diff suppressed because it is too large
Load Diff
6447
data/emergency visits/h94e.csv
Normal file
6447
data/emergency visits/h94e.csv
Normal file
File diff suppressed because it is too large
Load Diff
13325
data/outpatient/h102f.csv
Normal file
13325
data/outpatient/h102f.csv
Normal file
File diff suppressed because it is too large
Load Diff
11862
data/outpatient/h110f.csv
Normal file
11862
data/outpatient/h110f.csv
Normal file
File diff suppressed because it is too large
Load Diff
11174
data/outpatient/h118f.csv
Normal file
11174
data/outpatient/h118f.csv
Normal file
File diff suppressed because it is too large
Load Diff
13134
data/outpatient/h126f.csv
Normal file
13134
data/outpatient/h126f.csv
Normal file
File diff suppressed because it is too large
Load Diff
10963
data/outpatient/h135f.csv
Normal file
10963
data/outpatient/h135f.csv
Normal file
File diff suppressed because it is too large
Load Diff
11544
data/outpatient/h144f.csv
Normal file
11544
data/outpatient/h144f.csv
Normal file
File diff suppressed because it is too large
Load Diff
11484
data/outpatient/h152f.csv
Normal file
11484
data/outpatient/h152f.csv
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
16333
data/outpatient/h77f.csv
Normal file
16333
data/outpatient/h77f.csv
Normal file
File diff suppressed because it is too large
Load Diff
15780
data/outpatient/h85f.csv
Normal file
15780
data/outpatient/h85f.csv
Normal file
File diff suppressed because it is too large
Load Diff
14347
data/outpatient/h94f.csv
Normal file
14347
data/outpatient/h94f.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
211
exploration/MEPS Scraping - Python.ipynb
Normal file
211
exploration/MEPS Scraping - Python.ipynb
Normal file
@ -0,0 +1,211 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from html.parser import HTMLParser\n",
|
||||
"from IPython.display import display\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"emergency_data_url = \"http://meps.ahrq.gov/mepsweb/data_stats/download_data_files_results.jsp?cboDataYear=All&cboDataTypeY=2%2CHousehold+Event+File&buttonYearandDataType=Search&cboPufNumber=All&SearchTitle=Emergency+Room+Visits\"\n",
|
||||
"\n",
|
||||
"emergency_page = requests.get(emergency_data_url).text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['HC-160E',\n",
|
||||
" 'HC-152E',\n",
|
||||
" 'HC-144E',\n",
|
||||
" 'HC-135E',\n",
|
||||
" 'HC-126E',\n",
|
||||
" 'HC-118E',\n",
|
||||
" 'HC-110E',\n",
|
||||
" 'HC-102E',\n",
|
||||
" 'HC-094E',\n",
|
||||
" 'HC-085E',\n",
|
||||
" 'HC-077E',\n",
|
||||
" 'HC-067E',\n",
|
||||
" 'HC-059E',\n",
|
||||
" 'HC-051E',\n",
|
||||
" 'HC-033E',\n",
|
||||
" 'HC-026E',\n",
|
||||
" 'HC-016E',\n",
|
||||
" 'HC-010E']"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"class MepsScraper(HTMLParser):\n",
|
||||
" p = re.compile('HC-[0-9]*[A-Z]?')\n",
|
||||
" pufs = []\n",
|
||||
" \n",
|
||||
" def handle_data(self, data):\n",
|
||||
" \n",
|
||||
" if self.p.match(data):\n",
|
||||
" self.pufs.append(data)\n",
|
||||
" \n",
|
||||
"meps = MepsScraper()\n",
|
||||
"meps.feed(emergency_page)\n",
|
||||
"display(meps.pufs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['h160e',\n",
|
||||
" 'h152e',\n",
|
||||
" 'h144e',\n",
|
||||
" 'h135e',\n",
|
||||
" 'h126e',\n",
|
||||
" 'h118e',\n",
|
||||
" 'h110e',\n",
|
||||
" 'h102e',\n",
|
||||
" 'h94e',\n",
|
||||
" 'h85e',\n",
|
||||
" 'h77e',\n",
|
||||
" 'h67e',\n",
|
||||
" 'h59e',\n",
|
||||
" 'h51e',\n",
|
||||
" 'h33e',\n",
|
||||
" 'h26e',\n",
|
||||
" 'h16e',\n",
|
||||
" 'h10e']"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def norm_puf(puf):\n",
|
||||
" splits = puf.split(\"C-\")\n",
|
||||
" if splits[1][0] == '0':\n",
|
||||
" return ''.join([splits[0], splits[1][1:]]).lower()\n",
|
||||
" else:\n",
|
||||
" return ''.join(splits).lower()\n",
|
||||
" \n",
|
||||
"final_pufs = list(map(norm_puf, meps.pufs))\n",
|
||||
"display(final_pufs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"puf_url = lambda x: 'http://meps.ahrq.gov/mepsweb/data_files/pufs/' + x + 'ssp.zip'\n",
|
||||
"\n",
|
||||
"puf_urls = list(map(puf_url, final_pufs))\n",
|
||||
"\n",
|
||||
"puf_files = {puf: requests.get(puf_url(puf)) for puf in final_pufs}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Error extracting h26e'"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Error extracting h16e'"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Error extracting h10e'"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from zipfile import ZipFile, BadZipFile\n",
|
||||
"from io import BytesIO\n",
|
||||
"\n",
|
||||
"for key, value in puf_files.items():\n",
|
||||
" try:\n",
|
||||
" puf_zip = ZipFile(BytesIO(value.content))\n",
|
||||
" puf_zip.extractall()\n",
|
||||
" except BadZipFile:\n",
|
||||
" display(\"Error extracting {}\".format(key))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
87
exploration/MEPS Scraping.ipynb
Normal file
87
exploration/MEPS Scraping.ipynb
Normal file
@ -0,0 +1,87 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"library(httr)\n",
|
||||
"library(foreign)\n",
|
||||
" \n",
|
||||
"download_puf <- function(short_puf) {\n",
|
||||
" puf_base <- \"http://meps.ahrq.gov/mepsweb/data_files/pufs/\"\n",
|
||||
" puf_suffix <- \"ssp.zip\"\n",
|
||||
" \n",
|
||||
" zip_filename <- paste0(short_puf, \"ssp.zip\")\n",
|
||||
" filename <- paste0(short_puf, \".ssp\")\n",
|
||||
" puf_url <- paste0(puf_base, zip_filename)\n",
|
||||
" download.file(puf_url, zip_filename)\n",
|
||||
" \n",
|
||||
" # unzip\n",
|
||||
" unzip(zip_filename, files = filename)\n",
|
||||
" saveName <- paste0(short_puf, \".csv\")\n",
|
||||
"\n",
|
||||
" # read sas file and return as csv file\n",
|
||||
" mydata <- read.xport(filename)\n",
|
||||
" write.table(mydata, file = saveName, sep = \",\")\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"normalize_puf <- function(full_puf) {\n",
|
||||
" stage_1 <- gsub(\"C-0\", \"\", full_puf)\n",
|
||||
" stage_2 <- gsub(\"C-\", \"\", stage_1)\n",
|
||||
" \n",
|
||||
" tolower(stage_2)\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ERROR",
|
||||
"evalue": "Error in lookup.xport(file): file not in SAS transfer format\n",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"Error in lookup.xport(file): file not in SAS transfer format\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Emergency Visit PUF's\n",
|
||||
"# pufs <- c('HC-160E','HC-152E','HC-144E','HC-135E','HC-126E','HC-118E','HC-110E','HC-102E','HC-094E','HC-085E','HC-077E','HC-067E','HC-059E','HC-051E','HC-033E','HC-026E','HC-016E','HC-010E')\n",
|
||||
"\n",
|
||||
"# Outpatient Visits\n",
|
||||
"pufs <- c('HC-160F','HC-152F','HC-144F','HC-135F','HC-126F','HC-118F','HC-110F','HC-102F','HC-094F','HC-085F','HC-077F','HC-067F','HC-059F','HC-051F','HC-033F','HC-026F','HC-016F','HC-010F')\n",
|
||||
"\n",
|
||||
"puf_downloads = c()\n",
|
||||
"for (puf in pufs) {\n",
|
||||
" download_puf(normalize_puf(puf))\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "R",
|
||||
"language": "R",
|
||||
"name": "ir"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": "r",
|
||||
"file_extension": ".r",
|
||||
"mimetype": "text/x-r-source",
|
||||
"name": "R",
|
||||
"pygments_lexer": "r",
|
||||
"version": "3.2.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
BIN
exploration/h102f.ssp
Normal file
BIN
exploration/h102f.ssp
Normal file
Binary file not shown.
BIN
exploration/h102fssp.zip
Normal file
BIN
exploration/h102fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h110f.ssp
Normal file
BIN
exploration/h110f.ssp
Normal file
Binary file not shown.
BIN
exploration/h110fssp.zip
Normal file
BIN
exploration/h110fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h118f.ssp
Normal file
BIN
exploration/h118f.ssp
Normal file
Binary file not shown.
BIN
exploration/h118fssp.zip
Normal file
BIN
exploration/h118fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h126f.ssp
Normal file
BIN
exploration/h126f.ssp
Normal file
Binary file not shown.
BIN
exploration/h126fssp.zip
Normal file
BIN
exploration/h126fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h135f.ssp
Normal file
BIN
exploration/h135f.ssp
Normal file
Binary file not shown.
BIN
exploration/h135fssp.zip
Normal file
BIN
exploration/h135fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h144f.ssp
Normal file
BIN
exploration/h144f.ssp
Normal file
Binary file not shown.
BIN
exploration/h144fssp.zip
Normal file
BIN
exploration/h144fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h152f.ssp
Normal file
BIN
exploration/h152f.ssp
Normal file
Binary file not shown.
BIN
exploration/h152fssp.zip
Normal file
BIN
exploration/h152fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h159 .ssp
Normal file
BIN
exploration/h159 .ssp
Normal file
Binary file not shown.
BIN
exploration/h159.ssp
Normal file
BIN
exploration/h159.ssp
Normal file
Binary file not shown.
BIN
exploration/h160f.ssp
Normal file
BIN
exploration/h160f.ssp
Normal file
Binary file not shown.
BIN
exploration/h160fssp.zip
Normal file
BIN
exploration/h160fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h67e.ssp
Normal file
BIN
exploration/h67e.ssp
Normal file
Binary file not shown.
BIN
exploration/h67f.ssp
Normal file
BIN
exploration/h67f.ssp
Normal file
Binary file not shown.
BIN
exploration/h67fssp.zip
Normal file
BIN
exploration/h67fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h77f.ssp
Normal file
BIN
exploration/h77f.ssp
Normal file
Binary file not shown.
BIN
exploration/h77fssp.zip
Normal file
BIN
exploration/h77fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h85f.ssp
Normal file
BIN
exploration/h85f.ssp
Normal file
Binary file not shown.
BIN
exploration/h85fssp.zip
Normal file
BIN
exploration/h85fssp.zip
Normal file
Binary file not shown.
BIN
exploration/h94f.ssp
Normal file
BIN
exploration/h94f.ssp
Normal file
Binary file not shown.
BIN
exploration/h94fssp.zip
Normal file
BIN
exploration/h94fssp.zip
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user