Add emergency and outpatient data

Using the new scraper!
This commit is contained in:
bspeice 2015-11-07 15:51:57 -05:00
parent ff88efe156
commit 42873d2bf3
55 changed files with 244711 additions and 41695 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

37337
data/emergency visits/h159.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
"","duid","pid","dupersid","evntidx","eventrn","erhevidx","ffeeidx","panel","mpcdata","erdateyr","erdatemm","vstctgry","vstrelcn","labtest","sonogram","xrays","mammog","mri","ekg","eeg","rcvvac","anesth","thrtswab","othsvce","surgproc","medpresc","erccc1x","erccc2x","erccc3x","ffertype","erxp13x","ertc13x","erfsf13x","erfmr13x","erfmd13x","erfpv13x","erfva13x","erftr13x","erfof13x","erfsl13x","erfwc13x","erfor13x","erfou13x","erfot13x","erfxp13x","erftc13x","erdsf13x","erdmr13x","erdmd13x","erdpv13x","erdva13x","erdtr13x","erdof13x","erdsl13x","erdwc13x","erdor13x","erdou13x","erdot13x","erdxp13x","erdtc13x","impflag","perwt13f","varstr","varpsu"
"DUID","PID","DUPERSID","EVNTIDX","EVENTRN","ERHEVIDX","FFEEIDX","PANEL","MPCDATA","ERDATEYR","ERDATEMM","VSTCTGRY","VSTRELCN","LABTEST","SONOGRAM","XRAYS","MAMMOG","MRI","EKG","EEG","RCVVAC","ANESTH","THRTSWAB","OTHSVCE","SURGPROC","MEDPRESC","ERCCC1X","ERCCC2X","ERCCC3X","FFERTYPE","ERXP13X","ERTC13X","ERFSF13X","ERFMR13X","ERFMD13X","ERFPV13X","ERFVA13X","ERFTR13X","ERFOF13X","ERFSL13X","ERFWC13X","ERFOR13X","ERFOU13X","ERFOT13X","ERFXP13X","ERFTC13X","ERDSF13X","ERDMR13X","ERDMD13X","ERDPV13X","ERDVA13X","ERDTR13X","ERDOF13X","ERDSL13X","ERDWC13X","ERDOR13X","ERDOU13X","ERDOT13X","ERDXP13X","ERDTC13X","IMPFLAG","PERWT13F","VARSTR","VARPSU"
"1",20012,101,"20012101","200121010011",4,"-1","-1",17,1,2013,4,1,2,95,95,95,95,95,95,95,95,95,95,95,2,1,"-1","-1","-1",-1,270.82,1053,0,0,94.46,0,0,0,0,0,0,0,0,0,94.46,353,0,176.36,0,0,0,0,0,0,0,0,0,0,176.36,700,2,3151.347991,1057,1
"2",20016,102,"20016102","200161020101",4,"-1","-1",17,2,2013,6,2,1,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,"230","-1","-1",-1,847.41,1852,208.54,0,0,334.16,0,0,0,0,0,0,0,0,542.7,1464,0,0,0,304.71,0,0,0,0,0,0,0,0,304.71,388,3,7921.950896,1162,2
"3",20023,101,"20023101","200231010011",4,"-1","-1",17,1,2013,5,1,1,95,95,95,95,95,95,95,95,95,95,95,2,1,"253","-1","-1",-1,454.45,563.75,0,0,0,0,0,0,0,0,0,0,0,159.7,159.7,269,294.75,0,0,0,0,0,0,0,0,0,0,0,294.75,294.75,3,7674.126079,1138,2
Can't render this file because it is too large.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

13325
data/outpatient/h102f.csv Normal file

File diff suppressed because it is too large Load Diff

11862
data/outpatient/h110f.csv Normal file

File diff suppressed because it is too large Load Diff

11174
data/outpatient/h118f.csv Normal file

File diff suppressed because it is too large Load Diff

13134
data/outpatient/h126f.csv Normal file

File diff suppressed because it is too large Load Diff

10963
data/outpatient/h135f.csv Normal file

File diff suppressed because it is too large Load Diff

11544
data/outpatient/h144f.csv Normal file

File diff suppressed because it is too large Load Diff

11484
data/outpatient/h152f.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

16333
data/outpatient/h77f.csv Normal file

File diff suppressed because it is too large Load Diff

15780
data/outpatient/h85f.csv Normal file

File diff suppressed because it is too large Load Diff

14347
data/outpatient/h94f.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,211 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests\n",
"from html.parser import HTMLParser\n",
"from IPython.display import display\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"emergency_data_url = \"http://meps.ahrq.gov/mepsweb/data_stats/download_data_files_results.jsp?cboDataYear=All&cboDataTypeY=2%2CHousehold+Event+File&buttonYearandDataType=Search&cboPufNumber=All&SearchTitle=Emergency+Room+Visits\"\n",
"\n",
"# Fetch the search-results page. The timeout stops a stalled connection from hanging the\n",
"# kernel, and raise_for_status() stops an HTTP error page from being scraped as if it were results.\n",
"emergency_response = requests.get(emergency_data_url, timeout=60)\n",
"emergency_response.raise_for_status()\n",
"emergency_page = emergency_response.text"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['HC-160E',\n",
" 'HC-152E',\n",
" 'HC-144E',\n",
" 'HC-135E',\n",
" 'HC-126E',\n",
" 'HC-118E',\n",
" 'HC-110E',\n",
" 'HC-102E',\n",
" 'HC-094E',\n",
" 'HC-085E',\n",
" 'HC-077E',\n",
" 'HC-067E',\n",
" 'HC-059E',\n",
" 'HC-051E',\n",
" 'HC-033E',\n",
" 'HC-026E',\n",
" 'HC-016E',\n",
" 'HC-010E']"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"class MepsScraper(HTMLParser):\n",
"    \"\"\"HTML parser that collects PUF identifiers (e.g. 'HC-160E') from a page's text nodes.\n",
"\n",
"    After feed(), the identifiers are available in `pufs`, in document order.\n",
"    \"\"\"\n",
"\n",
"    # 'HC-' followed by at least one digit and an optional single capital letter.\n",
"    p = re.compile('HC-[0-9]+[A-Z]?')\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        # Per-instance list: a class-level list would be shared by every scraper,\n",
"        # so a second instance would start out holding the first one's results.\n",
"        self.pufs = []\n",
"\n",
"    def handle_data(self, data):\n",
"        \"\"\"Store `data` when the whole whitespace-trimmed text node is a PUF identifier.\"\"\"\n",
"        text = data.strip()\n",
"        # fullmatch rather than match, so text that merely starts with an identifier is skipped.\n",
"        if self.p.fullmatch(text):\n",
"            self.pufs.append(text)\n",
"\n",
"meps = MepsScraper()\n",
"meps.feed(emergency_page)\n",
"display(meps.pufs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['h160e',\n",
" 'h152e',\n",
" 'h144e',\n",
" 'h135e',\n",
" 'h126e',\n",
" 'h118e',\n",
" 'h110e',\n",
" 'h102e',\n",
" 'h94e',\n",
" 'h85e',\n",
" 'h77e',\n",
" 'h67e',\n",
" 'h59e',\n",
" 'h51e',\n",
" 'h33e',\n",
" 'h26e',\n",
" 'h16e',\n",
" 'h10e']"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def norm_puf(puf):\n",
"    \"\"\"Turn a PUF identifier such as 'HC-094E' into its lower-case stem ('h94e').\n",
"\n",
"    The 'C-' separator is dropped, along with one leading zero of the part that follows it.\n",
"    \"\"\"\n",
"    pieces = puf.split(\"C-\")\n",
"    kept = [pieces[0], pieces[1][1:]] if pieces[1][0] == '0' else pieces\n",
"    return ''.join(kept).lower()\n",
"\n",
"final_pufs = [norm_puf(puf_id) for puf_id in meps.pufs]\n",
"display(final_pufs)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"def puf_url(x):\n",
"    \"\"\"Return the URL of the 'ssp.zip' archive for the PUF stem `x`.\"\"\"\n",
"    return 'http://meps.ahrq.gov/mepsweb/data_files/pufs/' + x + 'ssp.zip'\n",
"\n",
"puf_urls = [puf_url(stem) for stem in final_pufs]\n",
"\n",
"# One HTTP response per PUF stem, requested in the order of final_pufs.\n",
"puf_files = dict(zip(final_pufs, map(requests.get, map(puf_url, final_pufs))))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'Error extracting h26e'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Error extracting h16e'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Error extracting h10e'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from zipfile import ZipFile, BadZipFile\n",
"from io import BytesIO\n",
"\n",
"# Unpack every downloaded archive into the current working directory.\n",
"for key, value in puf_files.items():\n",
"    try:\n",
"        # The with-block closes the archive even if extraction fails part-way.\n",
"        with ZipFile(BytesIO(value.content)) as puf_zip:\n",
"            puf_zip.extractall()\n",
"    except BadZipFile:\n",
"        # The response body was not a zip archive; report it and carry on with the rest.\n",
"        display(\"Error extracting {}\".format(key))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,87 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"library(httr)\n",
"library(foreign)\n",
" \n",
"download_puf <- function(short_puf) {\n",
"  # Download the zip archive for one PUF, unpack its .ssp file into the working\n",
"  # directory and write the contents to \"<short_puf>.csv\".\n",
"  #\n",
"  # short_puf: normalized PUF name such as \"h160f\".\n",
"  puf_base <- \"http://meps.ahrq.gov/mepsweb/data_files/pufs/\"\n",
"  puf_suffix <- \"ssp.zip\"\n",
"\n",
"  zip_filename <- paste0(short_puf, puf_suffix)\n",
"  filename <- paste0(short_puf, \".ssp\")\n",
"  puf_url <- paste0(puf_base, zip_filename)\n",
"  # mode = \"wb\": the archive is binary, and a text-mode transfer corrupts it on Windows.\n",
"  download.file(puf_url, zip_filename, mode = \"wb\")\n",
"\n",
"  # Extract only the file that is read below.\n",
"  unzip(zip_filename, files = filename)\n",
"  saveName <- paste0(short_puf, \".csv\")\n",
"\n",
"  # Read the SAS transport file and write it out as CSV.\n",
"  mydata <- read.xport(filename)\n",
"  # write.csv adds a blank header cell for the row-name column. write.table(sep = \",\")\n",
"  # omits it, which leaves the header one field shorter than every data row.\n",
"  write.csv(mydata, file = saveName)\n",
"}\n",
"\n",
"normalize_puf <- function(full_puf) {\n",
"  # Convert a PUF identifier such as \"HC-094F\" into its lower-case short form (\"h94f\"):\n",
"  # every \"C-0\" is removed first, then every remaining \"C-\".\n",
"  without_zero_sep <- gsub(\"C-0\", \"\", full_puf)\n",
"  tolower(gsub(\"C-\", \"\", without_zero_sep))\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ERROR",
"evalue": "Error in lookup.xport(file): file not in SAS transfer format\n",
"output_type": "error",
"traceback": [
"Error in lookup.xport(file): file not in SAS transfer format\n"
]
}
],
"source": [
"# Emergency Visit PUFs\n",
"# pufs <- c('HC-160E','HC-152E','HC-144E','HC-135E','HC-126E','HC-118E','HC-110E','HC-102E','HC-094E','HC-085E','HC-077E','HC-067E','HC-059E','HC-051E','HC-033E','HC-026E','HC-016E','HC-010E')\n",
"\n",
"# Outpatient Visits\n",
"pufs <- c('HC-160F','HC-152F','HC-144F','HC-135F','HC-126F','HC-118F','HC-110F','HC-102F','HC-094F','HC-085F','HC-077F','HC-067F','HC-059F','HC-051F','HC-033F','HC-026F','HC-016F','HC-010F')\n",
"\n",
"puf_downloads = c()\n",
"for (puf in pufs) {\n",
"  short_puf <- normalize_puf(puf)\n",
"  # A file that cannot be downloaded or read must not stop the remaining PUFs:\n",
"  # report the failure and move on to the next one.\n",
"  tryCatch(\n",
"    download_puf(short_puf),\n",
"    error = function(e) message(\"Skipping \", short_puf, \": \", conditionMessage(e))\n",
"  )\n",
"}"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.2.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

BIN
exploration/h102f.ssp Normal file

Binary file not shown.

BIN
exploration/h102fssp.zip Normal file

Binary file not shown.

BIN
exploration/h110f.ssp Normal file

Binary file not shown.

BIN
exploration/h110fssp.zip Normal file

Binary file not shown.

BIN
exploration/h118f.ssp Normal file

Binary file not shown.

BIN
exploration/h118fssp.zip Normal file

Binary file not shown.

BIN
exploration/h126f.ssp Normal file

Binary file not shown.

BIN
exploration/h126fssp.zip Normal file

Binary file not shown.

BIN
exploration/h135f.ssp Normal file

Binary file not shown.

BIN
exploration/h135fssp.zip Normal file

Binary file not shown.

BIN
exploration/h144f.ssp Normal file

Binary file not shown.

BIN
exploration/h144fssp.zip Normal file

Binary file not shown.

BIN
exploration/h152f.ssp Normal file

Binary file not shown.

BIN
exploration/h152fssp.zip Normal file

Binary file not shown.

BIN
exploration/h159 .ssp Normal file

Binary file not shown.

BIN
exploration/h159.ssp Normal file

Binary file not shown.

BIN
exploration/h160f.ssp Normal file

Binary file not shown.

BIN
exploration/h160fssp.zip Normal file

Binary file not shown.

BIN
exploration/h67e.ssp Normal file

Binary file not shown.

BIN
exploration/h67f.ssp Normal file

Binary file not shown.

BIN
exploration/h67fssp.zip Normal file

Binary file not shown.

BIN
exploration/h77f.ssp Normal file

Binary file not shown.

BIN
exploration/h77fssp.zip Normal file

Binary file not shown.

BIN
exploration/h85f.ssp Normal file

Binary file not shown.

BIN
exploration/h85fssp.zip Normal file

Binary file not shown.

BIN
exploration/h94f.ssp Normal file

Binary file not shown.

BIN
exploration/h94fssp.zip Normal file

Binary file not shown.