Add emergency and outpatient data

Using the new scraper!
This commit is contained in:
bspeice
2015-11-07 15:51:57 -05:00
parent ff88efe156
commit 42873d2bf3
55 changed files with 244711 additions and 41695 deletions

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,211 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests\n",
"from html.parser import HTMLParser\n",
"from IPython.display import display\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# MEPS search-results URL: all data years, household event files, title\n",
"# \"Emergency Room Visits\".\n",
"emergency_data_url = \"http://meps.ahrq.gov/mepsweb/data_stats/download_data_files_results.jsp?cboDataYear=All&cboDataTypeY=2%2CHousehold+Event+File&buttonYearandDataType=Search&cboPufNumber=All&SearchTitle=Emergency+Room+Visits\"\n",
"\n",
"# Fail loudly on an HTTP error instead of handing an error page to the parser,\n",
"# and bound the wait so a stalled server cannot hang the kernel.\n",
"emergency_response = requests.get(emergency_data_url, timeout=60)\n",
"emergency_response.raise_for_status()\n",
"emergency_page = emergency_response.text"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['HC-160E',\n",
" 'HC-152E',\n",
" 'HC-144E',\n",
" 'HC-135E',\n",
" 'HC-126E',\n",
" 'HC-118E',\n",
" 'HC-110E',\n",
" 'HC-102E',\n",
" 'HC-094E',\n",
" 'HC-085E',\n",
" 'HC-077E',\n",
" 'HC-067E',\n",
" 'HC-059E',\n",
" 'HC-051E',\n",
" 'HC-033E',\n",
" 'HC-026E',\n",
" 'HC-016E',\n",
" 'HC-010E']"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"class MepsScraper(HTMLParser):\n",
"    \"\"\"Collect MEPS PUF identifiers such as 'HC-160E' from HTML text nodes.\n",
"\n",
"    After feed(), `pufs` holds the identifiers in document order.\n",
"    \"\"\"\n",
"    # \"HC-\", at least one digit, then an optional single upper-case letter.\n",
"    p = re.compile('HC-[0-9]+[A-Z]?')\n",
"\n",
"    def __init__(self, *args, **kwargs):\n",
"        super().__init__(*args, **kwargs)\n",
"        # Per-instance list: a class-level list would be shared by every scraper,\n",
"        # so a second scraper would start with the first one's results.\n",
"        self.pufs = []\n",
"\n",
"    def handle_data(self, data):\n",
"        \"\"\"Record `data` when the whole text node is a PUF identifier.\"\"\"\n",
"        candidate = data.strip()\n",
"        # fullmatch (not match) so text that merely starts with \"HC-\" is ignored.\n",
"        if self.p.fullmatch(candidate):\n",
"            self.pufs.append(candidate)\n",
" \n",
"# Run the scraper over the fetched page text and show the identifiers it collected.\n",
"meps = MepsScraper()\n",
"meps.feed(emergency_page)\n",
"display(meps.pufs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['h160e',\n",
" 'h152e',\n",
" 'h144e',\n",
" 'h135e',\n",
" 'h126e',\n",
" 'h118e',\n",
" 'h110e',\n",
" 'h102e',\n",
" 'h94e',\n",
" 'h85e',\n",
" 'h77e',\n",
" 'h67e',\n",
" 'h59e',\n",
" 'h51e',\n",
" 'h33e',\n",
" 'h26e',\n",
" 'h16e',\n",
" 'h10e']"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def norm_puf(puf):\n",
"    \"\"\"Turn a PUF identifier into its lower-case short form.\n",
"\n",
"    The text is split on \"C-\"; a single leading zero after the split is dropped,\n",
"    so 'HC-094E' becomes 'h94e' and 'HC-160E' becomes 'h160e'.\n",
"    \"\"\"\n",
"    pieces = puf.split(\"C-\")\n",
"    if pieces[1][0] != '0':\n",
"        return ''.join(pieces).lower()\n",
"    return (pieces[0] + pieces[1][1:]).lower()\n",
" \n",
"# Short lower-case names for every scraped identifier, in the same order.\n",
"final_pufs = [norm_puf(full_name) for full_name in meps.pufs]\n",
"display(final_pufs)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"def puf_url(x):\n",
"    \"\"\"Return the download URL of the 'ssp.zip' archive for short PUF name `x`.\"\"\"\n",
"    return 'http://meps.ahrq.gov/mepsweb/data_files/pufs/' + x + 'ssp.zip'\n",
"\n",
"puf_urls = [puf_url(short_name) for short_name in final_pufs]\n",
"\n",
"# One HTTP response per short PUF name, requested in list order.\n",
"puf_files = dict(zip(final_pufs, (requests.get(url) for url in puf_urls)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'Error extracting h26e'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Error extracting h16e'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'Error extracting h10e'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from zipfile import ZipFile, BadZipFile\n",
"from io import BytesIO\n",
"\n",
"# Unpack each downloaded archive into the current working directory.\n",
"for key, value in puf_files.items():\n",
"    try:\n",
"        # The context manager closes the archive even if extraction fails.\n",
"        with ZipFile(BytesIO(value.content)) as puf_zip:\n",
"            puf_zip.extractall()\n",
"    except BadZipFile:\n",
"        # The response body was not a zip archive; report it and keep going.\n",
"        display(\"Error extracting {}\".format(key))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,87 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"library(httr)\n",
"library(foreign)\n",
" \n",
"download_puf <- function(short_puf) {\n",
"    # Download the zip archive for one short PUF name (e.g. \"h160f\"), extract its\n",
"    # .ssp file and write the contents to \"<short_puf>.csv\" in the working directory.\n",
"    puf_base <- \"http://meps.ahrq.gov/mepsweb/data_files/pufs/\"\n",
"    puf_suffix <- \"ssp.zip\"\n",
"\n",
"    zip_filename <- paste0(short_puf, puf_suffix)\n",
"    filename <- paste0(short_puf, \".ssp\")\n",
"    puf_url <- paste0(puf_base, zip_filename)\n",
"\n",
"    # Binary mode: a text-mode transfer can corrupt the archive on Windows.\n",
"    download.file(puf_url, zip_filename, mode = \"wb\")\n",
"\n",
"    # unzip() only warns when the requested member is missing, so check what it\n",
"    # actually extracted before trying to read the file.\n",
"    extracted <- unzip(zip_filename, files = filename)\n",
"    if (length(extracted) == 0) {\n",
"        stop(\"No file named \", filename, \" in \", zip_filename)\n",
"    }\n",
"    saveName <- paste0(short_puf, \".csv\")\n",
"\n",
"    # Read the SAS transport file and save it as CSV. write.csv() adds the blank\n",
"    # header cell for the row-name column, which write.table(sep = \",\") leaves out.\n",
"    mydata <- read.xport(filename)\n",
"    write.csv(mydata, file = saveName)\n",
"}\n",
"\n",
"normalize_puf <- function(full_puf) {\n",
"    # Convert a full PUF identifier to its short lower-case form,\n",
"    # e.g. \"HC-094F\" -> \"h94f\" and \"HC-160F\" -> \"h160f\".\n",
"    zero_form_removed <- gsub(\"C-0\", \"\", full_puf)  # drops \"C-\" together with a following zero\n",
"    tolower(gsub(\"C-\", \"\", zero_form_removed))      # drops any remaining \"C-\"\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ERROR",
"evalue": "Error in lookup.xport(file): file not in SAS transfer format\n",
"output_type": "error",
"traceback": [
"Error in lookup.xport(file): file not in SAS transfer format\n"
]
}
],
"source": [
"# Emergency Visit PUF's\n",
"# pufs <- c('HC-160E','HC-152E','HC-144E','HC-135E','HC-126E','HC-118E','HC-110E','HC-102E','HC-094E','HC-085E','HC-077E','HC-067E','HC-059E','HC-051E','HC-033E','HC-026E','HC-016E','HC-010E')\n",
"\n",
"# Outpatient Visits\n",
"pufs <- c('HC-160F','HC-152F','HC-144F','HC-135F','HC-126F','HC-118F','HC-110F','HC-102F','HC-094F','HC-085F','HC-077F','HC-067F','HC-059F','HC-051F','HC-033F','HC-026F','HC-016F','HC-010F')\n",
"\n",
"# Short names of the PUFs that were downloaded and converted without error.\n",
"puf_downloads = c()\n",
"for (puf in pufs) {\n",
"    short_puf <- normalize_puf(puf)\n",
"    # Report a failing PUF and carry on, so one bad archive does not abort the batch.\n",
"    tryCatch({\n",
"        download_puf(short_puf)\n",
"        puf_downloads <- c(puf_downloads, short_puf)\n",
"    }, error = function(e) {\n",
"        message(\"Error processing \", short_puf, \": \", conditionMessage(e))\n",
"    })\n",
"}"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.2.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

BIN
exploration/h102f.ssp Normal file

Binary file not shown.

BIN
exploration/h102fssp.zip Normal file

Binary file not shown.

BIN
exploration/h110f.ssp Normal file

Binary file not shown.

BIN
exploration/h110fssp.zip Normal file

Binary file not shown.

BIN
exploration/h118f.ssp Normal file

Binary file not shown.

BIN
exploration/h118fssp.zip Normal file

Binary file not shown.

BIN
exploration/h126f.ssp Normal file

Binary file not shown.

BIN
exploration/h126fssp.zip Normal file

Binary file not shown.

BIN
exploration/h135f.ssp Normal file

Binary file not shown.

BIN
exploration/h135fssp.zip Normal file

Binary file not shown.

BIN
exploration/h144f.ssp Normal file

Binary file not shown.

BIN
exploration/h144fssp.zip Normal file

Binary file not shown.

BIN
exploration/h152f.ssp Normal file

Binary file not shown.

BIN
exploration/h152fssp.zip Normal file

Binary file not shown.

BIN
exploration/h159 .ssp Normal file

Binary file not shown.

BIN
exploration/h159.ssp Normal file

Binary file not shown.

BIN
exploration/h160f.ssp Normal file

Binary file not shown.

BIN
exploration/h160fssp.zip Normal file

Binary file not shown.

BIN
exploration/h67e.ssp Normal file

Binary file not shown.

BIN
exploration/h67f.ssp Normal file

Binary file not shown.

BIN
exploration/h67fssp.zip Normal file

Binary file not shown.

BIN
exploration/h77f.ssp Normal file

Binary file not shown.

BIN
exploration/h77fssp.zip Normal file

Binary file not shown.

BIN
exploration/h85f.ssp Normal file

Binary file not shown.

BIN
exploration/h85fssp.zip Normal file

Binary file not shown.

BIN
exploration/h94f.ssp Normal file

Binary file not shown.

BIN
exploration/h94fssp.zip Normal file

Binary file not shown.