From 63c1ff7386d75ee26e95ac3a43b5c2c33ce48d56 Mon Sep 17 00:00:00 2001 From: Ivan Ivanov Date: Sat, 7 Nov 2015 17:04:11 -0500 Subject: [PATCH] mv --- scripts/enrich_dataset.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/scripts/enrich_dataset.py b/scripts/enrich_dataset.py index 59e7285..e6feea1 100644 --- a/scripts/enrich_dataset.py +++ b/scripts/enrich_dataset.py @@ -38,7 +38,12 @@ def get_labels(PUFId, varName): def load_dictionary(dictfile): df = pd.read_csv(dictfile) - return dict(zip(df.NAME, df.DESCRIPTION)) + + dictionary = dict(zip(df.NAME, df.DESCRIPTION)) + srcs = dictionary.keys() + for k in srcs: + dictionary[k + '_label'] = dictionary[k] + '_label' + return dictionary # hardcoded config of which file corresponds to which year. @@ -49,11 +54,15 @@ YEAR_FILES = dict([('h{}e.csv'.format(idx), 2013-i) for i, idx in enumerate([160 if __name__ == '__main__': + # Usage: + # $0 --input-file data/emergency\ visits/h94e.csv \ + # --output-dir data/emergency\ visits/ \ + # --column-dictionary FYCCodebook_2013.csv + # parser = argparse.ArgumentParser() parser.add_argument('--input-file') parser.add_argument('--output-dir') parser.add_argument('--column-dictionary') - parser.add_argument('--category') args = parser.parse_args() infile = os.path.basename(args.input_file) @@ -73,9 +82,6 @@ if __name__ == '__main__': if args.column_dictionary: dictionary = load_dictionary(args.column_dictionary) - srcs = dictionary.keys() - for k in srcs: - dictionary[k + '_label'] = dictionary[k] + '_label' raw_data.columns = [dictionary.get(col, col) for col in raw_data.columns]