mirror of
https://github.com/bspeice/betterwithdata_cleaning_4
synced 2024-12-04 21:28:09 -05:00
mv
This commit is contained in:
parent
318f990a30
commit
63c1ff7386
@ -38,7 +38,12 @@ def get_labels(PUFId, varName):
|
|||||||
|
|
||||||
def load_dictionary(dictfile):
|
def load_dictionary(dictfile):
|
||||||
df = pd.read_csv(dictfile)
|
df = pd.read_csv(dictfile)
|
||||||
return dict(zip(df.NAME, df.DESCRIPTION))
|
|
||||||
|
dictionary = dict(zip(df.NAME, df.DESCRIPTION))
|
||||||
|
srcs = dictionary.keys()
|
||||||
|
for k in srcs:
|
||||||
|
dictionary[k + '_label'] = dictionary[k] + '_label'
|
||||||
|
return dictionary
|
||||||
|
|
||||||
|
|
||||||
# hardcoded config of which file corresponds to which year.
|
# hardcoded config of which file corresponds to which year.
|
||||||
@ -49,11 +54,15 @@ YEAR_FILES = dict([('h{}e.csv'.format(idx), 2013-i) for i, idx in enumerate([160
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
# Usage:
|
||||||
|
# $0 --input-file data/emergency\ visits/h94e.csv \
|
||||||
|
# --output-dir data/emergency\ visits/ \
|
||||||
|
# --column-dictionary FYCCodebook_2013.csv
|
||||||
|
#
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--input-file')
|
parser.add_argument('--input-file')
|
||||||
parser.add_argument('--output-dir')
|
parser.add_argument('--output-dir')
|
||||||
parser.add_argument('--column-dictionary')
|
parser.add_argument('--column-dictionary')
|
||||||
parser.add_argument('--category')
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
infile = os.path.basename(args.input_file)
|
infile = os.path.basename(args.input_file)
|
||||||
@ -73,9 +82,6 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
if args.column_dictionary:
|
if args.column_dictionary:
|
||||||
dictionary = load_dictionary(args.column_dictionary)
|
dictionary = load_dictionary(args.column_dictionary)
|
||||||
srcs = dictionary.keys()
|
|
||||||
for k in srcs:
|
|
||||||
dictionary[k + '_label'] = dictionary[k] + '_label'
|
|
||||||
|
|
||||||
raw_data.columns = [dictionary.get(col, col) for col in raw_data.columns]
|
raw_data.columns = [dictionary.get(col, col) for col in raw_data.columns]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user