Rename and Replace

import pandas as pd 
# read a dataset of UFO reports into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')
# examine the first 5 rows 
ufo.head()
City Colors Reported Shape Reported State Time
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
1 Willingboro NaN OTHER NJ 6/30/1930 20:00
2 Holyoke NaN OVAL CO 2/15/1931 14:00
3 Abilene NaN DISK KS 6/1/1931 13:00
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00
# examine the column names 
ufo.columns
Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')

Rename

# rename two of the columns by useing `rename` method
ufo.rename(columns={'Colors Reported': 'Colors_Reported', 'Shape Reported': 'Shape_Reported'}, inplace=True)
ufo.head()
City Colors_Reported Shape_Reported State Time
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
1 Willingboro NaN OTHER NJ 6/30/1930 20:00
2 Holyoke NaN OVAL CO 2/15/1931 14:00
3 Abilene NaN DISK KS 6/1/1931 13:00
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00
# replace all of the column names by overwritting the 'colums' attribute 
ufo_cols = ['city', 'colors reported', 'shape reported', 'state', 'time']
ufo.columns = ufo_cols
# see modified columns 
ufo.columns
Index(['city', 'colors reported', 'shape reported', 'state', 'time'], dtype='object')
# replace the column names during the file reading process by using the 'names' parameter
ufo = pd.read_csv('http://bit.ly/uforeports', names=ufo_cols)
# examine the 5 rows 
ufo.head()
city colors reported shape reported state time
0 City Colors Reported Shape Reported State Time
1 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
2 Willingboro NaN OTHER NJ 6/30/1930 20:00
3 Holyoke NaN OVAL CO 2/15/1931 14:00
4 Abilene NaN DISK KS 6/1/1931 13:00
# replace all spaces with underscores in the column names by using the 'str.replace' method
ufo.columns = ufo.columns.str.replace(' ', '_')
ufo.columns
Index(['city', 'colors_reported', 'shape_reported', 'state', 'time'], dtype='object')
# let's look at DataFrame
ufo.head()
city colors_reported shape_reported state time
0 City Colors Reported Shape Reported State Time
1 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
2 Willingboro NaN OTHER NJ 6/30/1930 20:00
3 Holyoke NaN OVAL CO 2/15/1931 14:00
4 Abilene NaN DISK KS 6/1/1931 13:00

Replace

# read another dataset 
fm = pd.read_csv("../data/framingham.csv")
# examine  first few rows 
fm.head() 
male age education currentSmoker cigsPerDay BPMeds prevalentStroke prevalentHyp diabetes totChol sysBP diaBP BMI heartRate glucose TenYearCHD
0 1 39 4.0 0 0.0 0.0 0 0 0 195.0 106.0 70.0 26.97 80.0 77.0 0
1 0 46 2.0 0 0.0 0.0 0 0 0 250.0 121.0 81.0 28.73 95.0 76.0 0
2 1 48 1.0 1 20.0 0.0 0 0 0 245.0 127.5 80.0 25.34 75.0 70.0 0
3 0 61 3.0 1 30.0 0.0 0 1 0 225.0 150.0 95.0 28.58 65.0 103.0 1
4 0 46 3.0 1 23.0 0.0 0 0 0 285.0 130.0 84.0 23.10 85.0 85.0 0
# first rename `male` to `sex` 
fm.rename(columns={"male": "sex"}, inplace=True)
# Now take a look at dataset 
fm.head() 
sex age education currentSmoker cigsPerDay BPMeds prevalentStroke prevalentHyp diabetes totChol sysBP diaBP BMI heartRate glucose TenYearCHD
0 1 39 4.0 0 0.0 0.0 0 0 0 195.0 106.0 70.0 26.97 80.0 77.0 0
1 0 46 2.0 0 0.0 0.0 0 0 0 250.0 121.0 81.0 28.73 95.0 76.0 0
2 1 48 1.0 1 20.0 0.0 0 0 0 245.0 127.5 80.0 25.34 75.0 70.0 0
3 0 61 3.0 1 30.0 0.0 0 1 0 225.0 150.0 95.0 28.58 65.0 103.0 1
4 0 46 3.0 1 23.0 0.0 0 0 0 285.0 130.0 84.0 23.10 85.0 85.0 0

Replace Value for Better Understanding of Dataset

sex

  • 1 = Male

  • 0 = Female

diabetes

  • 1 = Yes

  • 0 = No

# replace sex column value
fm['sex'].replace({1: "male", 0: "female"}, inplace=True)
# replace diabetes column value 
fm['diabetes'].replace({1: "yes", 0: "no"}, inplace=True)
# Examine dataset
fm.head() 
sex age education currentSmoker cigsPerDay BPMeds prevalentStroke prevalentHyp diabetes totChol sysBP diaBP BMI heartRate glucose TenYearCHD
0 male 39 4.0 0 0.0 0.0 0 0 no 195.0 106.0 70.0 26.97 80.0 77.0 0
1 female 46 2.0 0 0.0 0.0 0 0 no 250.0 121.0 81.0 28.73 95.0 76.0 0
2 male 48 1.0 1 20.0 0.0 0 0 no 245.0 127.5 80.0 25.34 75.0 70.0 0
3 female 61 3.0 1 30.0 0.0 0 1 no 225.0 150.0 95.0 28.58 65.0 103.0 1
4 female 46 3.0 1 23.0 0.0 0 0 no 285.0 130.0 84.0 23.10 85.0 85.0 0