Rename and Replace¶
import pandas as pd
# read a dataset of UFO reports into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')
# examine the first 5 rows
ufo.head()
City | Colors Reported | Shape Reported | State | Time | |
---|---|---|---|---|---|
0 | Ithaca | NaN | TRIANGLE | NY | 6/1/1930 22:00 |
1 | Willingboro | NaN | OTHER | NJ | 6/30/1930 20:00 |
2 | Holyoke | NaN | OVAL | CO | 2/15/1931 14:00 |
3 | Abilene | NaN | DISK | KS | 6/1/1931 13:00 |
4 | New York Worlds Fair | NaN | LIGHT | NY | 4/18/1933 19:00 |
# examine the column names
ufo.columns
Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')
Rename¶
# rename two of the columns by useing `rename` method
ufo.rename(columns={'Colors Reported': 'Colors_Reported', 'Shape Reported': 'Shape_Reported'}, inplace=True)
ufo.head()
City | Colors_Reported | Shape_Reported | State | Time | |
---|---|---|---|---|---|
0 | Ithaca | NaN | TRIANGLE | NY | 6/1/1930 22:00 |
1 | Willingboro | NaN | OTHER | NJ | 6/30/1930 20:00 |
2 | Holyoke | NaN | OVAL | CO | 2/15/1931 14:00 |
3 | Abilene | NaN | DISK | KS | 6/1/1931 13:00 |
4 | New York Worlds Fair | NaN | LIGHT | NY | 4/18/1933 19:00 |
# replace all of the column names by overwritting the 'colums' attribute
ufo_cols = ['city', 'colors reported', 'shape reported', 'state', 'time']
ufo.columns = ufo_cols
# see modified columns
ufo.columns
Index(['city', 'colors reported', 'shape reported', 'state', 'time'], dtype='object')
# replace the column names during the file reading process by using the 'names' parameter
ufo = pd.read_csv('http://bit.ly/uforeports', names=ufo_cols)
# examine the 5 rows
ufo.head()
city | colors reported | shape reported | state | time | |
---|---|---|---|---|---|
0 | City | Colors Reported | Shape Reported | State | Time |
1 | Ithaca | NaN | TRIANGLE | NY | 6/1/1930 22:00 |
2 | Willingboro | NaN | OTHER | NJ | 6/30/1930 20:00 |
3 | Holyoke | NaN | OVAL | CO | 2/15/1931 14:00 |
4 | Abilene | NaN | DISK | KS | 6/1/1931 13:00 |
# replace all spaces with underscores in the column names by using the 'str.replace' method
ufo.columns = ufo.columns.str.replace(' ', '_')
ufo.columns
Index(['city', 'colors_reported', 'shape_reported', 'state', 'time'], dtype='object')
# let's look at DataFrame
ufo.head()
city | colors_reported | shape_reported | state | time | |
---|---|---|---|---|---|
0 | City | Colors Reported | Shape Reported | State | Time |
1 | Ithaca | NaN | TRIANGLE | NY | 6/1/1930 22:00 |
2 | Willingboro | NaN | OTHER | NJ | 6/30/1930 20:00 |
3 | Holyoke | NaN | OVAL | CO | 2/15/1931 14:00 |
4 | Abilene | NaN | DISK | KS | 6/1/1931 13:00 |
Replace¶
# read another dataset
fm = pd.read_csv("../data/framingham.csv")
# examine first few rows
fm.head()
male | age | education | currentSmoker | cigsPerDay | BPMeds | prevalentStroke | prevalentHyp | diabetes | totChol | sysBP | diaBP | BMI | heartRate | glucose | TenYearCHD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 39 | 4.0 | 0 | 0.0 | 0.0 | 0 | 0 | 0 | 195.0 | 106.0 | 70.0 | 26.97 | 80.0 | 77.0 | 0 |
1 | 0 | 46 | 2.0 | 0 | 0.0 | 0.0 | 0 | 0 | 0 | 250.0 | 121.0 | 81.0 | 28.73 | 95.0 | 76.0 | 0 |
2 | 1 | 48 | 1.0 | 1 | 20.0 | 0.0 | 0 | 0 | 0 | 245.0 | 127.5 | 80.0 | 25.34 | 75.0 | 70.0 | 0 |
3 | 0 | 61 | 3.0 | 1 | 30.0 | 0.0 | 0 | 1 | 0 | 225.0 | 150.0 | 95.0 | 28.58 | 65.0 | 103.0 | 1 |
4 | 0 | 46 | 3.0 | 1 | 23.0 | 0.0 | 0 | 0 | 0 | 285.0 | 130.0 | 84.0 | 23.10 | 85.0 | 85.0 | 0 |
# first rename `male` to `sex`
fm.rename(columns={"male": "sex"}, inplace=True)
# Now take a look at dataset
fm.head()
sex | age | education | currentSmoker | cigsPerDay | BPMeds | prevalentStroke | prevalentHyp | diabetes | totChol | sysBP | diaBP | BMI | heartRate | glucose | TenYearCHD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 39 | 4.0 | 0 | 0.0 | 0.0 | 0 | 0 | 0 | 195.0 | 106.0 | 70.0 | 26.97 | 80.0 | 77.0 | 0 |
1 | 0 | 46 | 2.0 | 0 | 0.0 | 0.0 | 0 | 0 | 0 | 250.0 | 121.0 | 81.0 | 28.73 | 95.0 | 76.0 | 0 |
2 | 1 | 48 | 1.0 | 1 | 20.0 | 0.0 | 0 | 0 | 0 | 245.0 | 127.5 | 80.0 | 25.34 | 75.0 | 70.0 | 0 |
3 | 0 | 61 | 3.0 | 1 | 30.0 | 0.0 | 0 | 1 | 0 | 225.0 | 150.0 | 95.0 | 28.58 | 65.0 | 103.0 | 1 |
4 | 0 | 46 | 3.0 | 1 | 23.0 | 0.0 | 0 | 0 | 0 | 285.0 | 130.0 | 84.0 | 23.10 | 85.0 | 85.0 | 0 |
Replace Value for Better Understanding of Dataset¶
sex
1 = Male
0 = Female
diabetes
1 = Yes
0 = No
# replace sex column value
fm['sex'].replace({1: "male", 0: "female"}, inplace=True)
# replace diabetes column value
fm['diabetes'].replace({1: "yes", 0: "no"}, inplace=True)
# Examine dataset
fm.head()
sex | age | education | currentSmoker | cigsPerDay | BPMeds | prevalentStroke | prevalentHyp | diabetes | totChol | sysBP | diaBP | BMI | heartRate | glucose | TenYearCHD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | male | 39 | 4.0 | 0 | 0.0 | 0.0 | 0 | 0 | no | 195.0 | 106.0 | 70.0 | 26.97 | 80.0 | 77.0 | 0 |
1 | female | 46 | 2.0 | 0 | 0.0 | 0.0 | 0 | 0 | no | 250.0 | 121.0 | 81.0 | 28.73 | 95.0 | 76.0 | 0 |
2 | male | 48 | 1.0 | 1 | 20.0 | 0.0 | 0 | 0 | no | 245.0 | 127.5 | 80.0 | 25.34 | 75.0 | 70.0 | 0 |
3 | female | 61 | 3.0 | 1 | 30.0 | 0.0 | 0 | 1 | no | 225.0 | 150.0 | 95.0 | 28.58 | 65.0 | 103.0 | 1 |
4 | female | 46 | 3.0 | 1 | 23.0 | 0.0 | 0 | 0 | no | 285.0 | 130.0 | 84.0 | 23.10 | 85.0 | 85.0 | 0 |