Intro to Plotly Express¶
# import libraries
import pandas as pd # data processing,
import plotly.express as px # for visualization
from plotly.figure_factory import create_table # for creating nice table
Reading and Exploring Data¶
# load the gapminder dataset bundled with plotly express (a pandas DataFrame)
gapminder = px.data.gapminder()
# examine the first five rows (head() with no argument)
gapminder.head()
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
---|---|---|---|---|---|---|---|---|
0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 | AFG | 4 |
1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 | AFG | 4 |
2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 | AFG | 4 |
3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 | AFG | 4 |
4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 | AFG | 4 |
# examine the last five rows (tail() with no argument)
gapminder.tail()
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
---|---|---|---|---|---|---|---|---|
1699 | Zimbabwe | Africa | 1987 | 62.351 | 9216418 | 706.157306 | ZWE | 716 |
1700 | Zimbabwe | Africa | 1992 | 60.377 | 10704340 | 693.420786 | ZWE | 716 |
1701 | Zimbabwe | Africa | 1997 | 46.809 | 11404948 | 792.449960 | ZWE | 716 |
1702 | Zimbabwe | Africa | 2002 | 39.989 | 11926563 | 672.038623 | ZWE | 716 |
1703 | Zimbabwe | Africa | 2007 | 43.487 | 12311143 | 469.709298 | ZWE | 716 |
# examine a specific number of rows: here, the first 10
gapminder.head(10)
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
---|---|---|---|---|---|---|---|---|
0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 | AFG | 4 |
1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 | AFG | 4 |
2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 | AFG | 4 |
3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 | AFG | 4 |
4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 | AFG | 4 |
5 | Afghanistan | Asia | 1977 | 38.438 | 14880372 | 786.113360 | AFG | 4 |
6 | Afghanistan | Asia | 1982 | 39.854 | 12881816 | 978.011439 | AFG | 4 |
7 | Afghanistan | Asia | 1987 | 40.822 | 13867957 | 852.395945 | AFG | 4 |
8 | Afghanistan | Asia | 1992 | 41.674 | 16317921 | 649.341395 | AFG | 4 |
9 | Afghanistan | Asia | 1997 | 41.763 | 22227415 | 635.341351 | AFG | 4 |
# check the shape of the dataset as (number of rows, number of columns)
gapminder.shape
(1704, 8)
# column names, returned as a pandas Index
gapminder.columns
Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
'iso_alpha', 'iso_num'],
dtype='object')
# data type (dtype) of each column
gapminder.dtypes
country object
continent object
year int64
lifeExp float64
pop int64
gdpPercap float64
iso_alpha object
iso_num int64
dtype: object
Note
object: text (string) column, used here for the categorical variables
float / int: numeric variable
# print a summary of the dataset: index range, per-column non-null counts
# and dtypes, and memory usage
gapminder.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 country 1704 non-null object
1 continent 1704 non-null object
2 year 1704 non-null int64
3 lifeExp 1704 non-null float64
4 pop 1704 non-null int64
5 gdpPercap 1704 non-null float64
6 iso_alpha 1704 non-null object
7 iso_num 1704 non-null int64
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
gapminder.describe()
year | lifeExp | pop | gdpPercap | iso_num | |
---|---|---|---|---|---|
count | 1704.00000 | 1704.000000 | 1.704000e+03 | 1704.000000 | 1704.000000 |
mean | 1979.50000 | 59.474439 | 2.960121e+07 | 7215.327081 | 425.880282 |
std | 17.26533 | 12.917107 | 1.061579e+08 | 9857.454543 | 248.305709 |
min | 1952.00000 | 23.599000 | 6.001100e+04 | 241.165876 | 4.000000 |
25% | 1965.75000 | 48.198000 | 2.793664e+06 | 1202.060309 | 208.000000 |
50% | 1979.50000 | 60.712500 | 7.023596e+06 | 3531.846989 | 410.000000 |
75% | 1993.25000 | 70.845500 | 1.958522e+07 | 9325.462346 | 638.000000 |
max | 2007.00000 | 82.603000 | 1.318683e+09 | 113523.132900 | 894.000000 |
Creating a Publication Quality Table¶
# create a publication quality table from the first 10 rows
table = create_table(gapminder.head(10))
# create_table returns a plotly figure, so render it with its own show() method
# (this notebook never imports a `py` module, so `py.iplot` would raise NameError)
table.show()
Quick Visualizations with Bar Charts¶
# The axis label used below reads "Population of Canada", so plot Canada's rows
# only; with the unfiltered dataset each bar would stack every country's population.
canada = gapminder.query("country == 'Canada'")
fig = px.bar(data_frame=canada, x='year', y='pop')
fig.show()
# height
fig = px.bar(data_frame=canada, x='year', y='pop', height=400)
fig.show()
# let's add color by lifeExp and other parameters
fig = px.bar(data_frame=canada, x='year', y='pop', color='lifeExp',
             labels={'pop': 'Population of Canada'}, height=400)
fig.show()
Plot Life Expectancy vs GDP Per Capita¶
# keep only the rows recorded in 2007
gapminder2007 = gapminder[gapminder['year'] == 2007]
# life expectancy against GDP per capita
fig = px.scatter(data_frame=gapminder2007, x='gdpPercap', y='lifeExp')
fig.show()
# same scatter, with the markers colored by continent
fig = px.scatter(data_frame=gapminder2007, x='gdpPercap', y='lifeExp', color='continent')
fig.show()
Create Interactive Bubble Charts¶
# bubble chart: marker size follows population, with a maximum marker size of 60
bubble_args = dict(x='gdpPercap', y='lifeExp', color='continent', size='pop', size_max=60)
fig = px.scatter(gapminder2007, **bubble_args)
fig.show()
# same chart, with the country name shown as the hover title
fig = px.scatter(gapminder2007, hover_name='country', **bubble_args)
fig.show()
Create Interactive Animations and Facet Plots¶
# facet plot: one column of the figure per continent, all years included
facet_args = dict(x='gdpPercap', y='lifeExp', color='continent', size='pop', size_max=60,
                  hover_name='country', facet_col='continent')
fig = px.scatter(gapminder, **facet_args)
fig.show()
# same facets, with a logarithmic x-axis
fig = px.scatter(gapminder, log_x=True, **facet_args)
fig.show()
# let's add animation: one frame per year, with each country tracked across frames.
# The fixed axis ranges are chosen to contain the data: gdpPercap runs from about
# 241 to 113,523 and lifeExp from about 23.6 to 82.6, so an x upper bound of 10,000
# or a y lower bound of 25 would push part of the data off the plot.
fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp', color='continent', size='pop', size_max=40,
                 hover_name='country', log_x=True, animation_frame='year',
                 animation_group='country', range_x=[100, 200000], range_y=[20, 90])
fig.show()
# customize the labels
fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp', color='continent', size='pop', size_max=40,
                 hover_name='country', log_x=True, animation_frame='year',
                 animation_group='country', range_x=[100, 200000], range_y=[20, 90],
                 labels=dict(pop="Population", gdpPercap="GDP Per Capita", lifeExp="Life Expectancy"))
fig.show()
Represent Geographic Data as Animated Maps¶
# line map of the 2007 rows on an orthographic projection, colored by continent
rows_2007 = gapminder.query('year == 2007')
fig = px.line_geo(rows_2007, locations='iso_alpha', color='continent', projection='orthographic')
fig.show()
# choropleth of life expectancy, animated over the years
plasma_scale = px.colors.sequential.Plasma
fig = px.choropleth(gapminder, locations='iso_alpha', color='lifeExp', hover_name='country',
                    animation_frame='year', color_continuous_scale=plasma_scale,
                    projection='natural earth')
fig.show()
Using Plotly Template in Any Graphs¶
# show the default template and the list of available templates (themes)
import plotly.io as pio
pio.templates
Templates configuration
-----------------------
Default template: 'plotly'
Available templates:
['ggplot2', 'seaborn', 'simple_white', 'plotly',
'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
'ygridoff', 'gridon', 'none']
# The axis label below reads "Population of Canada", so restrict the data to
# Canada; the unfiltered dataset would stack every country's population in each bar.
canada = gapminder.query("country == 'Canada'")
# let's use plotly_dark in our previous bar chart
fig = px.bar(canada, x='year', y='pop', color='lifeExp', labels={'pop': 'Population of Canada'},
             height=400, template='plotly_dark')
fig.show()
# seaborn
fig = px.bar(canada, x='year', y='pop', color='lifeExp', labels={'pop': 'Population of Canada'},
             height=400, template='seaborn')
fig.show()
# ggplot2
fig = px.bar(canada, x='year', y='pop', color='lifeExp', labels={'pop': 'Population of Canada'},
             height=400, template='ggplot2')
fig.show()