Intro to Plotly Express

# import libraries 
import pandas as pd # data processing, 
import plotly.express as px # for visualization 
from plotly.figure_factory import create_table # for creating nice table 

Reading and Exploring Data

# Load the gapminder sample dataset bundled with Plotly Express into a DataFrame.
gapminder = px.data.gapminder() 
# Preview the first rows; head() with no argument shows the five rows below.
gapminder.head() 
country continent year lifeExp pop gdpPercap iso_alpha iso_num
0 Afghanistan Asia 1952 28.801 8425333 779.445314 AFG 4
1 Afghanistan Asia 1957 30.332 9240934 820.853030 AFG 4
2 Afghanistan Asia 1962 31.997 10267083 853.100710 AFG 4
3 Afghanistan Asia 1967 34.020 11537966 836.197138 AFG 4
4 Afghanistan Asia 1972 36.088 13079460 739.981106 AFG 4
# Preview the last rows; tail() with no argument shows the five rows below.
gapminder.tail() 
country continent year lifeExp pop gdpPercap iso_alpha iso_num
1699 Zimbabwe Africa 1987 62.351 9216418 706.157306 ZWE 716
1700 Zimbabwe Africa 1992 60.377 10704340 693.420786 ZWE 716
1701 Zimbabwe Africa 1997 46.809 11404948 792.449960 ZWE 716
1702 Zimbabwe Africa 2002 39.989 11926563 672.038623 ZWE 716
1703 Zimbabwe Africa 2007 43.487 12311143 469.709298 ZWE 716
# Pass a row count to head() to preview a specific number of rows (here, 10).
gapminder.head(10)
country continent year lifeExp pop gdpPercap iso_alpha iso_num
0 Afghanistan Asia 1952 28.801 8425333 779.445314 AFG 4
1 Afghanistan Asia 1957 30.332 9240934 820.853030 AFG 4
2 Afghanistan Asia 1962 31.997 10267083 853.100710 AFG 4
3 Afghanistan Asia 1967 34.020 11537966 836.197138 AFG 4
4 Afghanistan Asia 1972 36.088 13079460 739.981106 AFG 4
5 Afghanistan Asia 1977 38.438 14880372 786.113360 AFG 4
6 Afghanistan Asia 1982 39.854 12881816 978.011439 AFG 4
7 Afghanistan Asia 1987 40.822 13867957 852.395945 AFG 4
8 Afghanistan Asia 1992 41.674 16317921 649.341395 AFG 4
9 Afghanistan Asia 1997 41.763 22227415 635.341351 AFG 4
# Dimensions of the dataset as a (rows, columns) tuple.
gapminder.shape 
(1704, 8)
# Column labels of the dataset.
gapminder.columns
Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num'],
      dtype='object')
# Storage dtype of each column (object, int64 or float64 in this dataset).
gapminder.dtypes
country       object
continent     object
year           int64
lifeExp      float64
pop            int64
gdpPercap    float64
iso_alpha     object
iso_num        int64
dtype: object

Note

  • object: text (string) column, treated here as a categorical variable

  • float / int: numeric variable

# Print a concise summary: index range, per-column non-null counts and dtypes,
# and memory usage.
gapminder.info() 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
gapminder.describe() 
year lifeExp pop gdpPercap iso_num
count 1704.00000 1704.000000 1.704000e+03 1704.000000 1704.000000
mean 1979.50000 59.474439 2.960121e+07 7215.327081 425.880282
std 17.26533 12.917107 1.061579e+08 9857.454543 248.305709
min 1952.00000 23.599000 6.001100e+04 241.165876 4.000000
25% 1965.75000 48.198000 2.793664e+06 1202.060309 208.000000
50% 1979.50000 60.712500 7.023596e+06 3531.846989 410.000000
75% 1993.25000 70.845500 1.958522e+07 9325.462346 638.000000
max 2007.00000 82.603000 1.318683e+09 113523.132900 894.000000

Creating a Publication Quality Table

# Render the first ten rows as a formatted Plotly table.
table = create_table(gapminder.head(10))
# create_table returns a Plotly figure, so display it with .show() like every
# other figure in this notebook. The previous `py.iplot(table)` call raised
# NameError because no `py` module is ever imported.
table.show()

Quick Visualizations with Bar Charts

# Bar chart of population by year, drawn from the full dataset.
fig = px.bar(gapminder, x='year', y='pop')
fig.show()
# Same chart with the figure height fixed at 400.
fig = px.bar(gapminder, x='year', y='pop', height=400)
fig.show()
# Color each bar by life expectancy and give the y-axis a readable label.
# The label reads "Population of Canada", so restrict the data to Canada first;
# plotting every country under that label would misdescribe the chart.
gapminder_canada = gapminder.query("country == 'Canada'")
fig = px.bar(data_frame=gapminder_canada, x='year', y='pop', color='lifeExp',
             labels={'pop': 'Population of Canada'}, height=400)
fig.show()

Plot Life Expectancy vs GDP Per Capita

# Keep only the rows recorded for 2007.
gapminder2007 = gapminder[gapminder['year'] == 2007]
# Scatter plot of life expectancy against GDP per capita.
fig = px.scatter(data_frame=gapminder2007, x='gdpPercap', y='lifeExp')
fig.show()
# Same scatter plot, with one color per continent.
fig = px.scatter(
    data_frame=gapminder2007,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
)
fig.show()

Create Interactive Bubble Charts

# Arguments shared by both bubble charts: marker size follows population,
# with the largest marker capped at size_max.
bubble_kwargs = dict(
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=60,
)
# Bubble chart of the 2007 data.
fig = px.scatter(gapminder2007, **bubble_kwargs)
fig.show()
# Same chart, with the country name as the hover title.
fig = px.scatter(gapminder2007, hover_name='country', **bubble_kwargs)
fig.show()

Create Interactive Animations and Facet Plots

# Arguments shared by both facet plots: one panel (facet column) per continent.
facet_kwargs = dict(
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=60,
    hover_name='country',
    facet_col='continent',
)
# Facet plot over the full dataset with a linear x-axis.
fig = px.scatter(gapminder, **facet_kwargs)
fig.show()
# Same facet plot with a logarithmic x-axis.
fig = px.scatter(gapminder, log_x=True, **facet_kwargs)
fig.show()
# Animate the bubble chart: one frame per year, with animation_group matching
# each country's marker across frames.
# The axis ranges are pinned so they stay fixed while the animation plays.
# range_x must span the gdpPercap values in the data (min ~241, max ~113,523
# per describe()); the previous upper bound of 10,000 pushed every richer
# country off the chart.
fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp', color='continent', size='pop', size_max=40,
                 hover_name='country', log_x=True, animation_frame='year',
                 animation_group='country', range_x=[100, 120000], range_y=[25, 90])
fig.show()
# Same animated bubble chart, with human-readable labels substituted for the
# raw column names.
# range_x spans the gdpPercap values in the data (min ~241, max ~113,523 per
# describe()); the previous upper bound of 10,000 pushed every richer country
# off the chart. The "Life Expectancy" label also had a spelling error.
fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp', color='continent', size='pop', size_max=40,
                 hover_name='country', log_x=True, animation_frame='year',
                 animation_group='country', range_x=[100, 120000], range_y=[25, 90],
                 labels=dict(pop="Population", gdpPercap="GDP Per Capita", lifeExp="Life Expectancy"))
fig.show()

Represent Geographic Data as Animated Maps

# Line map of the 2007 rows on an orthographic projection, colored by
# continent; countries are located by their iso_alpha code.
snapshot_2007 = gapminder.query('year == 2007')
fig = px.line_geo(
    snapshot_2007,
    locations='iso_alpha',
    color='continent',
    projection='orthographic',
)
fig.show()
# Choropleth of life expectancy, animated with one frame per year and shaded
# with the Plasma sequential color scale.
fig = px.choropleth(
    gapminder,
    locations='iso_alpha',
    color='lifeExp',
    hover_name='country',
    animation_frame='year',
    color_continuous_scale=px.colors.sequential.Plasma,
    projection='natural earth',
)
fig.show()

Using Plotly Templates in Any Graph

# Show the templates (themes) registered with Plotly and which one is the default.
import plotly.io as pio
pio.templates
Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']
# Redraw the life-expectancy-colored bar chart once per template:
# plotly_dark, seaborn, then ggplot2.
# The y-axis label reads "Population of Canada", so restrict the data to
# Canada; plotting every country under that label would misdescribe the chart.
gapminder_canada = gapminder.query("country == 'Canada'")
for template_name in ('plotly_dark', 'seaborn', 'ggplot2'):
    fig = px.bar(gapminder_canada, x='year', y='pop', color='lifeExp',
                 labels={'pop': 'Population of Canada'},
                 height=400, template=template_name)
    fig.show()