Introduction to Seaborn in Python, used with Pandas

This post is a rough introduction to the Seaborn module in Python. I use it for data visualization in combination with Pandas. Read the comments in the code to follow my workflow. If you have any questions, leave them as a comment at the bottom of the post.

# pandas is a python module for working with data frames
import pandas
# seaborn is a wrapper for matplotlib and is meant for data visualization
import seaborn
# pandas and numpy are related, therefore I always read in numpy when working with pandas
import numpy
# and since seaborn is a wrapper around matplotlib I also read in matplotlib.pyplot
import matplotlib.pyplot as plt
# load the OICA automotive industry production data from a
# comma-separated file into a pandas data frame
data_df = pandas.read_csv(
    "oica.csv",
    sep=",",
)
# preview the first five rows
data_df.head(n=5)
   year    country   output
0  2018  Argentina   466649
1  2018    Austria   164900
2  2018    Belgium   308493
3  2018     Brazil  2879809
4  2018     Canada  2020840
# ... and the last five rows of the data frame as well
data_df.tail(n=5)
     year     country    output
835  1999     Ukraine      1918
836  1999          UK   1973519
837  1999         USA  13024978
838  1999  Uzbekistan     44433
839  1999      Others     11965
# let's try .describe(): summary statistics (count, mean, std, min,
# quartiles, max) for the numeric columns of the data frame
data_df.describe()
              year        output
count   840.000000  8.400000e+02
mean   2008.284524  1.840118e+06
std       5.709808  3.407141e+06
min    1999.000000  3.600000e+01
25%    2004.000000  1.633742e+05
50%    2008.000000  5.586175e+05
75%    2013.000000  1.970880e+06
max    2018.000000  2.901543e+07
# open a new, large matplotlib figure for the scatter plot
plt.figure(figsize=(20, 15))
# scatter plot of production output per year, one colour per country
# NOTE(review): the trailing spaces in "output " and "country " presumably
# mirror the column names in the CSV header -- verify before "cleaning" them
plot1 = seaborn.scatterplot(
    data=data_df,
    x="year",
    y="output ",
    hue="country ",
)
# label the x-axis and give the plot a title
plot1.set_xlabel("year", fontsize=16)
plot1.set_title("production output by year (OICA data)", fontsize=22)
# the y-axis label comes last so the notebook cell echoes the same object
plot1.set_ylabel("output", fontsize=16)
Text(0, 0.5, 'output')
# open a new, large matplotlib figure for the box plot
plt.figure(figsize=(20, 15))
# turn the x-axis tick labels by 90 degrees so the country names stay readable
plt.xticks(rotation=90)
# one box per country, showing the distribution of its annual output
plot2 = seaborn.boxplot(
    data=data_df,
    x="country ",
    y="output ",
)
# label the x-axis and give the plot a title
plot2.set_xlabel("country", fontsize=16)
plot2.set_title(
    "annual production output distribution by nation, 1999 - 2018 (OICA data)",
    fontsize=22,
)
# the y-axis label comes last so the notebook cell echoes the same object
plot2.set_ylabel("annual production output", fontsize=16)
Text(0, 0.5, 'annual production output')
# swarm plots, using seaborn
# set default seaborn style and context FIRST: both only change matplotlib's
# rcParams, and those are read when the axes are created. The plt.xticks()
# call below already creates the axes, so styling after it would not reach
# this plot.
seaborn.set_style("whitegrid")
seaborn.set_context("talk")
# set graph size using matplotlib.pyplot
plt.figure(figsize=(20, 15))
# rotate axis ticks on x-axis
plt.xticks(rotation=90)
# create the swarm plot: one green point per country and year
plot3 = seaborn.swarmplot(x="country ", y="output ", data=data_df, color="green")
# set title
plot3.set_title(
    "annual automotive industry production output by country from 1999 to 2018, according to OICA",
    fontsize=22,
)
# set axis labels
plot3.set_xlabel("country", fontsize=22)
plot3.set_ylabel("annual production output", fontsize=22)
# add reference line at the overall mean output, using matplotlib.pyplot
plt.axhline(data_df["output "].mean(), color="blue")
<matplotlib.lines.Line2D at 0x2b02ec39b88>
# finally, a line plot of the production time series
# open a new, large matplotlib figure
plt.figure(figsize=(20, 15))
# switch the seaborn plotting context to "paper"
seaborn.set_context("paper")
# one line per country, output over the years
plot4 = seaborn.lineplot(
    data=data_df,
    x="year",
    y="output ",
    hue="country ",
)
# label the x-axis and give the plot a title
plot4.set_xlabel("year", fontsize=22)
plot4.set_title(
    "OICA automotive industry production output time series, 1999 - 2018",
    fontsize=22,
)
# the y-axis label comes last so the notebook cell echoes the same object
plot4.set_ylabel("production output [units]", fontsize=22)
Text(0, 0.5, 'production output [units]')

Leave a Reply

Leave a Reply

Your email address will not be published. Required fields are marked *

Close

Meta