This post is a rough introduction to the Seaborn module in Python. I use it for data visualization, in combination with Pandas. Read the comments to understand my workflow. If you have any questions, you can leave them as a comment at the bottom of the post.
# pandas is a Python module for working with data frames
import pandas
# seaborn is a wrapper for matplotlib and is meant for data visualization
import seaborn
# pandas is built on top of numpy, therefore I always import numpy when working with pandas
import numpy
# and since seaborn is a wrapper around matplotlib, I also import matplotlib.pyplot
import matplotlib.pyplot as plt
# load the OICA automotive industry production data from a
# comma-separated file into a pandas data frame
data_df = pandas.read_csv(filepath_or_buffer="oica.csv", sep=",")
# show the first five rows as a quick sanity check
data_df.head(n=5)
year
country
output
0
2018
Argentina
466649
1
2018
Austria
164900
2
2018
Belgium
308493
3
2018
Brazil
2879809
4
2018
Canada
2020840
# let's also see the tail, i.e. the last five rows of the data frame
data_df.tail()
year
country
output
835
1999
Ukraine
1918
836
1999
UK
1973519
837
1999
USA
13024978
838
1999
Uzbekistan
44433
839
1999
Others
11965
# let's try .describe(): summary statistics (count, mean, std, min,
# quartiles, max) for the numeric columns year and output
data_df.describe()
year
output
count
840.000000
8.400000e+02
mean
2008.284524
1.840118e+06
std
5.709808
3.407141e+06
min
1999.000000
3.600000e+01
25%
2004.000000
1.633742e+05
50%
2008.000000
5.586175e+05
75%
2013.000000
1.970880e+06
max
2018.000000
2.901543e+07
# open a new figure of size (20, 15) with matplotlib.pyplot and keep its axes
plot1 = plt.figure(figsize=(20, 15)).add_subplot(1, 1, 1)
# draw the seaborn scatterplot onto those axes: output per year, coloured by country
# NOTE(review): the "output " / "country " keys carry a trailing space;
# presumably this matches the CSV header -- confirm against oica.csv
seaborn.scatterplot(
    data=data_df,
    x="year",
    y="output ",
    hue="country ",
    ax=plot1,
)
# plot title
plot1.set_title("production output by year (OICA data)", fontsize=22)
# x- and y-axis labels
plot1.set_xlabel("year", fontsize=16)
plot1.set_ylabel("output", fontsize=16)
Text(0, 0.5, 'output')
# open a new figure of size (20, 15) with matplotlib.pyplot and keep its axes
plot2 = plt.figure(figsize=(20, 15)).add_subplot(1, 1, 1)
# rotate the x-axis tick labels by 90 degrees so the country names stay readable
plt.xticks(rotation=90)
# draw the boxplot onto those axes: one box per country
# NOTE(review): the "output " / "country " keys carry a trailing space;
# presumably this matches the CSV header -- confirm against oica.csv
seaborn.boxplot(
    data=data_df,
    x="country ",
    y="output ",
    ax=plot2,
)
# plot title
plot2.set_title(
    "annual production output distribution by nation, 1999 - 2018 (OICA data)",
    fontsize=22,
)
# labels for x- and y-axis
plot2.set_xlabel("country", fontsize=16)
plot2.set_ylabel("annual production output", fontsize=16)
Text(0, 0.5, 'annual production output')
# swarm plots, using seaborn
# set default seaborn style and context FIRST: these settings are read when
# the axes are created, and plt.xticks() below already creates the axes,
# so setting them afterwards would leave this plot without the whitegrid style
seaborn.set_style("whitegrid")
seaborn.set_context("talk")
# set graph size using matplotlib.pyplot
plt.figure(figsize=(20,15))
# rotate axis ticks on x-axis
plt.xticks(rotation=90)
# create the swarm plot
# NOTE(review): the "output " / "country " keys carry a trailing space;
# presumably this matches the CSV header -- confirm against oica.csv
plot3 = seaborn.swarmplot(x="country ",y="output ",data=data_df,color="green")
# set title
plot3.set_title("annual automotive industry production output by country from 1999 to 2018, according to OICA",fontsize=22)
# set axis labels
plot3.set_xlabel("country",fontsize=22)
plot3.set_ylabel("annual production output",fontsize=22)
# add reference line at the overall mean output, using matplotlib.pyplot
plt.axhline(data_df["output "].mean(),color="blue")
<matplotlib.lines.Line2D at 0x2b02ec39b88>
# finally, a line plot of the time series
# switch the seaborn context to "paper" before the axes are created
seaborn.set_context("paper")
# open a new figure of size (20, 15) and keep its axes
plot4 = plt.figure(figsize=(20, 15)).add_subplot(1, 1, 1)
# draw the seaborn line plot onto those axes: output per year, one line per country
# NOTE(review): the "output " / "country " keys carry a trailing space;
# presumably this matches the CSV header -- confirm against oica.csv
seaborn.lineplot(
    data=data_df,
    x="year",
    y="output ",
    hue="country ",
    ax=plot4,
)
# plot title
plot4.set_title(
    "OICA automotive industry production output time series, 1999 - 2018",
    fontsize=22,
)
# axis labels
plot4.set_xlabel("year", fontsize=22)
plot4.set_ylabel("production output [units]", fontsize=22)
Leave a Reply