Home

Matplotlib

Matplotlib is a widely used graphing/illustrating library for Python. Again, this is a cheatsheet of sorts. For more in-depth options, see the Matplotlib website, where you can find a multitude of tutorials and examples.

NB: Avoid using the function show() more than once in a session.

Creation

Plot

import matplotlib as mpl          # Conventional aliases
import matplotlib.pyplot as plt

# Optional - When working in a jupyter notebook opt for the latter to keep graphs inside your page
# %matplotlib
%matplotlib inline    #creates static images/plots
# %matplotlib notebook  #creates interactive plots

fig = plt.figure()                 # Creates the graphing object
plt.plot(x, np.sin(x), '-')        # will create and display the plot
#fig.savefig('someName.png')       # Will save to the current directory
plt.xlim(-1, 11)                   # alter axis interval
plt.ylim(-1.5, 1.5);               # Reverse args to reverse the graph
#plt.axis([-1, 11, -1.5, 1.5]);    # Alternative Method

#plt.axis('tight');                # will minimize the axes interval 
#plt.axis('equal');                # make axes the same

plt.title("Sine/Cosine Curves")    # Add Title bar
plt.xlabel("x-axis")               #
plt.ylabel("y-axis")               #
plt.plot(x, np.sin(x), '-',label='np.sin(x)') 
plt.legend()                       # Show legend. Req's label var from plot function

Plot Types

Lines

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline    
plt.style.use('dark_background')   # Sets the aesthetic style
#ref https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html

x = np.linspace(0, 10, 1000)
fig = plt.figure()
ax = plt.axes()
ax.plot(x, np.sin(x));                # could also use plt which creates the axes in the background
ax.plot(x, np.cos(x));                # Overlapping graphs is easy peazy 
# so is changing the color (HexCode, RGB, HTML name are all supported options)
ax.plot(x, np.cos(x-1)
    ,color='red'
    ,linestyle='dashed');  
ax.plot(x, np.sin(x-1)
    ,color='#f444ff'
    ,linestyle=':'); 
ax.plot(x, np.sin(x-2)
    ,'--g');
ax.plot(x, x/10)

plt.title("Sine/Cosine Curves")
plt.xlabel("x-axis")
plt.ylabel("y-axis")
Out[1]:
Text(0,0.5,'y-axis')

Scatter

ex1 - Plot()

The main difference between a line plot and a scatter plot drawn with plot() is the marker (format) argument in the function call. (Try using '-' as the marker and you'll get a line.)

In [2]:
plt.style.use('seaborn-whitegrid')
rng = np.random.RandomState(7)

# Each entry is a plot() format string. '-ok' will draw a line connecting the
# points, as well as the points themselves.
markers = ['o', '.', ',', 'x', '+', '-ok', '^', '<', '>', 's', 'd']

for marker in markers:
    # Five random points per marker; x is drawn before y.
    xs, ys = rng.rand(5), rng.rand(5)
    plt.plot(xs, ys, marker, label="marker='{0}'".format(marker))

plt.legend(numpoints=1)
plt.xlim(0, 1.8);

ex2 - Scatter()

The scatter() function has the added advantage that each point's characteristics (size, color, etc.) can be tracked and changed individually. However, it also becomes less efficient as the data grows in size.

In [3]:
rng = np.random.RandomState(25)
colors = rng.rand(100)             # one colour value per point, mapped through cmap
sizes = 1000 * rng.rand(100)       # one marker size per point

# Per-point styling is what scatter() offers over plot()
scatter_style = dict(c=colors, s=sizes, alpha=0.3, cmap='Paired')
plt.scatter(rng.randn(100), rng.randn(100), **scatter_style)
plt.colorbar();

Error Bars

Finite data

In [4]:
x = np.linspace(0, 10, 50)
dt = 0.8                                 # noise scale, reused as the error-bar half-height
noise = np.random.randn(50) * dt
y = np.sin(x) + noise

#plt.plot(x, y, '.')                     # To see the actual data
# fmt='ok': circle markers ('o') in black ('k'), no connecting line
plt.errorbar(x, y, yerr=dt, fmt='ok')
Out[4]:
<Container object of 3 artists>

Continuous Data

In [5]:
# GaussianProcessRegressor replaces the legacy GaussianProcess class, which
# is no longer available in current scikit-learn releases.
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import RandomForestRegressor

import warnings
# NOTE: this silences every warning for the rest of the session
warnings.filterwarnings('ignore')


# define the model and draw some data
def model(x):
    """Return x * sin(x), the curve the sample points are taken from."""
    return x * np.sin(x)


#Create some finite points
xdata = np.array([1, 3, 5, 6, 8])
ydata = model(xdata)
#plt.plot(xdata, ydata, 'or')

# Compute the Gaussian process fit.
# fit()/predict() expect a 2-D (n_samples, n_features) array, hence np.newaxis
gp = GaussianProcessRegressor()
gp.fit(xdata[:, np.newaxis], ydata)

xfit = np.linspace(0, 10, 1000)
# return_std=True also returns the predictive standard deviation per point
yfit, sigma = gp.predict(xfit[:, np.newaxis], return_std=True)
dyfit = 2 * sigma  # 2*sigma ~ 95% confidence region

# Visualize the result
plt.plot(xdata, ydata, 'or')
plt.plot(xfit, yfit, '-', color='gray')

# Add the grey shading between the upper and lower bounds
plt.fill_between(xfit, yfit - dyfit, yfit + dyfit,
                 color='gray', alpha=0.2)

plt.xlim(0, 10);

# The red points are our data points
# the grey represents the amount of error, or variance, at that point

Contour plots

Contour plots are good for visualizing three-dimensional data in a two-dimensional space.

In [6]:
def f(x, y):
    """Return sin(x)**10 + cos(10 + y*x) * cos(x).

    Built only from NumPy ufuncs and arithmetic, so scalars and arrays
    (e.g. the output of np.meshgrid) are both accepted.
    """
    peak_term = np.sin(x) ** 10
    wave_term = np.cos(10 + y * x) * np.cos(x)
    return peak_term + wave_term

# Sample f on a regular grid: 50 x-values by 40 y-values over [0, 5]
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 40)

# meshgrid expands the two 1-D axes into 2-D coordinate arrays
X, Y = np.meshgrid(x, y)
Z = f(X, Y)

# plt.cm.<tab> to see list of cmaps
# Method 1: contour lines only
#plt.contour(X, Y, Z, colors='black');
#plt.contour(X, Y, Z, 20, cmap='RdGy');     # 20 levels, coloured by value
#plt.colorbar();  #Now we can interpret the black as peaks, and red as valleys

# Method 2: render Z as an image
#plt.imshow(Z, extent=[0, 5, 0, 5], origin='lower',
#           cmap='RdGy')
#plt.colorbar()
#plt.axis(aspect='image');

# Method 3 (active): labelled contour lines over a semi-transparent image
contours = plt.contour(X, Y, Z, 3, colors='black')
plt.clabel(contours, inline=True, fontsize=8)   # write each level's value on its line
# extent gives the image its [xmin, xmax, ymin, ymax] data coordinates;
# origin='lower' puts row 0 of Z at the bottom
plt.imshow(Z, extent=[0, 5, 0, 5], origin='lower',
           cmap='RdGy', alpha=0.5)
plt.colorbar();

Histograms

1Dim

In [7]:
data = np.random.randn(1000)
# density=True normalises the histogram so the bar areas sum to 1
# (it replaces the removed normed= argument)
plt.hist(data, bins=30, density=True, alpha=0.5,
         histtype='stepfilled', color='steelblue',
         edgecolor='none');
In [8]:
# Three normal samples: np.random.normal(mean, std, size)
x1 = np.random.normal(0, 0.8, 1000)
x2 = np.random.normal(-2, 1, 1000)
x3 = np.random.normal(3, 2, 1000)

# Shared styling; density=True (formerly normed=True) puts the three
# histograms on a common, normalised scale so they can be compared.
kwargs = dict(histtype='stepfilled', alpha=0.3, density=True, bins=40)

plt.hist(x1, **kwargs, color='green')
plt.hist(x2, **kwargs, color='red')
plt.hist(x3, **kwargs, color='blue');

2Dim

In [9]:
# Draw 10000 correlated (x, y) samples from a 2-D normal distribution
mean = [0, 0]
cov = [[1, 1], [1, 2]]
x, y = np.random.multivariate_normal(mean, cov, 10000).T   # .T splits the columns into x and y

#Basic: rectangular bins
#ref https://matplotlib.org/api/_as_gen/matplotlib.pyplot.hist2d.html
#plt.hist2d(x, y, 
#           bins=30, 
#           cmap='Blues')
#cb = plt.colorbar()
#cb.set_label('counts in bin')

#If you prefer hexagons
#plt.hexbin(x, y, gridsize=30, cmap='Blues')
#cb = plt.colorbar(label='count in bin')

#Smoothing out the data, or smearing (kernel density estimate)
from scipy.stats import gaussian_kde
# fit an array of size [Ndim, Nsamples]
data = np.vstack([x, y])
kde = gaussian_kde(data)

# evaluate on a regular grid
xgrid = np.linspace(-3.5, 3.5, 40)
ygrid = np.linspace(-6, 6, 40)
Xgrid, Ygrid = np.meshgrid(xgrid, ygrid)
# evaluate() takes the grid points flattened into a [Ndim, Npoints] array
Z = kde.evaluate(np.vstack([Xgrid.ravel(), Ygrid.ravel()]))

# Plot the result as an image (reshape the flat densities back onto the grid)
plt.imshow(Z.reshape(Xgrid.shape),
           origin='lower', aspect='auto',
           extent=[-3.5, 3.5, -6, 6],
           cmap='Blues')
cb = plt.colorbar()
cb.set_label("density")

Radial

In [10]:
# Spiral: the angle grows linearly with the radius (one full turn per unit r)
r = np.arange(0, 2, 0.01)
theta = 2 * np.pi * r

# projection='polar' makes plot() take (angle, radius) instead of (x, y)
ax = plt.subplot(111, projection='polar')
ax.plot(theta, r)
ax.set_rmax(2)
ax.set_rticks([0.5, 1, 1.5, 2])  # fewer radial ticks
ax.set_rlabel_position(-22.5)  # get radial labels away from plotted line
ax.grid(True)

ax.set_title("A line plot on a polar axis", va='bottom')
#plt.show();
Out[10]:
Text(0.5,1.05,'A line plot on a polar axis')

Customizations

Legends

ref: https://jakevdp.github.io/PythonDataScienceHandbook/04.06-customizing-legends.html

ax.plot(... , label='Sine')        # Adds the label to the plot
ax.legend(                         # makes the legend appear
    loc      ='upper left',        # location_of_legend
    frameon  =False,               # False hides the frame/border, True draws it
    ncol     =2,                   # number of columns
    fancybox =True,                # rounded corners on the frame
    shadow   =True,                # add shading
    )
In [11]:
plt.style.use('classic')
x = np.linspace(0, 10, 1000)
# Broadcasting a row (sin) against a column (cos) yields a 1000 x 1000 image
I = np.sin(x) * np.cos(x[:, np.newaxis])

plt.imshow(I)
plt.colorbar();       # adds the value-to-colour key next to the image

Multi-plots

ref: https://jakevdp.github.io/PythonDataScienceHandbook/04.08-multiple-subplots.html

Creating multiple graphs in one picture.

Ex1 - add_axes()

In [12]:
plt.style.use('seaborn-white')

fig = plt.figure()
# We create 2 axes or panels, positioned in figure fractions (0 to 1)
#                   lft  btm  wdt  hgt 
ax1 = fig.add_axes([0.1, 0.5, 0.8, 0.4],
                   xticklabels=[], ylim=(-1.2, 1.2))

# Lower panel: btm + hgt = 0.1 + 0.4 = 0.5 = bottom of the upper panel,
# so the two panels touch
ax2 = fig.add_axes([0.1, 0.1, 0.8, 0.4],
                   ylim=(-1.2, 1.2))

x = np.linspace(0, 10)
# Only y-values are passed, so they are plotted against their index
ax1.plot(np.sin(x))
ax2.plot(np.cos(x));
In [13]:
fig = plt.figure()
fig.subplots_adjust(hspace=0.4, wspace=0.4)   # spacing between the panels

# add_subplot(rows, cols, index): panels are numbered from 1
for i in range(1, 7):
    label = str((2, 3, i))
    ax = fig.add_subplot(2, 3, i)
    ax.text(0.5, 0.5, label, fontsize=18, ha='center')

Ex2 - subplots()

In [14]:
# One call builds the whole 2 x 3 grid; ax is a 2-D array of axes indexed
# [row, col]. Columns share their x axis and rows share their y axis.
n_rows, n_cols = 2, 3
fig, ax = plt.subplots(n_rows, n_cols, sharex='col', sharey='row')

for i in range(n_rows):
    for j in range(n_cols):
        panel = ax[i, j]
        panel.text(0.5, 0.5, str((i, j)), fontsize=18, ha='center')

Ex3 - GridSpec()

In [15]:
# Create some normally distributed data
mean = [0, 0]
cov = [[1, 1], [1, 2]]
x, y = np.random.multivariate_normal(mean, cov, 3000).T

# Set up the axes with gridspec
fig = plt.figure(figsize=(6, 6))
grid = plt.GridSpec(4, 4, hspace=0.2, wspace=0.2)
main_ax = fig.add_subplot(grid[:-1, 1:])
y_hist = fig.add_subplot(grid[:-1, 0], xticklabels=[], sharey=main_ax)
x_hist = fig.add_subplot(grid[-1, 1:], yticklabels=[], sharex=main_ax)

# scatter points on the main axes
main_ax.plot(x, y, 'ok', markersize=3, alpha=0.2)

# histogram on the attached axes
x_hist.hist(x, 40, histtype='stepfilled',
            orientation='vertical', color='gray')
x_hist.invert_yaxis()

y_hist.hist(y, 40, histtype='stepfilled',
            orientation='horizontal', color='gray')
y_hist.invert_xaxis()
In [16]:
# pd.datetime is no longer available in current pandas; use the stdlib class
from datetime import datetime

births = pd.read_csv('data/z0010_MonthlyBirthRate.csv')
# Columns = [year,month,day,gender,births]

# Sigma-clip outliers: the median and interquartile range give a robust
# centre and spread (for Gaussian data, sigma is roughly 0.74 * IQR).
quartiles = np.percentile(births['births'], [25, 50, 75])
mu, sig = quartiles[1], 0.74 * (quartiles[2] - quartiles[0])
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
births = births.query('(births > @mu - 5 * @sig) & (births < @mu + 5 * @sig)').copy()

births['day'] = births['day'].astype(int)

# Combine year/month/day into YYYYMMDD integers and parse them as dates
births.index = pd.to_datetime(10000 * births.year +
                              100 * births.month +
                              births.day, format='%Y%m%d')
# Aggregate births per (month, day) pair (pivot_table averages by default)
births_by_date = births.pivot_table('births',
                                    [births.index.month, births.index.day])
# Put every (month, day) in one dummy year so the result plots as a time
# series; 2012 is a leap year, so a 29 February entry is still a valid date.
births_by_date.index = [datetime(2012, month, day)
                        for (month, day) in births_by_date.index]

fig, ax = plt.subplots(figsize=(12, 4))
births_by_date.plot(ax=ax);

# Add labels
style = dict(size=10, color='red')
#        xVal       yVal   text             style obj
ax.text('2012-1-1', 3950, "New Year's Day", **style)
ax.text('2012-7-4', 4250, "Independence Day", ha='center', **style)
ax.text('2012-9-4', 4850, "Labor Day", ha='center', **style)
ax.text('2012-10-31', 4600, "Halloween", ha='right', **style)
ax.text('2012-11-25', 4450, "Thanksgiving", ha='center', **style)
ax.text('2012-12-25', 3850, "Christmas ", ha='right', **style)

# Label the axes
ax.set(title='USA births by day of year (1969-1988)',
       ylabel='average daily births')

# Format the x axis with centered month labels: major ticks (unlabelled) sit
# on month boundaries, minor ticks on the 15th carry the month name.
ax.xaxis.set_major_locator(mpl.dates.MonthLocator())
ax.xaxis.set_minor_locator(mpl.dates.MonthLocator(bymonthday=15))
ax.xaxis.set_major_formatter(plt.NullFormatter())
# '%b' is the portable abbreviated-month code ('%h' is a platform-specific alias)
ax.xaxis.set_minor_formatter(mpl.dates.DateFormatter('%b'));


print(births_by_date.head())
              births
2012-01-01  4009.225
2012-01-02  4247.400
2012-01-03  4500.900
2012-01-04  4571.350
2012-01-05  4603.625

By x.y Coordinates

In [17]:
plt.style.use('seaborn-whitegrid')

fig, ax = plt.subplots(facecolor='lightgray')
ax.axis([0, 10, 0, 10])

# By default the text is anchored at its left edge / baseline, so it extends
# up and to the right of the point specified (the leading "." marks that point).
# The transform chooses the coordinate system of the (x, y) position:
#   ax.transData    - data coordinates (the default, but we'll specify it anyway)
#   ax.transAxes    - fraction of the axes (0 to 1)
#   fig.transFigure - fraction of the whole figure (0 to 1)
ax.text(1, 5, ". Data: (1, 5)", transform=ax.transData)
ax.text(0.5, 0.1, ". Axes: (0.5, 0.1)", transform=ax.transAxes)
ax.text(0.2, 0.2, ". Figure: (0.2, 0.2)", transform=fig.transFigure);

Arrows

In [18]:
#%matplotlib inline

fig, ax = plt.subplots()

x = np.linspace(0, 20, 1000)
ax.plot(x, np.cos(x))
ax.axis('equal')

# xy is the point being annotated (the arrow points AT it); xytext is where
# the label is placed (the arrow starts there).
#            text             arrow tip (xy)   label position (xytext)
ax.annotate('local maximum', xy=(6.28, 1), xytext=(10, 4),
            arrowprops=dict(facecolor='black', shrink=0.05))

# This should produce a curved arrow but it fails silently
# NOTE(review): cause not established here - verify against the annotate docs
#ax.annotate('local minimum', xy=(5 * np.pi, -1), xytext=(2, -6),
#            arrowprops=dict(arrowstyle="->",facecolor='black',
#                            connectionstyle="angle3,angleA=0,angleB=-90"));


# Straight-arrow version of the annotation above
ax.annotate('local minimum', xy=(5 * np.pi, -1), xytext=(2, -6),
           arrowprops=dict(facecolor='black', shrink=0.05));

Misc tips & tricks

Axis Tick marks

ax = plt.axes(xscale='log', yscale='log')
In [19]:
import matplotlib.pyplot as plt
plt.style.use('classic')
%matplotlib inline
import numpy as np

# Open question: ticks disappear when the scale is 'log' - why?
# NOTE(review): nothing is plotted and no limits are set here, so the log axis
# presumably has no positive data range to place ticks in; setting explicit
# limits (e.g. ax.set(xlim=(1, 1e3))) may restore them - TODO confirm.
ax = plt.axes(xscale='log', yscale='linear')
ax.grid();

# Optional: enlarge the tick labels and give them a translucent background
#for label in ax.get_xticklabels() + ax.get_yticklabels():
#    label.set_fontsize(16)
#    label.set_bbox(dict(facecolor='white', edgecolor='None', alpha=0.65))
    
In [20]:
plt.plot([1, 2])
ax = plt.gca()        # the axes that plt.plot just drew on
plt.draw()            # presumably forces a render so the tick labels are populated before inspection - confirm
# Optional: print each x tick label object
#labels = ax.get_xticklabels()
#for label in labels:
#    print(label)
In [21]:
ax = plt.axes()
ax.plot(np.random.rand(50))

# NullLocator places no ticks at all, so the y axis loses its ticks and labels
ax.yaxis.set_major_locator(plt.NullLocator())
# NullFormatter would instead keep the ticks but leave their labels blank
#ax.xaxis.set_major_formatter(plt.NullFormatter())