pasztora
7/18/2019 - 12:52 PM

Matplotlib visualization (advanced)

# Bar plot with 2 data series: the mean and the number of values for each strain group
import matplotlib.pyplot as plt

# Aggregate 'Max titer [g/L]' per strain into its mean and its value count,
# order the strains by ascending mean, then draw both columns as horizontal bars.
# NOTE(review): 'mean' and 'count' are drawn against the same axis, which is
# labelled in g/L only -- confirm that mixing the two series is intended.
ax = (
    df3.groupby(['Strain'])['Max titer [g/L]']
    .agg(['mean', 'count'])
    .sort_values('mean')
    .plot.barh(fontsize=7, grid=True)
)
ax.set(
    xlabel="3'SL [g/L]",
    ylabel="Strain name",
)

# Basic boxplots
# One box of 'Max titer [g/L]' per 'Process' value; tick labels are rotated
# by 90 degrees.
df.boxplot(
    column=["Max titer [g/L]"],
    by=["Process"],
    rot=90,
)

# Basic scatter plot
# Every row of df becomes one point: max titer on x, yield at max titer on y.
df.plot(
    kind="scatter",
    x="Max titer [g/L]",
    y="Yps at max titer [g/g]",
)

# Advanced scatter plots
# Overlay one scatter series per strain on a single shared Axes.
fig, ax = plt.subplots()

# (strain, marker colour) pairs, drawn in this order; the strain name doubles
# as the series label.
strain_colors = (
    ("MAP1000", "orange"),
    ("MAP1001d", "grey"),
    ("MAP1001g", "green"),
    ("MAP1001h", "red"),
)
for strain_name, marker_color in strain_colors:
    # The boolean mask keeps only this strain's rows; passing ax= draws the
    # series onto the shared Axes instead of opening a new figure.
    df2[df2["Strain"] == strain_name].plot.scatter(
        x="Max titer [g/L]",
        y="Yps at max titer [g/g]",
        ax=ax,
        color=marker_color,
        label=strain_name,
    )

# Thin grid lines behind the combined series.
ax.grid(linewidth=0.25)

# Stacked bar plots based on 2 df columns, combined with groupby
# Per-strain means of the two columns are stacked into one bar per strain;
# rot=0 leaves the tick labels unrotated.
(
    df.groupby(['Strain'])[["2FL [g/kg]", "DFL [g/kg]"]]
    .mean()
    .plot(kind="bar", stacked=True, rot=0)
)

# Barplots with subplots sharing x-axis
# subplots=True draws each selected column in its own subplot; `axes` holds
# the resulting Axes objects. No explicit sharex is passed, so x-axis sharing
# relies on the pandas default for subplots.
axes = df[["2FL [g/kg]", "DFL [g/kg]", "DFL ratio [g/g]"]].plot(
    kind="bar",
    grid=True,
    subplots=True,
)