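# Volcano plot helper, inspired by the R package kevinblighe/EnhancedVolcano.
# Input: a pandas DataFrame with the columns `pvals_adj`, `log2FC` and `nlog10_pval_adj`.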
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from adjustText import adjust_text
def volcanoplot(res=None, pval_cutoff=0.95, pval_colour_threshold=0.05, log2FC_colour_threshold=1, pval_label_cutoff=0.05, log2FC_label_cutoff=0.5, dotsize=4, title='Volcano Plot'):
    """Draw a volcano plot (log2 fold change vs. -log10 adjusted p-value).

    Points are drawn through the ``matplotlib.pyplot`` state machine in four
    colour groups, split by adjusted p-value and absolute log2 fold change.
    Points passing the label cutoffs are annotated with their row index.

    Args:
        res: pandas DataFrame with the columns ``pvals_adj``, ``log2FC`` and
            ``nlog10_pval_adj``. The row index supplies the label text.
            When omitted, a module-level variable named ``rank`` is used if
            one exists (legacy behaviour).
        pval_cutoff: rows with ``pvals_adj`` above this value are dropped
            before plotting, as these swamp the plot.
        pval_colour_threshold: adjusted p-value threshold for colouring dots.
        log2FC_colour_threshold: absolute log2FC threshold for colouring dots.
        pval_label_cutoff: adjusted p-value cutoff for labelling dots.
        log2FC_label_cutoff: absolute log2FC cutoff for labelling dots.
        dotsize: marker size passed to ``plt.plot`` as ``ms``.
        title: plot title.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can continue with
        ``.show()`` / ``.savefig()``.

    Raises:
        ValueError: if ``res`` is omitted and no global ``rank`` exists.
        KeyError: if ``res`` lacks one of the required columns.
    """
    if res is None:
        # Legacy fallback: earlier versions always read the global `rank`
        # and ignored the argument. Keep that working for no-argument calls.
        res = globals().get('rank')
        if res is None:
            raise ValueError(
                "volcanoplot: pass a DataFrame as `res` "
                "(or define a module-level `rank`)"
            )

    required = ('pvals_adj', 'log2FC', 'nlog10_pval_adj')
    missing = [col for col in required if col not in res.columns]
    if missing:
        raise KeyError(f"volcanoplot: `res` is missing required column(s): {missing}")

    # Drop rows above the p-value cutoff (NaN p-values fail the comparison
    # and are dropped as well).
    toplot = res[res['pvals_adj'] <= pval_cutoff]

    # Build each boolean mask once. The complementary masks are written as
    # explicit comparisons (not `~mask`) so NaN values stay excluded from
    # every group, exactly as with the original pairwise comparisons.
    pvals = toplot['pvals_adj']
    abs_fc = toplot['log2FC'].abs()
    significant = pvals <= pval_colour_threshold
    not_significant = pvals > pval_colour_threshold
    large_fc = abs_fc >= log2FC_colour_threshold
    small_fc = abs_fc < log2FC_colour_threshold

    # (mask, colour, legend label); drawn in this order.
    groups = (
        (not_significant & small_fc, '#808080',
         f'NS & log2FC < {log2FC_colour_threshold}'),        # grey
        (not_significant & large_fc, '#1a9641',
         f'NS & log2FC >= {log2FC_colour_threshold}'),       # green
        (significant & small_fc, '#6495ED',
         f'Sign. & log2FC < {log2FC_colour_threshold}'),     # blue
        (significant & large_fc, '#FF3131',
         f'Sign. & log2FC >= {log2FC_colour_threshold}'),    # red
    )
    for mask, colour, label in groups:
        subset = toplot[mask]
        plt.plot(subset['log2FC'], subset['nlog10_pval_adj'], 'o',
                 color=colour, alpha=.6, ms=dotsize, label=label)

    # Axis labels etc.
    plt.xlabel('log2FC')
    plt.ylabel('-log10(p)')
    plt.title(title)
    plt.legend(frameon=True, fontsize=12)

    # Dot labels: annotate each qualifying point with its row index.
    to_label = toplot[(pvals <= pval_label_cutoff) & (abs_fc >= log2FC_label_cutoff)]
    texts = [
        plt.text(x, y, name)
        for x, y, name in zip(to_label['log2FC'], to_label['nlog10_pval_adj'], to_label.index)
    ]
    # Nothing to de-overlap when no point qualifies; skipping the call also
    # avoids depending on how adjust_text treats an empty list.
    if texts:
        adjust_text(texts, force_text=(0.1, 0.1), arrowprops=dict(arrowstyle="-", lw=1))
    return plt