Vector representations of gene co-expression in single-cell RNA-seq data.
python3 -m venv cenv
source cenv/bin/activate
python3 setup.py install
# Build the training dataset and fit the gene vectors.
from compass.data import CompassDataset, Context
from compass.model import CompassTrainer

# `adata` must already be defined by the caller; it is not created in this
# snippet (presumably an AnnData object -- confirm against Context.build).
context = Context.build(adata)
dataset = CompassDataset(context)

# The trainer is told to write its output to "genes.vec".
cmps = CompassTrainer(
    dataset,
    output_file="genes.vec",
    batch_size=200,
)
cmps.train(10)  # run for 10 iterations
# Load the trained gene vectors and explore gene-level structure.
from compass.embedding import GeneEmbedding, CellEmbedding

gembed = GeneEmbedding("genes.vec", context)
cembed = CellEmbedding(context, gembed)

# Query similarities for a single gene.
gembed.compute_similarities("CD8A")

# Cluster the genes, plot the clusters with a few genes labelled, then
# derive per-cluster definitions and a tabular summary of them.
gene_clusters = gembed.cluster()
gembed.plot(gene_clusters, labels=["C1QC", "C1QA", "TYROBP"])
cluster_definitions = gembed.cluster_definitions(gene_clusters)
gene_df = gembed.cluster_definitions_as_df(cluster_definitions, top_n=10)
gembed.relabel_cluster(cluster_definitions, gene_clusters, 6, "Cell Cycle")

# The same gene list drives the combined vector and both similarity plots,
# so it is defined once instead of being repeated in every call.
cd8_markers = ["CD8A", "CD8B", "CD3D", "CD3E", "CD3G"]
cd8tcellvec = gembed.generate_vector(cd8_markers)
gembed.plot_similarity_matrix(cd8_markers, png="cd8_matrix.png")
# NOTE(review): the original line was missing its closing parenthesis and
# called `plot_similarity_network` as a bare, never-imported name; it is
# assumed to be a GeneEmbedding method like `plot_similarity_matrix` -- confirm.
gembed.plot_similarity_network(cd8_markers, png="cd8_graph.png")
# Cell-level workflow: plot, cluster, batch-correct, then re-plot.
# The value returned by the first plot is kept and handed to the second plot
# through `pcs` (presumably so both plots share one layout -- confirm).
tsne_embedding = cembed.plot(column="cell_type")
cembed.plot(column="batch_label",pcs=tsne_embedding)
# Cluster the cells with k=6 and pass the result to the batch-correction step.
clusters = cembed.cluster(k=6)
cembed.batch_correct(column="batch_label",clusters=clusters)
# NOTE(review): this plot uses column "batchlb" while the calls above use
# "batch_label" -- confirm which column name the data actually carries.
corrected_tsne_embedding = cembed.plot(column="batchlb")
# `cd8tcellvec` is the vector produced earlier by gembed.generate_vector(...).
cembed.plot_distance(cd8tcellvec,png="highlightcd8cells.png")