forked from sua474/winnowing-pipeline-merged
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop25_cloud.py
67 lines (48 loc) · 2.2 KB
/
top25_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
import pylab
import networkx
import matplotlib.pyplot as plt
import numpy as np
import minepy
data = pd.read_csv('/home/michelle/PycharmProjects/pipeline/weighted/bromeA_all-graph_centrality-eigenvector-select25by25-abundances.csv', header=0, index_col=0)
#create a pandas dataframe from the csv file with the otus, samples, and corresponding abundances
corr_type = 'MIC'
if corr_type == 'MIC':
# the pstats output for the mic and tic are condensed 1D arrays
# we need to turn the output into a 2D upper triangular matrix and then mirror it to get the
# full correlation matrix
micp, ticp = minepy.pstats(data.T, alpha=0.6, c=15, est="mic_approx")
num_features = data.shape[1]
tri = np.zeros((num_features, num_features))
tri[np.triu_indices(num_features, 1)] = micp
full_corr = tri + tri.T
corr_matrix = pd.DataFrame(full_corr)
corr_matrix.columns = data.columns.values
corr_matrix.index = data.columns.values
else:
corr_matrix=data.corr(method='spearman')
#create an nxn spearman correlation matrix
def inverse_correlation(matrix):
one=matrix.abs()
two=one.copy() -1
inv_corr= two.abs()
return inv_corr
#inverse the correlation matrix so that 0 means fully correlated and 1 means not correlated
inv_correlation=inverse_correlation(corr_matrix)
inv_correlation[(inv_correlation.abs() > 0.5)] = 1
#make any number above 0.5 equal to 1
labels=list(inv_correlation.index)
inv_correlation.insert(0,'var1',labels)
#insert a labels column called 'var1' of the OTUs to be used to melt the matrix into 3 columns
butter=pd.melt(inv_correlation,'var1',var_name='var2',value_name='edge')
#melt the matrix into three columns
print (butter)
butter=butter.loc[(butter['edge'] != 1)]
#only keep the edge values that are not 1
#butter.to_csv('/home/michelle/PycharmProjects/pipeline/bromeA_all-graph_centrality-degree-select25by25-dataframe.csv')
graph=networkx.from_pandas_dataframe(butter,'var1','var2','edge')
networkx.write_graphml(graph,'eigenvector_weighted_25by25_spearman.graphml')
#networkx.draw_networkx(graph,node_color='dodgerblue',edge_color='dimgrey')
#networkx.draw(graph,with_labels=True)
#pylab.show()
#make the networkx graph and show the graph