update test

zqfang · Dec 29, 2017 · 6f884a9 · 6f884a9
1 parent 9e6f5c8
commit 6f884a9
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 147 deletions.
diff --git a/tests/comparison.zip b/tests/comparison.zip
diff --git a/tests/test.ssgsea.R.r b/tests/test.ssgsea.R.r
@@ -71,105 +71,19 @@ ssgsea = function(X, gene_sets, alpha = 0.25, scale = T, norm = F, single = T) {
   return(es)
 }
 
-
 # setwd("~/github/GSEApy/tests/data")
-
 # comparison with gseapy.ssgsea
+X2 = read.table("./data/testSet_rand1200.gct", row.names = 1, header = T,
+                comment='#', sep="\t", stringsAsFactors = F)
+X3 = as.matrix.data.frame(X2[,-1])
+gene_sets = fgsea::gmtPathways("./data/randomSets.gmt")
 
-X2 = read.table("./data/P53_resampling_data2.txt", row.names = 1, header = T,sep="\t", stringsAsFactors = F)
-X3 = as.matrix.data.frame(X2)
-
-gm = c('GRB14',
-      'KAZALD1',
-      'POLR2I',
-      'C7orf26',
-      'MYOZ3',
-      'CRYBA4',
-      'C9orf85',
-      'PRPS1',
-      'C9',
-      'GTF2H4',
-      'PSME2',
-      'HAUS4',
-      'VPS16',
-      'SCOC',
-      'RHAG',
-      'AIF1',
-      'RPL41',
-      'C16orf5',
-      'LCT',
-      'C1orf83',
-      'GFAP',
-      'NUDCD3',
-      'ROGDI',
-      'HEATR1',
-      'MST1R',
-      'ZMPSTE24',
-      'HDAC1',
-      'NEO1',
-      'POLR3A',
-      'VPS54',
-      'F5',
-      'QKI',
-      'ITFG2',
-      'PPP2R3A',
-      'LIMS2',
-      'PCDH15',
-      'STOML2',
-      'FLT3',
-      'GABRR1',
-      'GNPDA2',
-      'PHLDA3',
-      'RARS',
-      'MRPS33',
-      'LCK',
-      'PTN',
-      'HRG',
-      'EIF3I',
-      'PMVK',
-      'UBOX5',
-      'VN2R1P',
-      'STAP2',
-      'CCNB3',
-      'ADAM8',
-      'LHCGR',
-      'PERP',
-      'COL1A2',
-      'ZSWIM1',
-      'BCAP29',
-      'PTP4A3',
-      'PIP4K2A',
-      'PRRX2',
-      'UHRF1',
-      'CEBPZ',
-      'UBE2J1',
-      'WFDC2',
-      'SGK2',
-      'ZBED3',
-      'CCDC82',
-      'TMOD1',
-      'CD2AP',
-      'C6orf203',
-      'TMEM85')
-
-gene_sets = list(raondom2=gm)
-print("Input gene set")
-print(gene_sets)
-print("\n\n\n")
-
-print("R implement of ssgsea with high speed:")
-es = ssgsea(X3, gene_sets)
-for (i in 1:length(es)){
-    outv = paste(i, colnames(es)[i], "-ES:", es[i], sep=" ")
-    print(outv)
-}
-
+# test
 system.time(assign('a', GSVA::gsva(X3, gene_sets, method = 'ssgsea')))
 system.time(assign('b', ssgsea(X3, gene_sets, scale = F, norm = T)))
 system.time(assign('b2', ssgsea(X3, gene_sets, scale = T, norm = F)))
 
 identical(a, b)
-
 print("TEST with GSVA::gsva(method='ssgsea')")
 print(a)
 

diff --git a/tests/test.ssgsea.gseapy.py b/tests/test.ssgsea.gseapy.py
@@ -9,10 +9,8 @@
 
 
 # In[3]:
-
-
-gp.__version__
-
+np.seterr(divide='ignore')
+print("GSEApy version: %s"%gp.__version__)
 
 # In[15]:
 
@@ -118,83 +116,46 @@ def enrichment_score_tensor(gene_mat, cor_mat, gene_sets, weighted_score_type, n
 
 
 # In[16]:
-
-
-gex = pd.read_table("./data/P53_resampling_data2.txt", index_col=0)
-
+gex = pd.read_table("./data/testSet_rand1200.gct", comment='#', index_col=0)
 # In[18]:
-
-
 gmt = gp.parser.gsea_gmt_parser("./data/randomSets.gmt")
-
-
-# In[19]:
-
-
-gmt.keys()
-
-
-# In[20]:
-
-
-gm = gmt['random2']
-
-
-# In[21]:
-
-print("Input gene set")
-print(gm)
-
-
 # In[24]:
 
 
-names=[]
-es = []
-hits = []
+
+df = pd.DataFrame(index=sorted(gmt.keys()))
 rs = np.random.RandomState(0)
 gexrnk = gex.rank(axis=0, method='average', na_option='bottom')
 for name, ser in gexrnk.iteritems():
     ser = ser.sort_values(ascending=False)
-    est = enrichment_score_tensor(gene_mat=ser.index.values, cor_mat=ser.values, gene_sets={'random2':gm},
+    est = enrichment_score_tensor(gene_mat=ser.index.values, cor_mat=ser.values, gene_sets=gmt,
                                   weighted_score_type=0.25, nperm=0,
                                   scale=True, single=True, rs=rs)[0]
-    es.append(est)
-    names.append(name)
+    df[name]=est
+
 
 
 # In[25]:
 print("\n\n\n")
 print("Scaled Enrichment Scores (ES):")
-for n, e  in zip(names, es):
-    print(n, ": ", e)
-
-
-# In[26]:
-
+print(df)
 
 ## no scale es
-names=[]
-es = []
-hits = []
+df2 = pd.DataFrame(index=sorted(gmt.keys()))
 rs = np.random.RandomState(0)
 gexrnk = gex.rank(axis=0, method='average', na_option='bottom')
 for name, ser in gexrnk.iteritems():
     ser = ser.sort_values(ascending=False)
-    est = enrichment_score_tensor(gene_mat=ser.index.values, cor_mat=ser.values, gene_sets={'random2':gm},
-                                  weighted_score_type=0.25, nperm=100,
+    est = enrichment_score_tensor(gene_mat=ser.index.values, cor_mat=ser.values, gene_sets=gmt,
+                                  weighted_score_type=0.25, nperm=0,
                                   scale=True, single=True, rs=rs)[0]
-    es.append(est)
-    names.append(name)
-
+    df2[name]=est
 
 # In[27]:
-
-
 ## no scale es values and norm by all samples
-es = np.array(es)
-nes = es/(es.max() -es.min())
+nes = df2/(df2.values.max() -df2.values.min())
 print("\n\n\n")
 print("Normalized Enrichment Scores (NES):")
-for n, e  in zip(names, nes):
-    print(n, ": ", e)
+# for n, e  in zip(names, nes):
+#     print(n, ": ", e)
+print(df2)