This repository has been archived by the owner on Jul 7, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
combine_macrostates.py
112 lines (96 loc) · 4.93 KB
/
combine_macrostates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env sage -python
import sys
from sage.all import *
import resource
from os import path, access, R_OK # W_OK for write permission
import re
""" This module contains function that combines the random macrostates generated by multiple non-interacting servers into a single
directory. Each server is expected to have created a directory with many text files, where each text file
holds randomly generated macrostates for a given combination of N and S. This program combines each of
those directories and their associated text files of macrostates into a single directory of text files.
For the continued generation of random macrostates, the newly created 'combined' directory is sent back out
to the non-interacting servers. There, the programs on each server receive an updated list of the macrostates
that have already been generated by the group of servers. """
# For each name in datasets, there should be a directory with that name
# containing a text file called <name>-data.txt that holds site, species,
# and species abundance data in columns:
# e.g. site1, K sonoriense, 15
# site1, K baurii, 10
# site1, K flavescens, 8
# site2, K sonoriense, 18
# site2, K baurii, 13
# site2, K flavescens, 12
def get_NS_combos(datasets, data_dir='/home/kenlocey/data', min_species=10):
    """Return every unique [N, S] combination found in ``datasets``.

    Each dataset is read from ``<data_dir>/<name>/<name>-data.txt``.  The
    first whitespace-delimited token of a line is taken as the site label
    and the trailing token as an integer abundance.  Consecutive lines that
    share a site label make up one site.  For a site, N is the sum of its
    positive abundances and S is the number of positive abundances.

    Args:
        datasets: iterable of dataset names.
        data_dir: directory holding one sub-directory per dataset.
        min_species: a site is reported only if its S is at least this
            value (the default of 10 matches the former ``len(SAD) > 9``).

    Returns:
        A list of ``[N, S]`` lists with duplicates removed, in no
        particular order.

    Raises:
        IOError/OSError: if a dataset file cannot be opened.
        IndexError, ValueError: if a non-blank line does not end in an
            integer abundance.
    """
    site_label = re.compile(r'\A\S*')
    last_field = re.compile(r'\d*\S$')
    combos = set()

    def record(abundances):
        # Sites with too few species are ignored.
        if len(abundances) >= min_species:
            combos.add((sum(abundances), len(abundances)))

    for dataset in datasets:
        data_file = path.join(data_dir, dataset, dataset + '-data.txt')
        current_site = None
        abundances = []
        with open(data_file, 'r') as handle:
            for raw_line in handle:
                # Dropping trailing whitespace keeps the end-anchored
                # abundance pattern working for CRLF files and padded lines.
                line = raw_line.rstrip()
                if not line:
                    continue  # blank lines carry no record
                site = site_label.match(line).group()
                if site != current_site:
                    # A new site label closes the previous site.
                    record(abundances)
                    current_site = site
                    abundances = []
                abundance = int(last_field.findall(line)[0])
                # Zero abundances are skipped on every line, the first
                # line of the file included.
                if abundance > 0:
                    abundances.append(abundance)
        # The final site has no following label to close it, so it is
        # recorded here; otherwise it would be silently dropped.
        record(abundances)

    return [list(combo) for combo in combos]
def _read_macrostates(file_path):
    """Return the macrostates stored one per line in ``file_path``.

    A path that is not a readable regular file yields an empty list.
    """
    import ast  # local import so the file's import block need not change

    if not (path.isfile(file_path) and access(file_path, R_OK)):
        return []
    macrostates = []
    with open(file_path, 'r') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue  # a blank line holds no macrostate
            # NOTE(review): this used to be eval(line).  The files come from
            # other servers, so only plain Python literals are accepted now;
            # confirm that no file stores anything other than literal lists.
            macrostates.append(ast.literal_eval(line))
    return macrostates


def combine_macrostates(datasets, folders=('wash', 'jayne'),
                        results_dir='/home/kenlocey/EC2-results',
                        combined_dir='/home/kenlocey/combined1'):
    """Merge macrostate files written by independent servers.

    For every unique N-S combination found in ``datasets`` the macrostates
    in ``<results_dir>/<folder>/combined/<N>-<S>.txt`` (one file per folder)
    and in the existing ``<combined_dir>/<N>-<S>.txt`` are pooled,
    de-duplicated, and written back to ``<combined_dir>/<N>-<S>.txt``, one
    macrostate per line.  Combinations with no macrostates at all are left
    untouched.

    Args:
        datasets: dataset names, passed on to ``get_NS_combos``.
        folders: server sub-directories of ``results_dir`` to read.  Pass
            ``None`` to use every entry found in ``results_dir``.
        results_dir: directory holding one sub-directory per server.
        combined_dir: directory holding the merged output files.

    Returns:
        None.  Progress is printed to standard output.
    """
    NS_combos = get_NS_combos(datasets)
    print(len(NS_combos))  # number of unique N-S combinations

    if folders is None:
        from os import listdir
        folders = listdir(results_dir)
    folders = list(folders)
    print(folders)

    ct = 0  # number of combined files written so far
    for combo in NS_combos:
        N = int(combo[0])
        S = int(combo[1])
        file_name = str(N) + '-' + str(S) + '.txt'
        combined_path = path.join(combined_dir, file_name)

        fat_list = []
        for name in folders:
            server_path = path.join(results_dir, name, 'combined', file_name)
            fat_list.extend(_read_macrostates(server_path))
        # Macrostates merged on an earlier run are read before the file is
        # rewritten below, so they are carried over rather than lost.
        fat_list.extend(_read_macrostates(combined_path))

        if not fat_list:
            continue

        # Unique macrostates gathered from the independently working servers.
        clean_list = [list(x) for x in set(tuple(x) for x in fat_list)]
        ct += 1
        print('%s %s   %s   %s' % (N, S, len(clean_list), ct))
        with open(combined_path, 'w') as combined:
            for sad in clean_list:
                combined.write(str(sad) + '\n')