In [2]:
from fish_helpers import *
from scipy.io import mmread
import tqdm.notebook as tqdm
import seaborn as sns
import random
import operator
import time
from random import sample
from functools import partial
from multiprocessing import Pool
from random import sample
import sys
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import plotly.express as px
from plotly.subplots import make_subplots
import pylab as pl
from IPython import display

base_path = '/bigstore/GeneralStorage/Zach/MERCI/Autism/'
Gene_Matrix_Avg = pickle.load(open(os.path.join(base_path,'Gene_Matrix_Avg.pkl'),'rb'))
Gene_Matrix_std = pickle.load(open(os.path.join(base_path,'Gene_Matrix_std.pkl'),'rb'))
out_path = '/bigstore/GeneralStorage/Zach/MERCI/Autism/Results'
if not os.path.exists(out_path):
    os.mkdir(out_path)

In [3]:
def codebook_generator(min_length=4,max_length=8,hamming=2,positive_bits=3,bits='01',wait=0.5):
    codebook_dict = {}
    ne = operator.ne
    for i in range(max_length+1):
        if i<min_length:
            continue
        best_candidates = []
        very_last_time = time.time()
        base_str = str(''.join('1' for i in range(positive_bits)))+str(''.join('0' for i in range(i-positive_bits)))
        print(base_str)
        while time.time()-very_last_time<10:
            candidates = []
            bad_candidates = []
            last_time = time.time()
            while time.time()-last_time<wait:
                candidate = ''.join(sample(base_str,len(base_str)))
                if candidate in candidates:
                    continue
                elif candidate in bad_candidates:
                    continue
                good = 0
                for can in candidates:
                        if sum(map(ne, can, candidate)) < hamming:
                            good+=1
                            bad_candidates.append(candidate)
                            break
                if good==0:
                    candidates.append(candidate)
                    last_time = time.time()
            if len(candidates)>len(best_candidates):
                best_candidates = candidates
                very_last_time = time.time()

        print(i,' Total Barcodes: ',len(best_candidates))
        codebook_dict[str(i)] = best_candidates
    return codebook_dict

In [4]:
def generate_random_codebook(codebook_dict,Gene_Matrix_Avg,key='6'):
    Gene_Matrix_Avg = Gene_Matrix_Avg.drop(columns=[i for i in Gene_Matrix_Avg.columns if 'Blank' in i])
    key = str(key) # if len cell types > len codewords increase key
    codebook = np.zeros([len(codebook_dict[key]),len(codebook_dict[key][1])])
    for y,barcode in enumerate(codebook_dict[key]):
        for x,bit in enumerate(str(barcode)):
            codebook[y,x] = int(bit)
    multiplexed_codebook_df = pd.DataFrame(data=codebook)
    Gene_Matrix_Avg['blank'] = 0
    for i in range(len(multiplexed_codebook_df.index)-len(Gene_Matrix_Avg.columns)):
        Gene_Matrix_Avg['Blank'+str(i)] = 0
    multiplexed_codebook_df.index = random.sample(list(Gene_Matrix_Avg.columns),len(multiplexed_codebook_df.index))
    codebook_df = multiplexed_codebook_df.sort_index()
    codebook_df.loc['blank']=codebook_df.iloc[0]*0
    return codebook_df.drop(index=[i for i in codebook_df.index if 'Blank' in i])

In [5]:
def generate_random_species(genes,nbits=6,ngenes=25): # Prefilter genes
    genes = list(genes)
    species = pd.DataFrame(index=range(ngenes),columns=range(nbits))
    for i in range(nbits):
        species[i]=random.sample(genes,ngenes)
    return species

In [6]:
def generate_mutations_codebook(population,mutation_rate=1,offspring_size=10):
    mutations = []
    for codebook_df,survivors in population:
        for n in range(offspring_size):
            codebook_array = np.array(codebook_df)
            for i in range(mutation_rate):
                x = random.sample(range(len(codebook_df.index)),1)
                y = random.sample(range(len(codebook_df.columns)),1)
                if codebook_array[x,y]==1:
                    codebook_array[x,y]=0
                else:
                    codebook_array[x,y]=1
            new_codebook_df = pd.DataFrame(codebook_array,index=codebook_df.index,columns=codebook_df.columns)
            mutations.append([new_codebook_df,survivors])
    return mutations

In [7]:
def generate_starting_population(Gene_Matrix_Avg,genes,nbits=7,ngenes=25,keepers=10):
    codebook_dict = codebook_generator(min_length=nbits,max_length=nbits)
    population_codebook = []
    for i in range(keepers):
        codebook_df = generate_random_codebook(codebook_dict,Gene_Matrix_Avg,key=str(nbits))
        survivors = []
        for k in range(keepers):
            survivors.append(generate_random_species(genes,nbits=nbits,ngenes=ngenes))
        population_codebook.append([codebook_df,survivors])
    return population_codebook

In [8]:
def generate_offspring_species(survivors,ngenes=25,offspring_size=5):
    offspring = []
    for species in survivors:
        for i in range(offspring_size):
            mate = random.sample(survivors,1)[0]
            child = species.copy()
            for bit in child.columns:
                genes = list(np.unique(list(species[bit])+list(mate[bit])))
                while len(genes)<ngenes:
                    genes.append(random.sample(genes,1)[0])
                child[bit] = random.sample(genes,ngenes)
            offspring.append(child)
    return offspring

In [9]:
def generate_mutations_species(progenerators,genes,ngenes=25,mutation_rate=1):
    mutations = []
    for species in progenerators:
        mutated = species.copy()
        for bit in species.columns:
            gene_list = list(mutated[bit])
            for i in range(mutation_rate):
                gene = random.sample(genes,1)
                while gene in gene_list:
                    gene = random.sample(genes,1)
                gene_list[random.randint(0,ngenes-1)] = gene[0]
            mutated[bit] = gene_list
        mutations.append(mutated)
    return mutations

In [10]:
def populate_species(survivors,genes,ngenes=25,offspring_size=5,mutation_rate=1):
    offspring = generate_offspring_species(survivors,ngenes=ngenes,offspring_size=offspring_size)
    progenerators = survivors+offspring
    mutations = generate_mutations_species(progenerators,genes,ngenes=ngenes,mutation_rate=mutation_rate)
    population = survivors+offspring+mutations
    return population

In [11]:
def calculator_species(species,Gene_Matrix_Avg,codebook_df):
    Readout_df = pd.DataFrame(index=codebook_df.index,columns=species.columns).sort_index()
    Readout_df = Readout_df.drop(index=[i for i in Readout_df.index if 'Blank' in i])
    errors = []
    for bit in species.columns:
        Readout_df[bit] = np.sum(Gene_Matrix_Avg.loc[list(species[bit])],axis=0)
        Readout_df.loc['blank'] = 0
        pos_cells = list(codebook_df[codebook_df[bit]==1].index)
        neg_cells = list(codebook_df[codebook_df[bit]!=1].index)
        distances = []
        for p in pos_cells:
            for n in neg_cells:
                dist = np.abs(Readout_df[bit].loc[p]-Readout_df[bit].loc[n]) #maybe abs is a bad idea
                distances.append(dist)
        errors.append(np.min(distances)+np.average(distances))
    error = np.min(errors)
    if error!=0:
        Error = 1/error
    else:
        Error = 0
    return species,Readout_df,Error

In [12]:
def selection(codebook_dfs,Readout_dfs,population,Errors,keepers=10):
    if len(codebook_dfs) != len(Readout_dfs):
        cdfs = []
        for i in range(len(Readout_dfs)):
            cdfs.append(codebook_dfs)
    else:
        cdfs = codebook_dfs
    df = pd.DataFrame()
    df['codebook_dfs'] = cdfs
    df['population'] = population
    df['Readout_dfs'] = Readout_dfs
    df['Errors'] = Errors
    df = df.sort_values('Errors',ascending=False)
    df = df.iloc[0:keepers]
    survivor_codebooks = list(df['codebook_dfs']) 
    survivors= list(df['population'])
    survivor_Readout_dfs = list(df['Readout_dfs'])
    survivor_errors = list(df['Errors'])
    if len(codebook_dfs) != len(Readout_dfs):
        return codebook_dfs,survivors,survivor_Readout_dfs,survivor_errors
    else:
        return survivor_codebooks,survivors,survivor_Readout_dfs,survivor_errors

In [13]:
def artificial_selection_species(codebook_df,new_population,Gene_Matrix_Avg,ngenes=25,keepers=10,ncpu=10,offspring_size=5,mutation_rate=5,iterations=2):
    #Add iterations, mutations and breeding
    genes = list(Gene_Matrix_Avg.index)
    for i in range(iterations):
        population_species = populate_species(new_population,genes,ngenes=ngenes,offspring_size=offspring_size,mutation_rate=mutation_rate)
        Errors = []
        Readout_dfs = []
        new_population = []
        if ncpu==1:
            for individual_species in population_species:#tqdm.tqdm(population):
                species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
                Readout_dfs.append(Readout_df)
                new_population.append(species)
                Errors.append(Error)
        else:
            pfunc = partial(calculator_species,Gene_Matrix_Avg=Gene_Matrix_Avg,codebook_df=codebook_df)
            p = Pool(ncpu)
            sys.stdout.flush()
            for species,Readout_df,Error in p.imap(pfunc,population_species,chunksize=1):#tqdm.tqdm(p.imap(pfunc,population,chunksize=1),total=len(population)):
                Readout_dfs.append(Readout_df)
                new_population.append(species)
                Errors.append(Error)
            sys.stdout.flush()
            p.close()
            p.join()
        unused,survivors,survivor_dfs,survivor_errors = selection(Readout_dfs,Readout_dfs,new_population,Errors,keepers=keepers)
    return codebook_df,survivors,survivor_dfs,survivor_errors

In [14]:
def calculate_error_codebook(Readout_df):
    distances = []
    for x,a in enumerate(Readout_df.index):
        if 'Blank' in a:
            continue
        for y,b in enumerate(Readout_df.index):
            if 'Blank' in b:
                continue
            if x<y:
                A = np.array(Readout_df.loc[a])
                B = np.array(Readout_df.loc[b])
                dist = np.linalg.norm(A-B)
                distances.append(dist)
#     Error = ((np.average(distances)-np.min(distances))*np.std(distances))/np.average(distances) #Cost Function
    Error = np.min(distances)#1/(np.average(distances)+np.min(distances))
    return Error

In [15]:
def artificial_selection_codebook_pfunc(Individual,Gene_Matrix_Avg,offspring_size=5,mutation_rate=1,keepers=10,ngenes=25,ncpu=30):
    genes = list(Gene_Matrix_Avg.index)
    codebook_df,survivors_species = Individual.copy()
#     population_species = populate_species(survivors_species,genes,ngenes=ngenes,offspring_size=offspring_size,mutation_rate=mutation_rate)
    codebook_df,survivors,survivor_dfs,survivor_errors = artificial_selection_species(codebook_df,survivors_species,Gene_Matrix_Avg,ngenes=ngenes,keepers=keepers,ncpu=ncpu)
    Error = calculate_error_codebook(survivor_dfs[0]) # Maybe average all survivors not just best
    return Error,codebook_df,survivor_dfs,survivors

In [16]:
def artificial_selection_codebook(offspring_codebook,Gene_Matrix_Avg,offspring_size=5,mutation_rate=1,ngenes=25,ncpu=30,keepers=10):
    Errors = []
    codebook_dfs = []
    Readout_dfs = []
    new_population = []
    if ncpu==1:
        for Individual in tqdm.tqdm(offspring_codebook):
            Error,codebook_df,survivor_dfs,survivors = artificial_selection_codebook_pfunc(Individual,Gene_Matrix_Avg,
                                                                                           offspring_size=offspring_size,
                                                                                           mutation_rate=mutation_rate,
                                                                                           keepers=keepers,ngenes=ngenes,ncpu=30)
            Errors.append(Error)
            codebook_dfs.append(codebook_df)
            Readout_dfs.append(survivor_dfs)
            new_population.append(survivors)
    else:
        pfunc = partial(artificial_selection_codebook_pfunc,Gene_Matrix_Avg=Gene_Matrix_Avg,
                        offspring_size=offspring_size,mutation_rate=mutation_rate,keepers=keepers,ngenes=ngenes,ncpu=1)
        p = Pool(ncpu)
        sys.stdout.flush()
        for Error,codebook_df,survivor_dfs,survivors in tqdm.tqdm(p.imap(pfunc,offspring_codebook,chunksize=1),total=len(offspring_codebook)):
            Errors.append(Error)
            codebook_dfs.append(codebook_df)
            Readout_dfs.append(survivor_dfs)
            new_population.append(survivors)
        sys.stdout.flush()
        p.close()
        p.join()
    codebook_dfs,survivors_codebook,survivor_dfs,survivor_errors = selection(codebook_dfs,Readout_dfs,new_population,Errors,keepers=keepers)
    population = []
    for i,codebook_df in enumerate(codebook_dfs):
        population.append([codebook_df,survivors_codebook[i]])
    return population,survivor_dfs,survivor_errors

In [17]:
def generate_next_population(population_codebook,mutation_rate=1,offspring_size=10):
    mutations_codebook = generate_mutations_codebook(population_codebook,mutation_rate=mutation_rate,offspring_size=offspring_size)
    offspring_codebook = population_codebook+mutations_codebook
    return offspring_codebook

In [18]:
def view_progress(survivors,survivor_dfs,survivor_errors,verbose=False):
    Results = {}
    Results['all'] = {}
    Results['all']['Error'] = survivor_errors
    Error_avg = np.average(survivor_errors)
    Error_std = np.std(survivor_errors)
    Results['all']['Error_avg'] = Error_avg
    Results['all']['Error_std'] = Error_std
    if verbose:
        print('Error Summary')
        print('Average:',Results['all']['Error_avg'])
        print('Median:',np.median(survivor_errors))
        print('Min:',np.min(survivor_errors))
        print('STD:',Results['all']['Error_std'])
    for c,individual in enumerate(survivors):
        codebook_df,species = individual
        Readout_df = survivor_dfs[c][0]
        Error = survivor_errors[c]
        for cell in codebook_df.index:
            if 'Blank' in cell:
                continue
            if 'blank' in cell:
                continue
            if not cell in Results.keys():
                Results[cell] = {}
                Results[cell]['cell_pos'] = []
                Results[cell]['cell_neg'] = []
                Results[cell]['cell_sn'] = []
                Results[cell]['cell_diff'] = []
                Results[cell]['cell_snr'] = []
            cell_pos = []
            cell_neg = []
            R = Readout_df.loc[cell]
            for i,value in enumerate(codebook_df.loc[cell]):
                if value==1:
                    cell_pos.append(R[i])
                elif value==0:
                    cell_neg.append(R[i])
            cell_sn= np.average(cell_pos)/np.average(cell_neg)
            cell_diff = np.average(cell_pos)-np.average(cell_neg)
            cell_snr = np.average(cell_pos)/(np.std(cell_pos)*np.average(cell_neg)*np.std(cell_neg))
            Results[cell]['cell_pos'].extend(cell_pos)
            Results[cell]['cell_neg'].extend(cell_neg)
            Results[cell]['cell_sn'].append(cell_sn)
            Results[cell]['cell_diff'].append(cell_diff)
            Results[cell]['cell_snr'].append(cell_snr)

    for cell in Results.keys():
        if 'Blank' in cell:
                continue
        if 'blank' in cell:
                continue
        if 'all' in cell:
            continue
        Results[cell]['cell_pos_avg'] = np.average(Results[cell]['cell_pos'])
        Results[cell]['cell_neg_avg'] = np.average(Results[cell]['cell_neg'])
        Results[cell]['cell_pos_std'] = np.std(Results[cell]['cell_pos'])
        Results[cell]['cell_neg_std'] = np.std(Results[cell]['cell_neg'])
        Results[cell]['cell_sn_avg'] = np.average(Results[cell]['cell_sn'])
        Results[cell]['cell_diff_avg'] = np.average(Results[cell]['cell_diff'])
        Results[cell]['cell_snr_avg'] = np.average(Results[cell]['cell_snr'])
        Results[cell]['cell_sn_std'] = np.std(Results[cell]['cell_sn'])
        Results[cell]['cell_diff_std'] = np.std(Results[cell]['cell_diff'])
        Results[cell]['cell_snr_std'] = np.std(Results[cell]['cell_snr'])

    if verbose:
        df_dict = {}
        for cell,cell_result in Results.items():
            if cell=='all':
                continue
            for key,values in cell_result.items():
                if 'avg' in key:
                    continue
                if 'std' in key:
                    continue
                if not key in df_dict.keys():
                    df_dict[key]= pd.DataFrame()
                if len(values)!=len(df_dict[key]):
                    temp_df = pd.DataFrame()
                    temp_df[cell]=values
                    for column in df_dict[key].columns:
                        temp_df[column]=df_dict[key][column]
                    df_dict[key] = temp_df
                else:
                    df_dict[key][cell] = values 
        for key,df in df_dict.items():
            try:
                df[df==np.inf]=np.nan
                plt.figure(figsize=[12,4])
                chart = sns.violinplot(data=np.log10(df.sortlevel(axis=1)+1))
                chart.set_xticklabels(chart.get_xticklabels(), rotation=90)
                plt.title(key)
                plt.show()
            except:
                print(key)
                print(df)
    return Results,Error_avg,Error_std

In [19]:
def Evolution(Gene_Matrix_Avg,keepers=10,
              offspring_size=5,ngenes=25,nbits=6,ncpu=30,
              mutation_rate=1,iterations=100,New=True,
              rel_STD_thresh=0,MAX_thresh=0,verbose=False,
              out_path=os.getcwd()):
    
    Filtered_Gene_Matrix_Avg = Gene_Matrix_Avg.copy()
    STD = np.std(Filtered_Gene_Matrix_Avg,axis=1)
    MAX = np.max(Filtered_Gene_Matrix_Avg,axis=1)
    Filtered_Gene_Matrix_Avg = Filtered_Gene_Matrix_Avg[(STD>np.percentile(STD,rel_STD_thresh))&(MAX>MAX_thresh)]
    for i in range(ngenes):
        Filtered_Gene_Matrix_Avg.loc['Blank'+str(i)] = Filtered_Gene_Matrix_Avg.iloc[0]*0
    genes = list(Filtered_Gene_Matrix_Avg.index)
     # Initial Population
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    O = 0
    X = []
    Y = []
    E_max = []
    E_min = []
    if New:
        survivors = generate_starting_population(Gene_Matrix_Avg,genes,nbits=nbits,
                                                          ngenes=ngenes,keepers=keepers)
        Iteration_results = {}
    else:
        Iteration_results = pickle.load(open(os.path.join(out_path,'Iteration_results.pkl'),'rb'))
        O = np.max(list(Iteration_results.keys()))
        try:
            survivors = Iteration_results[O]['survivors']
        except:
            survivors = Iteration_results[O-1]['survivors']
        for i in Iteration_results.keys():
            X.append(i)
            Error_avg = Iteration_results[i]['Results']['Error_avg']
            Error_std = Iteration_results[i]['Results']['Error_std']
            Y.append(np.log10(Error_avg))
            E_max.append((np.log10(Error_avg+Error_std)))
            E_min.append((np.log10(Error_avg+Error_std)))
            
    for i in range(iterations):
        I=i+O+1
        try:
            Iteration_results[I]={}
            mutations_codebook = generate_mutations_codebook(survivors,mutation_rate=mutation_rate,offspring_size=offspring_size)
            offspring_codebook = survivors+mutations_codebook
            Iteration_results[I]['offspring_codebook'] = offspring_codebook
            survivors,survivor_dfs,survivor_errors = artificial_selection_codebook(offspring_codebook,Filtered_Gene_Matrix_Avg,offspring_size=offspring_size,
                                                                                    mutation_rate=mutation_rate,ngenes=ngenes,ncpu=ncpu,keepers=keepers)
            Iteration_results[I]['survivors'] = survivors
            Iteration_results[I]['survivor_dfs'] = survivor_dfs
            Iteration_results[I]['survivor_errors'] = survivor_errors
            
            Results,Error_avg,Error_std = view_progress(survivors,survivor_dfs,survivor_errors,verbose=verbose)
            Iteration_results[I]['Results'] = Results
            
            pickle.dump(Iteration_results,open(os.path.join(out_path,'Iteration_results.pkl'),'wb'))
            X.append(I)
            Y.append(np.log10(Error_avg))
            E_max.append((np.log10(Error_avg+Error_std)))
            E_min.append((np.log10(Error_avg-Error_std)))
            pl.plot(X,Y,c='k')
            pl.fill_between(X,E_max,E_min,color='r')
            pl.xlabel('Iteration')
            pl.ylabel('Log10 Error')
            display.clear_output(wait=True)
            display.display(pl.gcf())
        except:
            pickle.dump(Iteration_results,open(os.path.join(out_path,'Iteration_results.pkl'),'wb'))
            raise NameError('Iteration failed')
    if verbose:
        print('')
        print('Elite Survivor')
    elite_survivor = Iteration_results[I]['survivors'][0]
    elite_survivor_df = Iteration_results[I]['survivor_dfs'][0]
    elite_survivor_error = Iteration_results[I]['survivor_errors'][0]
    Results,Error_avg,Error_std = view_progress([elite_survivor],[elite_survivor_df],[elite_survivor_error],verbose=verbose)
    X.append(I+1)
    Y.append(np.log10(Error_avg))
    E_max.append((np.log10(Error_avg)+np.log10(Error_std)))
    E_min.append((np.log10(Error_avg)+np.log10(Error_std)))
    pl.plot(X,Y,c='k')
    pl.fill_between(X,E_max,E_min,color='r')
    pl.title('Opimization Progress')
    pl.xlabel('Iteration')
    pl.ylabel('Log10 min Distance')
    display.clear_output(wait=True)
    display.display(pl.gcf())
    return Iteration_results,elite_survivor,elite_survivor_df,elite_survivor_error

out_path = '/bigstore/GeneralStorage/Zach/MERCI/Autism/Results/'
Iteration_results,elite_survivor,elite_survivor_df,elite_survivor_error = Evolution(Gene_Matrix_Avg,keepers=10,
                                                                          offspring_size=10,ngenes=25,nbits=7,ncpu=30,
                                                                          mutation_rate=5,iterations=100,New=True,
                                                                          rel_STD_thresh=10,MAX_thresh=0,verbose=False,
                                                                          out_path=out_path)

1110000
7  Total Barcodes:  35


HBox(children=(IntProgress(value=0, max=110), HTML(value='')))

Process ForkPoolWorker-5:
Process ForkPoolWorker-19:
Process ForkPoolWorker-12:
Process ForkPoolWorker-2:
Process ForkPoolWorker-17:
Process ForkPoolWorker-23:
Process ForkPoolWorker-13:
Process ForkPoolWorker-30:
Process ForkPoolWorker-21:
Process ForkPoolWorker-29:
Process ForkPoolWorker-16:
Process ForkPoolWorker-25:
Process ForkPoolWorker-26:
Process ForkPoolWorker-7:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-15:
Traceback (most recent call last):
Process ForkPoolWorker-22:
Process ForkPoolWorker-4:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/numpy/core/numerictypes.py", line 328, in issubclass_
    return issubclass(arg1, arg2)
  File "<ipython-input-11-ae338c5ff14f>", line 13, in calculator_species
    dist = np.abs(Readout_df[bit].loc[p]-Readout_df[bit].loc[n]) #maybe abs is a bad idea
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
  File "<ipython-input-11-ae338c5ff14f>", line 6, in calculator_species
    Readout_df[bit] = np.sum(Gene_Matrix_Avg.loc[list(species[bit])],axis=0)
Traceback (most recent call last):
  File "<ipython-input-11-ae338c5ff14f>", line 6, in calculator_species
    Readout_df[bit] = np.sum(Gene_Matrix_Avg.loc[list(species[bit])],axis=0)
  File "<ipython-inpu

  File "<ipython-input-15-e7ab25e97eee>", line 5, in artificial_selection_codebook_pfunc
    codebook_df,survivors,survivor_dfs,survivor_errors = artificial_selection_species(codebook_df,survivors_species,Gene_Matrix_Avg,ngenes=ngenes,keepers=keepers,ncpu=ncpu)
  File "<ipython-input-11-ae338c5ff14f>", line 7, in calculator_species
    Readout_df.loc['blank'] = 0
  File "<ipython-input-11-ae338c5ff14f>", line 7, in calculator_species
    Readout_df.loc['blank'] = 0
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 189, in __setitem__
    self._setitem_with_indexer(indexer, value)
KeyboardInterrupt
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/generic.py", line 3102, in drop
    axes, _ = self._construct_axes_from_arguments((index, columns), {})
  File "pandas/_libs/properties.pyx", line 36, in pandas._libs.properties.CachedProperty.__get__
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1901, in _getitem_axis
    return self._getitem_iterable(key, axis=axis)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/frame.py", line 3353, in reindexer
    value = value.reindex(self.index)._values
Process ForkPoolWorker-9:
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 189, in __setitem__
    self._setitem_with_indexer(indexer, value)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
Ke

KeyboardInterrupt
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/numpy/core/numeric.py", line 2799, in geterr
    val = (maskvalue >> SHIFT_DIVIDEBYZERO) & mask
  File "<ipython-input-15-e7ab25e97eee>", line 5, in artificial_selection_codebook_pfunc
    codebook_df,survivors,survivor_dfs,survivor_errors = artificial_selection_species(codebook_df,survivors_species,Gene_Matrix_Avg,ngenes=ngenes,keepers=keepers,ncpu=ncpu)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 1799, in _convert_arr_indexer
    keyarr = com._asarray_tuplesafe(keyarr)
KeyboardInterrupt
  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/common.py", line 312, in _asarray_tuplesafe
    result = np.asarray(values, dtype=dtype)
  

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/series.py", line 1235, in __unicode__
    max_rows=max_rows, length=show_dimensions)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/internals.py", line 3693, in setitem
    return self.apply('setitem', **kwargs)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/internals.py", line 3581, in apply
    applied = getattr(b, f)(**kwargs)
KeyboardInterrupt
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/series.py", line 1279, in to_string
    result = formatter.to_string()
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/frame.py", line 3195, in _set_item
    NDFrame._set_item(self, key, value)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/io/formats/format.py", line 264, in to_string
    footer = self._get_footer()
  File "/home/zach/minicond

NameError: Iteration failed

Process ForkPoolWorker-49:
Process ForkPoolWorker-35:
Process ForkPoolWorker-20:
Process ForkPoolWorker-40:
Process ForkPoolWorker-6:
Process ForkPoolWorker-28:
Process ForkPoolWorker-18:
Process ForkPoolWorker-44:
Process ForkPoolWorker-24:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "<ipython-input-15-e7ab25e97eee>",

  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
Traceback (most recent call last):
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-11-ae338c5ff14f>", line 7, in calculator_species
    Readout_df.loc['blank'] = 0
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-11-ae338c5ff14f>", line 9, in ca

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/internals.py", line 4243, in set
    loc = self.items.get_loc(item)
Traceback (most recent call last):
KeyboardInterrupt
  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/internals.py", line 3385, in _get_items
    return self.axes[0]
Traceback (most recent call last):
  File "<ipython-input-15-e7ab25e97eee>", line 5, in artificial_selection_codebook_pfunc
    codebook_df,survivors,survivor_dfs,survivor_errors = artificial_selection_species(codebook_df,survivors_species,Gene_Matrix_Avg,ngenes=ngenes,keepers=keepers,n

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
  File "<ipython-input-13-0146e2938865>", line 11, in artificial_selection_species
    species,Readout_df,Error = calculator_species(individual_species,Gene_Matrix_Avg,codebook_df)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/internals.py", line 3693, in setitem
    return self.apply('setitem', **kwargs)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/internals.py", line 3581, in apply
    applied = getattr(b, f)(**kwargs)
  File "<ipython-input-15-e7ab25e97eee>", line 5, in artificial_selection_codebook_pfunc
    codebook_df,survivors,survivor_dfs,survivor_errors = artificial_selection_species(c

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3385, in get_indexer_for
    return self.get_indexer(target, **kwargs)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 615, in _setitem_with_indexer
    setter(item, value)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 4960, in _ensure_index
    converted, all_arrays = lib.clean_index_list(index_like)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1142, in _getitem_iterable
    indexer = ax.get_indexer_for(key)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3222, in get_indexer
    target = _ensure_index(target)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1142, in _getitem_iterable
    indexer = ax.ge

  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/generic.py", line 2600, in _set_item
    self._data.set(key, value)
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/frame.py", line 3195, in _set_item
    NDFrame._set_item(self, key, value)
  File "pandas/_libs/index.pyx", line 142, in pandas._libs.index.IndexEngine.get_loc
  File "<frozen importlib._bootstrap_external>", line 1241, in find_spec
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/generic.py", line 4428, in _protect_consolidate
    result = f()
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/frame.py", line 3119, in __setitem__
    self._set_item(key, value)
KeyboardInterrupt
KeyboardInterrupt
  File "<frozen importlib._bootstrap_external>", line 1122, in _get_spec
  File "/home/zach/miniconda3/envs/pyspots/lib/python3.6/site-packages/pandas/core/indexing.py", line 1867, in _getitem_axis


In [None]:
#memory leak

In [None]:
i=

In [None]:
out_path = '/bigstore/GeneralStorage/Zach/MERCI/2_Tier_Genetic_Optimizer/'
Iteration_results = pickle.load(open(os.path.join(out_path,'Iteration_results.pkl'),'rb'))

In [None]:
Iteration_results.keys()