<a href="https://colab.research.google.com/github/yashlal/Deepfake-Microbiomes/blob/main/Combination.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from newsolver import predict_community_fullnp
import numpy as np
import pandas as pd
import random as rd
from numba import njit
from numba.typed import List
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pickle
import torch.optim as optim
import time
from math import sqrt
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from modules import regenerate_PWMatrix
from scipy.stats import wasserstein_distance as WD

# Number of synthetic samples to generate for the training and test sets.
train_size = 5000
test_size = 25

# Load the input table; the first spreadsheet column is used as the row index.
data = pd.read_excel('RealData.xlsx', index_col=0)
specs = data.columns.tolist()

# Keep only the columns that have at least 85 entries which are truthy after
# casting to bool. Columns are addressed by position, not by label.
trimmed_specs = [
    spec
    for position, spec in enumerate(specs)
    if data.iloc[:, position].astype(bool).sum() >= 85
]
dim1 = len(trimmed_specs)

# Copy the retained names into a numba typed list.
typed_trimmed_specs = List()
for spec in trimmed_specs:
    typed_trimmed_specs.append(spec)

@njit()
def get_LT(full_ar):
    """Flatten the strictly lower triangle of ``full_ar`` into a list.

    Entries are collected row by row: for every row index ``r`` the values
    at columns ``0 .. r-1`` are appended in order. The diagonal is skipped.

    Args:
        full_ar: Indexable 2-D container (indexed as ``full_ar[r][c]``).

    Returns:
        A list holding the below-diagonal entries in row-major order.
    """
    flat = []
    n_rows = len(full_ar)
    # Row 0 has nothing below the diagonal, so start at row 1.
    for row in range(1, n_rows):
        current = full_ar[row]
        for col in range(row):
            flat.append(current[col])
    return flat

@njit()
def generate_matrix(comm, tolerance):
    """Build a random square matrix whose mirrored entries sum to one.

    For every pair ``(row, col)`` with ``col < row`` a value ``r`` is drawn
    with ``rd.random()``; ``r`` is stored below the diagonal and ``1 - r`` at
    the mirrored position above it. The diagonal stays at zero.

    Args:
        comm: Sized container; only its length is used, as the matrix size.
        tolerance: Accepted for interface compatibility; not used here.

    Returns:
        A ``(len(comm), len(comm))`` float array.
    """
    n = len(comm)
    mat = np.zeros((n, n))

    # np.zeros already leaves the diagonal at 0, so only visit col < row.
    for row in range(n):
        for col in range(row):
            draw = rd.random()
            mat[row, col] = draw
            mat[col, row] = 1 - draw

    return mat

def datagen():
    """Generate one synthetic (community, interaction values) sample.

    A random pairwise matrix is drawn with ``generate_matrix``, the
    resulting community is computed with ``predict_community_fullnp``, and
    the matrix is flattened to its strictly lower triangle with ``get_LT``.

    Returns:
        A 2-tuple ``(community, lower_triangle)``.
    """
    interactions = generate_matrix(typed_trimmed_specs, 0)
    community = predict_community_fullnp(interactions, trimmed_specs, verb=False)
    lower_triangle = get_LT(interactions)
    return community, lower_triangle

# Use the first CUDA device when one is available; otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == 'cuda':
    print('CUDA device selected!')
else:
    print('CUDA device not available. CPU selected')


class MyNet(nn.Module):
    """Two-layer fully connected network with a single hidden ReLU layer.

    The input has ``n_species`` features and the output has
    ``n_species * (n_species - 1) // 2`` features, i.e. one value per
    strictly-lower-triangular entry of an ``n_species x n_species`` matrix.
    With the default ``n_species=462`` this gives 462 inputs and
    231 * 461 outputs.

    Args:
        hyperparam: Width of the hidden layer.
        n_species: Number of input features (defaults to 462).
            NOTE(review): presumably this should equal the number of
            retained species columns -- confirm against the data.
    """

    def __init__(self, hyperparam, n_species=462):
        super().__init__()
        # Number of unordered pairs == size of the strictly lower triangle.
        n_pairs = n_species * (n_species - 1) // 2
        self.fc1 = nn.Linear(n_species, hyperparam)
        self.fc2 = nn.Linear(hyperparam, n_pairs)

    def forward(self, x):
        """Apply ``fc1``, ReLU, then ``fc2`` (no output activation)."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

# Containers for the generated samples: inputs are community tensors and
# targets are the flattened interaction values, all placed on `device`.
mytest_x, mytest_y = [], []
mytrain_x, mytrain_y = [], []

# The test set is generated first, then the training set.
pbar1 = tqdm(range(test_size))
pbar1.set_description('Generating Test Set')
for _ in pbar1:
    community, lower_triangle = datagen()
    community_tensor = torch.from_numpy(community).float()
    target_tensor = torch.FloatTensor(lower_triangle)
    mytest_x.append(community_tensor.to(device))
    mytest_y.append(target_tensor.to(device))

pbar2 = tqdm(range(train_size))
pbar2.set_description('Generating Train Set')
for _ in pbar2:
    community, lower_triangle = datagen()
    community_tensor = torch.from_numpy(community).float()
    target_tensor = torch.FloatTensor(lower_triangle)
    mytrain_x.append(community_tensor.to(device))
    mytrain_y.append(target_tensor.to(device))

def test_net_comm(model, test_x):
    """Print the Wasserstein distance between predicted and real communities.

    For each community tensor in ``test_x`` the model output is converted
    back to a pairwise matrix with ``regenerate_PWMatrix``, a community is
    computed from that matrix with ``predict_community_fullnp``, and the
    Wasserstein distance to the input community is printed.

    Args:
        model: Network that is called on each test community tensor.
        test_x: Sequence of community tensors to evaluate.
    """
    for i, cm_real in enumerate(test_x):
        # Feed the actual test sample to the model (the previous code passed
        # the builtin ``input`` function). No gradients are needed here.
        with torch.no_grad():
            output = model(cm_real).tolist()
        mat_y = np.array(regenerate_PWMatrix(output))
        cm_pred = predict_community_fullnp(mat_y, trimmed_specs)
        # SciPy works on host arrays; a CUDA tensor cannot be converted
        # to NumPy implicitly, so move it to the CPU first.
        cm_real_np = cm_real.detach().cpu().numpy()
        print(f'Test {i}: WD Distance {WD(cm_pred, cm_real_np)}')

def train_net(model, train_x, train_y, criterion=None, optimizer=None):
    """Run one pass of per-sample gradient updates over the training data.

    Args:
        model: Network to train; updated in place through ``optimizer``.
        train_x: Sequence of input tensors.
        train_y: Sequence of target tensors, aligned with ``train_x``.
        criterion: Loss callable ``criterion(output, target)``. Defaults to
            ``nn.MSELoss(reduction='sum')``.
        optimizer: Optimizer over the model parameters. Defaults to
            ``optim.Adam(model.parameters(), lr=1e-4)``.

    Raises:
        ValueError: If ``train_x`` and ``train_y`` differ in length.
    """
    if len(train_x) != len(train_y):
        raise ValueError(
            f'train_x and train_y must have the same length, '
            f'got {len(train_x)} and {len(train_y)}'
        )
    # These used to be looked up as globals that were never defined (they
    # were locals of main), so they are now explicit arguments with the
    # same default configuration.
    if criterion is None:
        criterion = nn.MSELoss(reduction='sum')
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=1e-4)

    pbar3 = tqdm(zip(train_x, train_y), total=len(train_x))
    pbar3.set_description('Training Neural Net')
    for sample, true_y in pbar3:
        optimizer.zero_grad()
        output = model(sample)
        loss = criterion(output, true_y)
        loss.backward()
        optimizer.step()


def main(param1):
    """Build a ``MyNet``, train it on the generated data and evaluate it.

    Args:
        param1: Hidden-layer width passed to ``MyNet``.
    """
    net = MyNet(param1).to(device)

    # Multi-GPU support: wrap the model when more than one GPU is visible.
    n_gpus = torch.cuda.device_count()
    if n_gpus > 1:
        print(f'Using {n_gpus} GPUs')
        net = nn.DataParallel(net)
    elif n_gpus == 1:
        print(f'Using {n_gpus} GPU')

    criterion = nn.MSELoss(reduction='sum')
    optimizer = optim.Adam(net.parameters(), lr=1e-4)
    # Pass the loss and optimizer explicitly; train_net cannot see locals.
    train_net(net, mytrain_x, mytrain_y, criterion, optimizer)
    test_net_comm(net, mytest_x)
    