## Stochasticising

This notebook converts each current adjacency matrix into a stochastic transition matrix by converting all rows to probabilities (ie normalising them to 1). We then raise it to the power of 7 – which is equivalent to saying you can move one town per day, and then convert the probabilities back to relative flows and save it as new adjacency matrices (again setting the diagonals as equal to 0), writing them into the folder ‘stochasticised’. It raises a few concerns though; the value of C is super sensitive when trying to run the discrete SEIR model using it, and should we really be setting the diagonal as 0?!

In [5]:
#Import necessary libraries
import pandas as pd
import numpy as np
import os

In [6]:
# Read in the kalman filtered adjacency matrices
kl = sorted(os.listdir("/Volumes/HardDrive/New_Workflow/kalman_all_44/"))
kal_flow = []
for item in kl:
    if '._' not in item:
        kal_flow.append(item)
        
d = {}
for i in range(len(kal_flow)):
    d[str(i)] = pd.read_csv("/Volumes/HardDrive/New_Workflow/kalman_all_44/"+kal_flow[i], header = None)

## Further Adjustments

In [7]:
def stoch(dfs):
    
    df = dfs.copy()
    rowsums = []
    
    # Get the sum of each row, then divide each entry by that sum (normalise)
    for index, row in df.iterrows():
        a = sum(row)
        rowsums.append(a)
        if a > 0:
            for i in df.columns:
                df.iloc[index, i] = df.iloc[index, i]/a
    
    tot_df = df.copy()/7
    temp_df = df.copy()
    # Raise the transition matrix to the power of 7
    for i in range(6):
        temp_df = np.matmul(temp_df,df)
        tot_df += temp_df/7
    
    selfloop = []
    # Set diagonal to 0  
    for i in range(len(tot_df.columns)):
        selfloop.append(tot_df.iloc[i,i])
        tot_df.iloc[i,i]=0
    
    # Convert it back to relative flows (distribute diagonals evenly throughout the row)
    for index, row in tot_df.iterrows():
        for i in tot_df.columns:
            tot_df.iloc[index, i] = tot_df.iloc[index, i]/(1-selfloop[index])*rowsums[index]
        
    return tot_df

In [8]:
# Run the function over each dataframe and write it to a folder
for i in range(len(kal_flow)):
    print(i)
    temp = stoch(d[str(i)])
    temp.to_csv("/Volumes/HardDrive/New_Workflow/stochastic_44/"+kal_flow[i],index = False, header = False)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96


# Checks

In [12]:
rowsums = []
for index, row in temp.iterrows():
    a = sum(row)
    rowsums.append(a)

In [13]:
rowsums == ro

NameError: name 'ro' is not defined

In [160]:
for i in range(len(rowsums)):
    print(rowsums[i],ro[i],rowsums[i]-ro[i])

0.00012811167991790353 0.00012811167991790353 0.0
2.66345062903406e-06 2.66345062903406e-06 0.0
2.983190018081857e-06 2.9831900180818573e-06 -4.235164736271502e-22
4.7084075283894344e-07 4.708407528389434e-07 5.293955920339377e-23
8.033764508294465e-07 8.033764508294461e-07 3.1763735522036263e-22
2.4096643946915874e-06 2.409664394691588e-06 -8.470329472543003e-22
1.7145235024755435e-06 1.7145235024755435e-06 0.0
3.0530375173598006e-07 3.053037517359801e-07 -5.293955920339377e-23
0.0 0.0 0.0
0.0 0.0 0.0
3.6030787873426255e-05 3.603078787342627e-05 -1.3552527156068805e-20
3.6650936559082564e-07 3.665093655908257e-07 -5.293955920339377e-23
3.1947807837682094e-07 3.194780783768209e-07 5.293955920339377e-23
1.235500384525891e-07 1.2355003845258913e-07 -2.6469779601696886e-23
1.5326466065634129e-06 1.5326466065634125e-06 4.235164736271502e-22
6.531773709682459e-07 6.531773709682458e-07 1.0587911840678754e-22
0.0 0.0 0.0
0.0 0.0 0.0
7.428975888384215e-08 7.428975888384215e-08 0.0
1.0608142319

## Scrap Code

In [6]:
def stochasticise(dfs):
    
    df = dfs.copy()
    rowsums = []
    
    # Get the sum of each row, then divide each entry by that sum (normalise)
    for index, row in df.iterrows():
        a = sum(row)
        rowsums.append(a)
        if a > 0:
            for i in df.columns:
                df.iloc[index, i] = df.iloc[index, i]/a
    
    dfc = df.copy()
    # Raise the transition matrix to the power of 7
    for i in range(7):
        df = np.matmul(df,dfc)
    
    # Convert it back to relative flows
    for index, row in df.iterrows():
        for i in df.columns:
            df.iloc[index, i] = df.iloc[index, i]*rowsums[index]
            
    # Set diagonal to 0 (think about whether you want this)        
    for i in range(len(df.columns)):
        df.iloc[i,i]=0
        
    return df

In [4]:
# Run the function over each dataframe and write it to a folder
for i in range(len(kal_flow)):
    print(i)
    temp = stochasticise(d[str(i)])
    temp.to_csv("/Volumes/HardDrive/New_Workflow/stochastic_58/"+kal_flow[i],index = False, header = False)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14


KeyboardInterrupt: 