In [1]:
from aiida.storage.sqlite_zip.backend import SqliteZipBackend
from aiida import orm, load_profile, get_profile

# Load a temporary profile created directly from the exported archive file.
load_profile(SqliteZipBackend.create_profile("../data_Kahle2020/migrated.aiida"))

# Fixing missing user, as in https://aiida.discourse.group/t/setting-up-a-user-for-sqlitezipbackend/139/3
# The tuple unpacking doubles as a sanity check: it raises ValueError unless
# the query returns exactly one user.
(user,) = orm.QueryBuilder().append(orm.User).all(flat=True)
get_profile().default_user_email = user.email


# `Group.objects` is deprecated in aiida-core 2.x; `Group.collection` is the
# supported accessor and returns the same collection.
traj_group = orm.Group.collection.get(label="concatenated_trajectories")
diff_group = orm.Group.collection.get(label="diffusion_coefficients")
stru_group = orm.Group.collection.get(label="starting_structures")

In [2]:
from aiida_utils import find_connections

In [3]:
# Search the links between each pair of groups with both modes offered by
# `find_connections`, keeping one result per mode so they can be compared.
SEARCH_MODES = ("from_left", "to_right")

connections_traj_diff_l2r, connections_traj_diff_r2l = (
    find_connections(traj_group.nodes, diff_group.nodes, mode=search_mode)
    for search_mode in SEARCH_MODES
)

connections_stru_traj_l2r, connections_stru_traj_r2l = (
    find_connections(stru_group.nodes, traj_group.nodes, mode=search_mode)
    for search_mode in SEARCH_MODES
)

In [4]:
# Trajectories and diffusion results are paired 1-to-1: both search modes give
# the same answer, and every entry holds exactly one node on each side.
assert connections_traj_diff_l2r == connections_traj_diff_r2l
assert all(
    len(traj_nodes) == 1 and len(diff_nodes) == 1
    for traj_nodes, diff_nodes in connections_traj_diff_l2r
)

# Each side is a singleton, so flatten every entry into a plain (left, right) pair:
connections_traj_diff = [
    (traj_nodes[0], diff_nodes[0])
    for traj_nodes, diff_nodes in connections_traj_diff_l2r
]

In [5]:
# Starting structures do not pair up 1-to-1 with trajectories.
# Show the leading entries of both search modes side by side (node pks only);
# the slice caps the preview at 10 rows because zip stops at the shorter input.
preview_rows = zip(connections_stru_traj_l2r[:10], connections_stru_traj_r2l)
for (fwd_left, fwd_right), (bwd_left, bwd_right) in preview_rows:
    fwd_text = f"{[node.pk for node in fwd_left]} -> {[node.pk for node in fwd_right]}"
    bwd_text = f"{[node.pk for node in bwd_left]} <- {[node.pk for node in bwd_right]}"
    print(f"{fwd_text:50}     |       {bwd_text:50}")

[149] -> [4]                                           |       [149] <- [4]                                      
[271] -> [253]                                         |       [271] <- [253]                                    
[368] -> [298, 5426, 11504, 11788]                     |       [368] <- [298]                                    
[568] -> [446]                                         |       [568] <- [446]                                    
[603] -> [681]                                         |       [603] <- [681]                                    
[800] -> [870]                                         |       [800] <- [870]                                    
[1072] -> [945]                                        |       [1072] <- [945]                                   
[1146] -> [1244]                                       |       [1146] <- [1244]                                  
[1349] -> [1311]                                       |       [1349] <- [1311]         

In [6]:
# In the "to_right" result every entry holds exactly one starting structure
# and exactly one trajectory:
assert all(
    len(stru_nodes) == 1 and len(traj_nodes) == 1
    for stru_nodes, traj_nodes in connections_stru_traj_r2l
)

# Each side is a singleton, so flatten every entry into a plain (structure, trajectory) pair:
connections_stru_traj = [
    (stru_nodes[0], traj_nodes[0])
    for stru_nodes, traj_nodes in connections_stru_traj_r2l
]

In [7]:
# Total number of (starting structure, trajectory) pairs found.
len(connections_stru_traj)

200

In [8]:
# Count the distinct nodes on each side of the (structure, trajectory) pairs;
# a structure count below the pair count means some structures start several trajectories.
distinct_structures = {structure for structure, _ in connections_stru_traj}
print("Number of different starting structures:", len(distinct_structures))
distinct_trajectories = {trajectory for _, trajectory in connections_stru_traj}
print("Number of different trajectories:", len(distinct_trajectories))

Number of different starting structures: 121
Number of different trajectories: 200
