# Cargo 2000 Case Study Graph Import

<img src="img/logistics-diagram.png" alt="summary" width="1000"/>

In [1]:
import pandas as pd
from faker import Faker

## Connect ot Graph Data Science

In [2]:
from graphdatascience import GraphDataScience

# Use Neo4j URI and credentials according to your setup
gds = GraphDataScience('neo4j://localhost', auth=('neo4j', 'neopharm'))

## Staging for ETL
1. Clear the graph of any existing data and indexes
2. read the source data
3. create helper functions
4. Set Neo4j Indexes


In [4]:
# Clear last graph - All data and schema attributes
gds.run_cypher('MATCH(n) DETACH DELETE n')
gds.run_cypher('CALL apoc.schema.assert({},{})')

Unnamed: 0,label,key,keys,unique,action


In [5]:
df = pd.read_csv('https://s-cube-network.eu/c2k-files/c2k_data_comma.csv', dtype=str)

In [6]:
LEGS = [1,2,3]
SEGMENTS = [1,2,3]
GOINGS = ['i','o']
SERVICES = ['rcs', 'dlv']

In [7]:
# Helper functions
def get_last_i1_rcf_place(row):
    for s in [3,2,1]:
        if row[f'i1_rcf_{s}_place'] != '?':
            return row[f'i1_rcf_{s}_place']
    raise Exception("cannot find last rcf place")

def get_last_outbound_rcf_place(row):
    for s in [3,2,1]:
        if row[f'o_rcf_{s}_place'] != '?':
            return row[f'o_rcf_{s}_place']
    raise Exception("cannot find last rcf place")

def get_path_indexes(row):
    res = {'o': []}
    end = str(get_last_outbound_rcf_place(row))
    for i in LEGS:
        start = str(row[f'i{i}_dep_1_place'])
        if start != '?':
            ind = start + '-' + end
            res[f'i{i}'] = ind
            res['o'].append(ind)
    return res

In [8]:
# assign last inbound & outbound rcf place
df['last_o_rcf_place'] = df.apply(get_last_outbound_rcf_place, axis =1)
df['last_i_rcf_place'] = df.apply(get_last_i1_rcf_place, axis =1)
# assign path index
df['path_indexes'] = df.apply(get_path_indexes, axis=1)
df

Unnamed: 0,nr,i1_legid,i1_rcs_p,i1_rcs_e,i1_dep_1_p,i1_dep_1_e,i1_dep_1_place,i1_rcf_1_p,i1_rcf_1_e,i1_rcf_1_place,...,o_rcf_3_p,o_rcf_3_e,o_rcf_3_place,o_dlv_p,o_dlv_e,o_hops,legs,last_o_rcf_place,last_i_rcf_place,path_indexes
0,0,5182,199,218,210,215,609,935,736,256,...,?,?,?,780,434,1,2,411,256,"{'o': ['609-411', '431-411'], 'i1': '609-411',..."
1,1,6523,844,584,90,297,700,1935,1415,431,...,?,?,?,3870,445,1,2,256,431,"{'o': ['700-256', '257-256'], 'i1': '700-256',..."
2,2,5878,4380,4119,90,280,456,905,547,700,...,?,?,?,550,1520,1,1,349,700,"{'o': ['456-349'], 'i1': '456-349'}"
3,3,1275,759,169,240,777,173,340,577,349,...,?,?,?,3780,159,1,1,700,671,"{'o': ['173-700'], 'i1': '173-700'}"
4,4,8117,1597,1485,150,241,411,585,612,128,...,?,?,?,4140,4797,2,1,411,166,"{'o': ['411-411'], 'i1': '411-411'}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3938,3939,4225,122,19,240,278,815,830,761,174,...,?,?,?,1665,1300,2,3,293,174,"{'o': ['815-293', '671-293', '671-293'], 'i1':..."
3939,3940,14017,2028,413,270,1825,605,2610,2535,349,...,?,?,?,3780,807,2,1,113,597,"{'o': ['605-113'], 'i1': '605-113'}"
3940,3941,4660,1356,178,240,1359,815,760,716,609,...,?,?,?,5100,4381,2,1,737,609,"{'o': ['815-737'], 'i1': '815-737'}"
3941,3942,6472,2692,1856,90,867,700,1060,1049,113,...,?,?,?,3780,945,2,2,635,113,"{'o': ['700-635', '456-635'], 'i1': '700-635',..."


In [23]:
df['path_indexes']

0       {'o': ['609-411', '431-411'], 'i1': '609-411',...
1       {'o': ['700-256', '257-256'], 'i1': '700-256',...
2                     {'o': ['456-349'], 'i1': '456-349'}
3                     {'o': ['173-700'], 'i1': '173-700'}
4                     {'o': ['411-411'], 'i1': '411-411'}
                              ...                        
3938    {'o': ['815-293', '671-293', '671-293'], 'i1':...
3939                  {'o': ['605-113'], 'i1': '605-113'}
3940                  {'o': ['815-737'], 'i1': '815-737'}
3941    {'o': ['700-635', '456-635'], 'i1': '700-635',...
3942    {'o': ['nan-nan', 'nan-nan', 'nan-nan'], 'i1':...
Name: path_indexes, Length: 3943, dtype: object

In [9]:
# Create Indexes
gds.run_cypher('CREATE CONSTRAINT airport_unique IF NOT EXISTS FOR (n:Airport) REQUIRE n.airportId  IS UNIQUE')

gds.run_cypher('CREATE CONSTRAINT airport_entry_unique IF NOT EXISTS FOR (n:EntryPoint) REQUIRE n.airportId  IS UNIQUE')

gds.run_cypher('CREATE INDEX check_in_shipment_id IF NOT EXISTS FOR ()-[r:RECEPTION]-() ON (r.shipmentId)')
gds.run_cypher('CREATE INDEX check_in_path IF NOT EXISTS FOR ()-[r:RECEPTION]-() ON (r.pathIndex)')
gds.run_cypher('CREATE CONSTRAINT airport_departure_checkpoint_unique IF NOT EXISTS FOR (n:DepartureWarehouse) REQUIRE n.airportId  IS UNIQUE')

gds.run_cypher('CREATE INDEX confirm_shipment_id IF NOT EXISTS FOR ()-[r:DEPARTURE]-() ON (r.shipmentId)')
gds.run_cypher('CREATE INDEX confirm_path IF NOT EXISTS FOR ()-[r:DEPARTURE]-() ON (r.pathIndex)')
gds.run_cypher('CREATE CONSTRAINT airport_departure_unique IF NOT EXISTS FOR (n:DeparturePoint) REQUIRE n.airportId  IS UNIQUE')

gds.run_cypher('CREATE INDEX depart_shipment_id IF NOT EXISTS FOR ()-[r:TRANSPORT]-() ON (r.shipmentId)')
gds.run_cypher('CREATE INDEX depart_path IF NOT EXISTS FOR ()-[r:TRANSPORT]-() ON (r.pathIndex)')
gds.run_cypher('CREATE CONSTRAINT airport_arrival_unique IF NOT EXISTS FOR (n:ArrivalWarehouse) REQUIRE n.airportId  IS UNIQUE')

gds.run_cypher('CREATE INDEX deliver_shipment_id IF NOT EXISTS FOR ()-[r:DELIVERY]-() ON (r.shipmentId)')
gds.run_cypher('CREATE INDEX deliver_path IF NOT EXISTS FOR ()-[r:DELIVERY]-() ON (r.pathIndex)')
gds.run_cypher('CREATE CONSTRAINT airport_destination_unique  IF NOT EXISTS FOR (n:Destination) REQUIRE n.airportId  IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT airport_transfer_unique  IF NOT EXISTS FOR (n:TransferPoint) REQUIRE n.airportId  IS UNIQUE')

gds.run_cypher('CREATE INDEX transfer_shipment_id IF NOT EXISTS FOR ()-[r:TRANSFER]-() ON (r.shipmentId)')
gds.run_cypher('CREATE INDEX transfer_path IF NOT EXISTS FOR ()-[r:TRANSFER]-() ON (r.pathIndex)')

## Ingest Nodes
Nodes will consist of Airports and freight forwarding checkpoints

In [10]:
# Load nodes and draw location at relationships
cols = df.columns.tolist()

airport_ids = set()
for col in cols:
    if 'place' in col:
        airport_ids.update([int(i) for i in df.loc[(df[col] != '?') & (df[col].notna()), col].unique().tolist()])

# Creating Names for the airports will help with human readability
Faker.seed(0)
fake = Faker()
def single_name_city():
   n = fake.city()
   while ' ' in n: # multi word names are a bit of a mouthful
       n = fake.city()
   return n

airports = [{'id':airport_id , 'name':single_name_city()} for airport_id in airport_ids]

In [11]:
airports

[{'id': 514, 'name': 'Changchester'},
 {'id': 515, 'name': 'Hullport'},
 {'id': 520, 'name': 'Howardborough'},
 {'id': 524, 'name': 'Ramoshaven'},
 {'id': 527, 'name': 'Bryanside'},
 {'id': 528, 'name': 'Davismouth'},
 {'id': 530, 'name': 'Rodriguezside'},
 {'id': 535, 'name': 'Jasmineside'},
 {'id': 538, 'name': 'Lisaside'},
 {'id': 543, 'name': 'Seanfurt'},
 {'id': 548, 'name': 'Barbaraport'},
 {'id': 550, 'name': 'Jonesberg'},
 {'id': 555, 'name': 'Masseyhaven'},
 {'id': 556, 'name': 'Aimeebury'},
 {'id': 561, 'name': 'Rileymouth'},
 {'id': 563, 'name': 'Kellerstad'},
 {'id': 567, 'name': 'Meganbury'},
 {'id': 571, 'name': 'Lauramouth'},
 {'id': 575, 'name': 'Cabreraville'},
 {'id': 576, 'name': 'Chadland'},
 {'id': 579, 'name': 'Michaelburgh'},
 {'id': 580, 'name': 'Gonzalezstad'},
 {'id': 583, 'name': 'Masseyshire'},
 {'id': 584, 'name': 'Alyssaville'},
 {'id': 588, 'name': 'Wadetown'},
 {'id': 593, 'name': 'Maldonadoshire'},
 {'id': 594, 'name': 'Deborahfurt'},
 {'id': 597, 'name

In [12]:
# Load nodes and draw location at relationships
gds.run_cypher('''
    UNWIND $airports AS a
    WITH a.id AS airportId, a.name AS name
    MERGE(n0:Airport {airportId: airportId}) SET n0.name=name
    MERGE(n1:EntryPoint {airportId: airportId}) SET n1.name=name
    MERGE(n2:DepartureWarehouse{airportId: airportId}) SET n2.name=name
    MERGE(n3:DeparturePoint {airportId: airportId}) SET n3.name=name
    MERGE(n4:ArrivalWarehouse {airportId: airportId}) SET n4.name=name
    MERGE(n5:TransferPoint {airportId: airportId}) SET n5.name=name
    MERGE(n6:Destination {airportId: airportId}) SET n6.name=name

    MERGE(n0)<-[:LOCATED_AT]-(n1)
    MERGE(n0)<-[:LOCATED_AT]-(n2)
    MERGE(n0)<-[:LOCATED_AT]-(n3)
    MERGE(n0)<-[:LOCATED_AT]-(n4)
    MERGE(n0)<-[:LOCATED_AT]-(n5)
    MERGE(n0)<-[:LOCATED_AT]-(n6)
    RETURN count(n0), count(n1), count(n2), count(n3), count(n4), count(n5), count(n6)
    ''', params={'airports':airports})

Unnamed: 0,count(n0),count(n1),count(n2),count(n3),count(n4),count(n5),count(n6)
0,237,237,237,237,237,237,237


## Ingest Relationships
Relationships will consist of the freight forwarding steps as well as links representing connection between transportation segments and transfer between the inbound and output stage of the shipment

In [13]:
LEGS

[1, 2, 3]

In [14]:
# (n:EntryPoint)-[r:RECEPTION]-(m:DepartureWarehouse) RCS inbound
for l in LEGS:
    sub_dict = df.loc[(df[f'i{l}_legid'] != '?') & (df[f'i{l}_legid'].notna()),
                      ['nr', f'i{l}_legid', f'i{l}_rcs_p', f'i{l}_rcs_e', f'i{l}_dep_1_place', 'path_indexes']].to_dict('records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.i{l}_dep_1_place) AS airportId,
            toInteger(relMap.i{l}_legid) AS legId,
            toInteger(relMap.i{l}_rcs_e) AS effectiveMinutes,
            toInteger(relMap.i{l}_rcs_p) AS plannedMinutes,
            relMap.path_indexes.i{l} AS pathIndex
        MATCH(n1:EntryPoint {{airportId: airportId}})
        MATCH(n2:DepartureWarehouse {{airportId: airportId}})
        MERGE(n1)-[r:RECEPTION {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
        r.effectiveMinutes=effectiveMinutes,
        r.pathIndex = pathIndex
        RETURN count(r)
    ''', params={'relMaps':sub_dict, 'legNumber':l})
    print(res)

   count(r)
0      3942
   count(r)
0      2624
   count(r)
0      1366


In [15]:
sub_dict = df.loc[(df[f'o_legid'] != '?') & (df[f'o_legid'].notna()),
                  ['nr', 'o_legid', 'o_rcs_p', 'o_rcs_e', 'o_dep_1_place', 'path_indexes']].to_dict('records')
sub_dict

[{'nr': '0',
  'o_legid': '10121',
  'o_rcs_p': '9919',
  'o_rcs_e': '7559',
  'o_dep_1_place': '256',
  'path_indexes': {'o': ['609-411', '431-411'],
   'i1': '609-411',
   'i2': '431-411'}},
 {'nr': '1',
  'o_legid': '626',
  'o_rcs_p': '3030',
  'o_rcs_e': '3016',
  'o_dep_1_place': '431',
  'path_indexes': {'o': ['700-256', '257-256'],
   'i1': '700-256',
   'i2': '257-256'}},
 {'nr': '2',
  'o_legid': '12691',
  'o_rcs_p': '1433',
  'o_rcs_e': '642',
  'o_dep_1_place': '700',
  'path_indexes': {'o': ['456-349'], 'i1': '456-349'}},
 {'nr': '3',
  'o_legid': '12932',
  'o_rcs_p': '3963',
  'o_rcs_e': '3176',
  'o_dep_1_place': '671',
  'path_indexes': {'o': ['173-700'], 'i1': '173-700'}},
 {'nr': '4',
  'o_legid': '12805',
  'o_rcs_p': '152',
  'o_rcs_e': '179',
  'o_dep_1_place': '166',
  'path_indexes': {'o': ['411-411'], 'i1': '411-411'}},
 {'nr': '5',
  'o_legid': '6430',
  'o_rcs_p': '136',
  'o_rcs_e': '29',
  'o_dep_1_place': '128',
  'path_indexes': {'o': ['815-597', '128-59

In [16]:
# (n:TransferPoint)-[r:RECEPTION]-(m:DepartureWarehouse) RCS outbound

sub_dict = df.loc[(df[f'o_legid'] != '?') & (df[f'o_legid'].notna()),
                  ['nr', 'o_legid', 'o_rcs_p', 'o_rcs_e', 'o_dep_1_place', 'path_indexes']].to_dict('records')
gds.run_cypher('''
    UNWIND $relMaps AS relMap
    WITH toInteger(relMap.nr) AS shipmentId,
        toInteger(relMap.o_dep_1_place) AS airportId,
        toInteger(relMap.o_legid) AS legId,
        toInteger(relMap.o_rcs_e) AS effectiveMinutes,
        toInteger(relMap.o_rcs_p) AS plannedMinutes,
        relMap.path_indexes.o AS pathIndex
    MATCH(n1:TransferPoint {airportId: airportId})
    MATCH(n2:DepartureWarehouse {airportId: airportId})
    MERGE(n1)-[r:RECEPTION {shipmentId: shipmentId, legId: legId, legNumber: -1}]->(n2)
    ON CREATE SET r.plannedMinutes=plannedMinutes,
        r.effectiveMinutes=effectiveMinutes,
        r.pathIndex = pathIndex
    RETURN count(r)
''', params={'relMaps':sub_dict})

Unnamed: 0,count(r)
0,3942


In [17]:
# (:DepartureWarehouse)-[r:DEPARTURE]-(:DeparturePoint) DEP1 inbound
for l in LEGS:
    print(f'== LEG {l} ======================')
    for s in SEGMENTS:
        print(f'-- SEGMENT {s} ----------------------')
        source_label = 'DepartureWarehouse'
        if s > 1:
            source_label = 'ArrivalWarehouse'
        sub_dict = df.loc[(df[f'i{l}_dep_{s}_place'] != '?') & (df[f'i{l}_legid'].notna()),
            ['nr', f'i{l}_legid', f'i{l}_dep_{s}_p', f'i{l}_dep_{s}_e', f'i{l}_dep_{s}_place', 'path_indexes']]\
            .to_dict('records')
        res = gds.run_cypher(f'''
            UNWIND $relMaps AS relMap
            WITH toInteger(relMap.nr) AS shipmentId,
                toInteger(relMap.i{l}_dep_{s}_place) AS airportId,
                toInteger(relMap.i{l}_legid) AS legId,
                toInteger(relMap.i{l}_dep_{s}_e) AS effectiveMinutes,
                toInteger(relMap.i{l}_dep_{s}_p) AS plannedMinutes,
                relMap.path_indexes.i{l} AS pathIndex
            MATCH(n1:{source_label} {{airportId: airportId}})
            MATCH(n2:DeparturePoint {{airportId: airportId}})
            MERGE(n1)-[r:DEPARTURE {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
            ON CREATE SET r.plannedMinutes=plannedMinutes,
                r.effectiveMinutes=effectiveMinutes,
                r.pathIndex = pathIndex
            RETURN count(r)
        ''', params={'relMaps':sub_dict, 'legNumber':l, 'segmentNumber':s})
        print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1195
-- SEGMENT 3 ----------------------
   count(r)
0        23
-- SEGMENT 1 ----------------------
   count(r)
0      2624
-- SEGMENT 2 ----------------------
   count(r)
0       791
-- SEGMENT 3 ----------------------
   count(r)
0        14
-- SEGMENT 1 ----------------------
   count(r)
0      1366
-- SEGMENT 2 ----------------------
   count(r)
0       391
-- SEGMENT 3 ----------------------
   count(r)
0         8


In [18]:
# (:DepartureWarehouse)-[r:DEPARTURE]-(:DeparturePoint) DEP1 outbound
for s in SEGMENTS:
    source_label = 'DepartureWarehouse'
    if s > 1:
        source_label = 'ArrivalWarehouse'
    print(f'-- SEGMENT {s} ----------------------')
    sub_dict = df.loc[(df[f'o_dep_{s}_place'] != '?') & (df[f'o_legid'].notna()),
                      ['nr', f'o_legid', f'o_dep_{s}_p', f'o_dep_{s}_e', f'o_dep_{s}_place', 'path_indexes']]\
        .to_dict('records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.o_dep_{s}_place) AS airportId,
            toInteger(relMap.o_legid) AS legId,
            toInteger(relMap.o_dep_{s}_e) AS effectiveMinutes,
            toInteger(relMap.o_dep_{s}_p) AS plannedMinutes,
            relMap.path_indexes.o AS pathIndex
        MATCH(n1:{source_label} {{airportId: airportId}})
        MATCH(n2:DeparturePoint {{airportId: airportId}})
        MERGE(n1)-[r:DEPARTURE {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
            r.effectiveMinutes=effectiveMinutes,
            r.pathIndex = pathIndex
        RETURN count(r)
    ''', params={'relMaps':sub_dict, 'legNumber':-1, 'segmentNumber':s})
    print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1845
-- SEGMENT 3 ----------------------
   count(r)
0        26


In [19]:
# (:DeparturePoint)-[r:TRANSPORT]-(:ArrivalWarehouse) inbound
for l in LEGS:
    print(f'== LEG {l} ======================')
    for s in SEGMENTS:
        print(f'-- SEGMENT {s} ----------------------')
        sub_dict = df.loc[(df[f'i{l}_rcf_{s}_place'] != '?') & (df[f'i{l}_legid'].notna()),
                          ['nr', f'i{l}_legid', f'i{l}_rcf_{s}_p', f'i{l}_rcf_{s}_e', f'i{l}_dep_{s}_place', f'i{l}_rcf_{s}_place', 'path_indexes']]\
            .to_dict('records')
        res = gds.run_cypher(f'''
            UNWIND $relMaps AS relMap
            WITH toInteger(relMap.nr) AS shipmentId,
                toInteger(relMap.i{l}_dep_{s}_place) AS fromAirportId,
                toInteger(relMap.i{l}_rcf_{s}_place) AS toAirportId,
                toInteger(relMap.i{l}_legid) AS legId,
                toInteger(relMap.i{l}_rcf_{s}_e) AS effectiveMinutes,
                toInteger(relMap.i{l}_rcf_{s}_p) AS plannedMinutes,
                relMap.path_indexes.i{l} AS pathIndex
            MATCH(n1:DeparturePoint {{airportId: fromAirportId}})
            MATCH(n2:ArrivalWarehouse {{airportId: toAirportId}})
            MERGE(n1)-[r:TRANSPORT {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
            ON CREATE SET r.plannedMinutes=plannedMinutes,
                r.effectiveMinutes=effectiveMinutes,
                r.pathIndex = pathIndex
            RETURN count(r)
        ''', params={'relMaps':sub_dict, 'legNumber':l, 'segmentNumber':s})
        print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1195
-- SEGMENT 3 ----------------------
   count(r)
0        23
-- SEGMENT 1 ----------------------
   count(r)
0      2624
-- SEGMENT 2 ----------------------
   count(r)
0       791
-- SEGMENT 3 ----------------------
   count(r)
0        14
-- SEGMENT 1 ----------------------
   count(r)
0      1366
-- SEGMENT 2 ----------------------
   count(r)
0       391
-- SEGMENT 3 ----------------------
   count(r)
0         8


In [20]:
# (:DeparturePoint)-[r:TRANSPORT]-(:ArrivalWarehouse) RCF outbound
for s in SEGMENTS:
    print(f'-- SEGMENT {s} ----------------------')
    sub_dict = df.loc[(df[f'o_rcf_{s}_place'] != '?') & (df[f'o_legid'].notna()),
                      ['nr', f'o_legid', f'o_rcf_{s}_p', f'o_rcf_{s}_e',  f'o_dep_{s}_place', f'o_rcf_{s}_place', 'path_indexes']]\
        .to_dict('records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.o_dep_{s}_place) AS fromAirportId,
            toInteger(relMap.o_rcf_{s}_place) AS toAirportId,
            toInteger(relMap.o_legid) AS legId,
            toInteger(relMap.o_rcf_{s}_e) AS effectiveMinutes,
            toInteger(relMap.o_rcf_{s}_p) AS plannedMinutes,
            relMap.path_indexes.o AS pathIndex
        MATCH(n1:DeparturePoint {{airportId: fromAirportId}})
        MATCH(n2:ArrivalWarehouse {{airportId: toAirportId}})
        MERGE(n1)-[r:TRANSPORT {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
            r.effectiveMinutes=effectiveMinutes,
            r.pathIndex = pathIndex
        RETURN count(r)
    ''', params={'relMaps':sub_dict, 'legNumber':-1, 'segmentNumber':s})
    print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1845
-- SEGMENT 3 ----------------------
   count(r)
0        26


In [21]:
# (:ArrivalWarehouse)-[r:DELIVERY]-(:TransferPoint) DLV inbound
for l in LEGS:
    sub_dict = df.loc[(df[f'i{l}_legid'] != '?') & (df[f'i{l}_legid'].notna()),
                      ['nr', f'i{l}_legid', f'i{l}_dlv_p', f'i{l}_dlv_e', 'last_i_rcf_place', 'path_indexes']].to_dict('records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.last_i_rcf_place) AS airportId,
            toInteger(relMap.i{l}_legid) AS legId,
            toInteger(relMap.i{l}_dlv_e) AS effectiveMinutes,
            toInteger(relMap.i{l}_dlv_p) AS plannedMinutes,
            relMap.path_indexes.i{l} AS pathIndex
        MATCH(n1:ArrivalWarehouse {{airportId: airportId}})
        MATCH(n2:TransferPoint {{airportId: airportId}})
        MERGE(n1)-[r:DELIVERY {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
            r.effectiveMinutes=effectiveMinutes,
            r.pathIndex = pathIndex
        RETURN count(r)
    ''', params={'relMaps':sub_dict, 'legNumber':l})
    print(res)

   count(r)
0      3942
   count(r)
0      2624
   count(r)
0      1366


In [22]:
# (:ArrivalWarehouse)-[r:DELIVERY]-(:Destination) DLV outbound
sub_dict = df.loc[(df[f'o_legid'] != '?') & (df[f'o_legid'].notna()),
                  ['nr', 'o_legid', f'o_dlv_p', f'o_dlv_e', 'last_o_rcf_place', 'path_indexes']].to_dict('records')
res = gds.run_cypher('''
    UNWIND $relMaps AS relMap
    WITH toInteger(relMap.nr) AS shipmentId,
        toInteger(relMap.last_o_rcf_place) AS airportId,
        toInteger(relMap.o_legid) AS legId,
        toInteger(relMap.o_dlv_e) AS effectiveMinutes,
        toInteger(relMap.o_dlv_p) AS plannedMinutes,
        relMap.path_indexes.o AS pathIndex
    MATCH(n1:ArrivalWarehouse {airportId: airportId})
    MATCH(n2:Destination {airportId: airportId})
    MERGE(n1)-[r:DELIVERY {shipmentId: shipmentId, legId: legId, legNumber: $legNumber}]->(n2)
    ON CREATE SET r.plannedMinutes=plannedMinutes,
        r.effectiveMinutes=effectiveMinutes,
        r.pathIndex = pathIndex
    RETURN count(r)
''', params={'relMaps':sub_dict, 'legNumber':-1})
print(res)

   count(r)
0      3942
