# Draw flows between nodes as a network

### Load data

In [2]:
# Load data
import pandas as pd
moves = pd.read_csv('data-raw/ED_bed_moves_raw_2022-03-21.csv')

In [3]:
moves.head()

Unnamed: 0,csn,location_string,admission,discharge,location
0,1017787509,ED^null^null,2020-02-04T13:58:58Z,2020-02-04T14:38:33Z,ED Waiting
1,1017787509,ED^UCHED ADULT TRIAGE^NONE,2020-02-04T14:38:33Z,2020-02-04T16:49:23Z,ED TRIAGE
2,1017787509,ED^UCHED UTC11^11-UTC,2020-02-04T16:49:23Z,2020-02-04T17:36:21Z,ED UTC
3,1017787509,ED^UCHED UTC06^06-UTC,2020-02-04T17:36:21Z,2020-02-04T17:53:28Z,ED UTC
4,1017787509,ED^UCHED ADULT TRIAGE^NONE,2020-02-04T17:53:28Z,2020-02-04T18:43:00Z,ED TRIAGE


In [10]:
moves['admission']  = pd.to_datetime(moves['admission'])
moves['discharge']  = pd.to_datetime(moves['discharge'])
moves.head()

Unnamed: 0,csn,location_string,admission,discharge,location,next_location,next_csn
0,1017787509,ED^null^null,2020-02-04 13:58:58+00:00,2020-02-04 14:38:33+00:00,ED Waiting,ED TRIAGE,1017787509
1,1017787509,ED^UCHED ADULT TRIAGE^NONE,2020-02-04 14:38:33+00:00,2020-02-04 16:49:23+00:00,ED TRIAGE,ED UTC,1017787509
2,1017787509,ED^UCHED UTC11^11-UTC,2020-02-04 16:49:23+00:00,2020-02-04 17:36:21+00:00,ED UTC,ED UTC,1017787509
3,1017787509,ED^UCHED UTC06^06-UTC,2020-02-04 17:36:21+00:00,2020-02-04 17:53:28+00:00,ED UTC,ED TRIAGE,1017787509
4,1017787509,ED^UCHED ADULT TRIAGE^NONE,2020-02-04 17:53:28+00:00,2020-02-04 18:43:00+00:00,ED TRIAGE,Left,1027701584


In [11]:
print(moves.shape)

(1094418, 7)


In [12]:
print(moves.csn.nunique())

293554


In [13]:
print(moves.admission.max())

2022-03-21 10:01:00+00:00


In [23]:
moves_reduced = moves[(moves.admission >= '2021-01-01') & (moves.admission <= '2021-01-31')]
print(moves_reduced.shape)
print(moves_reduced.csn.nunique())

(21177, 7)
5193


### Calculate next location

In [24]:
moves_reduced['next_location'] = moves_reduced.groupby('csn')['location'].shift(-1)
moves_reduced['next_csn'] = moves_reduced['csn'].astype(str).shift(-1)
moves_reduced.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  moves_reduced['next_location'] = moves_reduced.groupby('csn')['location'].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  moves_reduced['next_csn'] = moves_reduced['csn'].astype(str).shift(-1)


Unnamed: 0,csn,location_string,admission,discharge,location,next_location,next_csn
86,1025015224,ED^null^null,2021-01-09 13:52:46+00:00,2021-01-09 13:58:00+00:00,ED Waiting,ED UTC,1025015224
87,1025015224,ED^UCHED UTC POOL01^UTC PTWR,2021-01-09 13:58:00+00:00,2021-01-09 17:09:00+00:00,ED UTC,,1025419125
476,1025419125,ED^null^null,2021-01-29 17:43:30+00:00,2021-01-29 17:54:00+00:00,ED Waiting,ED RAT,1025419125
477,1025419125,ED^UCHED RAT05^RAT-05,2021-01-29 17:54:00+00:00,2021-01-29 18:21:00+00:00,ED RAT,ED MAJORS,1025419125
478,1025419125,ED^NON COVID MAJORS 07^07-NON COVID MAJORS,2021-01-29 18:21:00+00:00,2021-01-29 20:02:00+00:00,ED MAJORS,,1025083669
587,1025083669,ED^null^null,2021-01-13 12:31:17+00:00,2021-01-13 12:48:00+00:00,ED Waiting,ED RAT,1025083669
588,1025083669,ED^UCHED RAT CHAIR^RAT-CHAIR,2021-01-13 12:48:00+00:00,2021-01-13 13:02:00+00:00,ED RAT,ED MAJORS,1025083669
589,1025083669,ED^NON COVID MAJORS 01^01-NON COVID MAJORS,2021-01-13 13:02:00+00:00,2021-01-13 16:18:00+00:00,ED MAJORS,,1024934321
929,1024934321,ED^null^null,2021-01-05 15:54:44+00:00,2021-01-05 18:05:00+00:00,ED Waiting,ED UTC,1024934321
930,1024934321,ED^UCHED UTC POOL01^UTC PTWR,2021-01-05 18:05:00+00:00,2021-01-05 18:23:00+00:00,ED UTC,,1025063604


In [26]:
# Where next_location is NaN, set it to 'Left'
moves_reduced['next_location'] = moves_reduced['next_location'].fillna('Left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  moves_reduced['next_location'] = moves_reduced['next_location'].fillna('Left')


### Create an edge list

In [27]:
edge_list = moves_reduced.groupby(['location', 'next_location']).size().reset_index(name='frequency')
edge_list.head(20)

Unnamed: 0,location,next_location,frequency
0,ED MAJORS,ED MAJORS,313
1,ED MAJORS,ED RAT,159
2,ED MAJORS,ED RESUS,41
3,ED MAJORS,ED TAF,64
4,ED MAJORS,ED TRIAGE,4
5,ED MAJORS,ED UTC,23
6,ED MAJORS,ED Waiting,7
7,ED MAJORS,Left,1255
8,ED MAJORS,OTF,348
9,ED MAJORS,SDEC,863


In [7]:
### Save the edge list

In [28]:
edge_list.to_csv('edge_list.csv', index=False)

### Plot the network

First step is to install the plotting module (Note - pygraphviz is tricky to install and doesn't work in this Jupyter environment)

In [24]:
!pip install pygraphviz

Collecting pygraphviz
  Downloading pygraphviz-1.13.tar.gz (104 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.6/104.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: pygraphviz
  Building wheel for pygraphviz (pyproject.toml) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for pygraphviz [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[61 lines of output][0m
  [31m   [0m running bdist_wheel
  [31m   [0m running build
  [31m   [0m running build_py
  [31m   [0m creating build
  [31m   [0m creating build/lib.linux-x86_64-cpython-310
  [31m   [0m creating build/lib.linux-x86_64-cpython-3

In [23]:
import pygraphviz as pgv
import matplotlib.pyplot as plt


ModuleNotFoundError: No module named 'pygraphviz'