In [1]:
from dotenv import load_dotenv
load_dotenv()

# TODO: Add Age Logic as Question and Tag
# TODO: Add all the message of Services to Services Node
# TODO: Add User Analytics to Graph

from import_neo.base import BaseNeoImporter
from import_neo.base_api import API2NeoImporter

## Make sure to run Local Neo4j

```shell
docker-compose up -d --build
```

In [2]:
# Import source switch: True runs the DigitalOcean Spaces backup import cell,
# False runs the MHP-API import cell.
RUN_SPACES = False

In [3]:
# WARNING: destructive. Deletes every node (and its relationships) in the
# connected Neo4j database before the import cells below run.
neo = BaseNeoImporter(node_type='Services')
with neo.driver.session() as session:
    session.run("MATCH (n) DETACH DELETE n;")

# Use Backup in Digital Ocean Spaces

In [4]:
def _run_and_preview(importer, heading):
    """Print ``heading``, run ``importer``, then print the first entry of its ``data``."""
    print(heading)
    importer.run()
    print(importer.data[0])


# Only executed when the Spaces backup is selected as the import source.
if RUN_SPACES:
    impo_services = BaseNeoImporter(node_type='Services')
    _run_and_preview(impo_services, 'Services Data \n')

    impo_q = BaseNeoImporter(node_type='Questions')
    _run_and_preview(impo_q, '\nQuestions Data \n')

## Use API to Import to Neo4j

In [5]:
def _run_and_report(importer, heading):
    """Print ``heading``, run ``importer``, then print its first ``data`` entry and its ``tags``."""
    print(heading)
    importer.run()
    print(importer.data[0])
    print(importer.tags)


# Only executed when the API is selected as the import source.
if not RUN_SPACES:
    # The production server is used here. To import from a local stack instead,
    # the MHP docker services must be running locally (on port 80) and
    # api_path must point at them.
    api_path = 'https://mhpportal.app'
    print('Using API: ' + api_path)

    q = API2NeoImporter(node_type='Questions', api_path=api_path)
    _run_and_report(q, 'Questions Data \n')

    s = API2NeoImporter(node_type='Services', api_path=api_path)
    _run_and_report(s, '\nServices Data \n')


Using API: https://mhpportal.app
Questions Data 

{'tag_nodes': 30, 'nodes': 28, 'rel': 106}
{'id': 1, 'question': 'Is anyone scaring, threatening or hurting you or your children?', 'tags': ['Domestic Violence', 'Shelter', 'Family'], 'main_tag': 'Family', 'mongo_id': 1, 'name': 'Is anyone scaring, threatening or hurting you or your children?'}
['Adolescent', 'Child Support', 'Children', 'Disability', 'Domestic Violence', 'Education', 'Elder', 'Employment', 'Family', 'Food Insecurity', 'Food and Nutrition', 'Health Insurance', 'Home', 'Housing', 'Identity', 'Income', 'Indigent', 'LGBTQ', 'Legal Assistance', 'Legal Services', 'Low Income', 'Mental Health', 'Pubic Benefits', 'Public Benefits', 'Shelter', 'Social Security', 'Special Education', 'Transportation', 'Women Health', 'Young Adult']

Services Data 

{'tag_nodes': 31, 'nodes': 188, 'rel': 336}
{'name': 'S. AZ Gender Alliance', 'phone': 5204777096, 'address': '2030 EAST BROADWAY', 'general_topic': 'LGBTQ', 'tags': ['LGBTQ'], 'city'

In [6]:
# QA check: uncomment to try an invalid node type ('Service' instead of 'Services')
# impo_services = BaseNeoImporter(node_type='Service')

# Cypher Queries
<img src="static/graph.png">

* https://neo4j.com/docs/graph-data-science/current/algorithms/node-similarity/
* https://neo4j.com/docs/graph-data-science/current/algorithms/knn/
* https://neo4j.com/docs/graph-data-science/current/algorithms/bfs/
* https://neo4j.com/docs/graph-data-science/current/machine-learning/linkprediction-pipelines/link-prediction/

## Show all TAGGED relationships
<hr>

```cypher
MATCH p=()-[r:TAGGED]-() RETURN p;
```

## Get Node Similarities
<hr>

```cypher
CALL gds.graph.project(
    'myGraph',
    ['Tags', 'Services', 'Questions'],
    {
        TAGGED: {
        }
    }
);
```


```cypher
CALL gds.nodeSimilarity.write('myGraph', {
    writeRelationshipType: 'SIMILAR',
    writeProperty: 'score'
})
YIELD nodesCompared, relationshipsWritten;
```


```cypher
MATCH p=()-[r:SIMILAR]-() WHERE r.score > 0.75 RETURN p;
```

## Link Prediction
<hr>

```cypher

CALL gds.beta.pipeline.linkPrediction.create('pipe');

                                             
CALL gds.beta.pipeline.linkPrediction.configureSplit('pipe', {
  testFraction: 0.25,
  trainFraction: 0.6,
  validationFolds: 3
})
YIELD splitConfig;


CALL gds.alpha.pipeline.linkPrediction.addMLP('pipe',
{hiddenLayerSizes: [4, 2], penalty: 1, patience: 2})
YIELD parameterSpace;


CALL gds.alpha.pipeline.linkPrediction.configureAutoTuning('pipe', {
  maxTrials: 2
}) YIELD autoTuningConfig;



CALL gds.graph.project(
  'testGraph',
  {
    Services: {
      properties: ['created']
    }
  },
  {
    TAGGED: {
      orientation: 'UNDIRECTED'
    }
  }
);


CALL gds.beta.pipeline.linkPrediction.train('testGraph', {
  pipeline: 'pipe',
  modelName: 'lp-pipeline-model',
  metrics: ['AUCPR', 'OUT_OF_BAG_ERROR'],
  targetRelationshipType: 'TAGGED',
  randomSeed: 73
}) YIELD modelInfo, modelSelectionStats
RETURN
  modelInfo.bestParameters AS winningModel,
  modelInfo.metrics.AUCPR.train.avg AS avgTrainScore,
  modelInfo.metrics.AUCPR.outerTrain AS outerTrainScore,
  modelInfo.metrics.AUCPR.test AS testScore,
  [cand IN modelSelectionStats.modelCandidates | cand.metrics.AUCPR.validation.avg] AS validationScores;
    
    
CALL gds.beta.pipeline.linkPrediction.predict.stream('testGraph', {
  modelName: 'lp-pipeline-model',
  topN: 5,
  threshold: 0.5
})
 YIELD node1, node2, probability
 RETURN gds.util.asNode(node1).name AS person1, gds.util.asNode(node2).name AS person2, probability
 ORDER BY probability DESC, person1;
```

## Get Services and Tags not tied to a Question
<hr>

### Get Services not tied to Question

```cypher
MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
WHERE NOT (n1)-[:TAGGED]-(:Questions) 
// Young Adult Resources is tied by Age Question
      AND n1.name <> 'Young Adult Resources' 
      AND NOT (n)-[:TAGGED]-(n1)-[:TAGGED]-(:Questions)
RETURN n.id as service_id, n.name as service, COLLECT(n1.name) as tags 
ORDER BY tags;
```


### Get Tags not tied to Question

```cypher
MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
WHERE NOT (n1)-[:TAGGED]-(:Questions) 
// Young Adult Resources is tied by Age Question
      AND n1.name <> 'Young Adult Resources' 
WITH n1.name as tags 
RETURN DISTINCT tags as tag
ORDER BY tag;
```

In [7]:
# One row per service that has at least one tag with no TAGGED link to a
# Questions node (the 'Young Adult Resources' tag is excluded); the matching
# tag names are collected into the `tags` column.
UNLINKED_SERVICES_QUERY = """
        MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
    WHERE NOT (n1)-[:TAGGED]-(:Questions) 
    // Young Adult Resources is tied by Age Question
          AND n1.name <> 'Young Adult Resources' 
          AND NOT (n)-[:TAGGED]-(n1)-[:TAGGED]-(:Questions)
    RETURN n.id as service_id, n.name as service, COLLECT(n1.name) as tags 
    ORDER BY tags;
    """

neo = BaseNeoImporter(node_type='Services')

with neo.driver.session() as session:
    data = session.run(UNLINKED_SERVICES_QUERY)
    # Convert while the session is still open.
    df = data.to_df()

In [8]:
# Display the services/tags frame built by the query cell above.
df

Unnamed: 0,service_id,service,tags
0,632259bda6904810a4353dc2,"COPE Community Services, Inc.",[Addiction Resources]
1,632259bda6904810a4353dc1,COPE Community Services,[Addiction Resources]
2,632259bda6904810a4353dc0,Cope Behavioral Services,[Addiction Resources]
3,632259bda6904810a4353dbf,COPE Community Services,[Addiction Resources]
4,632259bda6904810a4353dbe,"COPE Community Services, Inc.",[Addiction Resources]
...,...,...,...
60,632259bda6904810a4353dce,AZ Copper Card,[Prescription Medication Assistance]
61,632259bda6904810a4353de4,TEP - Lifeline Discount Program,[Utility Assistance]
62,632259bda6904810a4353de3,Salvation Army Utilities/Rent,[Utility Assistance]
63,632259bda6904810a4353de2,Project PPEP,[Utility Assistance]


In [9]:
# Distinct names of service tags that have no TAGGED link to a Questions node
# (the 'Young Adult Resources' tag is excluded), sorted alphabetically.
UNLINKED_TAGS_QUERY = """
        MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
        WHERE NOT (n1)-[:TAGGED]-(:Questions) 
        // Young Adult Resources is tied by Age Question
              AND n1.name <> 'Young Adult Resources' 
        WITH n1.name as tags 
        RETURN DISTINCT tags as tag
        ORDER BY tag;
        """

with neo.driver.session() as session:
    data = session.run(UNLINKED_TAGS_QUERY)
    # Convert while the session is still open.
    tag = data.to_df()

In [10]:
# Display the distinct tag names returned by the query cell above.
tag

Unnamed: 0,tag
0,Addiction Resources
1,Dental
2,Health Care Resources
3,Immigrant/Refugee Resources
4,Prescription Medication Assistance
5,Substance Abuse
6,Utility Assistance


# Get User Data

In [11]:
import requests

# Platform endpoints: the zip-code listing, and the per-zip-code data
# endpoint (formatted with a zip-code entry's 'id').
zip_codes_url = 'https://mhpportal.app/api/v1/platform/zip_codes'
data_zip_url = 'https://mhpportal.app/api/v1/platform/data/%s'

# Seconds to wait for each HTTP call; without a timeout `requests` can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Fetch the zip-code entries.
resp = requests.get(zip_codes_url, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()  # fail loudly instead of parsing an error page as data
z_c = resp.json()

# Fetch the records for every zip-code entry and flatten them into one list.
json_data = []
for z in z_c:
    resp = requests.get(data_zip_url % z['id'], timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    records = resp.json()
    # Same failure type as the original `list + non-list` concatenation, but
    # with a message that names the offending entry.
    if not isinstance(records, list):
        raise TypeError(
            f"Expected a list of records for zip code id {z['id']!r}, "
            f"got {type(records).__name__}"
        )
    # Extend in place rather than rebuilding the list on each iteration.
    json_data.extend(records)

In [12]:
# Preview the first fetched record to show the shape of the payload.
json_data[0]

{'dob': 1992,
 'zip_code': 85719,
 'answers': [1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'top_services': ['632259bda6904810a4353d45',
  '632259bda6904810a4353d50',
  '632259bda6904810a4353d33',
  '632259bda6904810a4353d36',
  '632259bda6904810a4353d3a',
  '632259bda6904810a4353d35',
  '632259bda6904810a4353d38',
  '632259bda6904810a4353d37',
  '632259bda6904810a4353d39',
  '632259bda6904810a4353d4b',
  '632259bda6904810a4353d69',
  '632259bda6904810a4353d6f',
  '632259bda6904810a4353d62',
  '632259bda6904810a4353d2b',
  '632259bda6904810a4353d4d'],
 'time': '2023-01-09T05:00:54.414000',
 'name': '60ade32c0c894b5ba1f1e4c17514db87',
 'id': '63bb9f86721edb59a9f3d0a5'}

In [13]:
from import_neo.analytics import Analytics2NeoImporter

# Run the analytics importer.
# NOTE(review): presumably this loads the user analytics records into the
# Neo4j graph — confirm against import_neo.analytics.
a = Analytics2NeoImporter()
a.run()

{'nodes': 19, 'rel': 237}
