In [14]:
from dotenv import load_dotenv
load_dotenv()

# TODO: Add Age Logic as Question and Tag

from import_neo.base import BaseNeoImporter
from import_neo.base_api import API2NeoImporter

## Make sure a local Neo4j instance is running

```shell
docker-compose up -d --build
```

In [2]:
# Change this to choose the import source: True imports from the
# DigitalOcean Spaces backup, False imports through the MHP API.
RUN_SPACES = True

In [3]:
# Empty the graph before importing: the Cypher statement below matches
# every node and deletes it together with its relationships.
neo = BaseNeoImporter(node_type='Services')
wipe_all_nodes = """MATCH (n) DETACH DELETE n;"""
with neo.driver.session() as session:
    session.run(wipe_all_nodes)

# Use Backup in DigitalOcean Spaces

In [4]:
def _run_and_preview(importer, heading):
    """Print ``heading``, run ``importer``, then print its first data record."""
    print(heading)
    importer.run()
    print(importer.data[0])


if RUN_SPACES:
    # Services are imported first, then Questions.
    impo_services = BaseNeoImporter(node_type='Services', space='api')
    _run_and_preview(impo_services, 'Services Data \n')

    impo_q = BaseNeoImporter(node_type='Questions', space='api')
    _run_and_preview(impo_q, '\nQuestions Data \n')

Services Data 

{'tag_nodes': 31, 'nodes': 188, 'rel': 336}
{'name': 'S. AZ Gender Alliance', 'phone': 5204777096, 'address': '2030 EAST BROADWAY', 'general_topic': 'LGBTQ', 'tags': ['LGBTQ'], 'city': 'Tuscon', 'state': 'AZ', 'lat': 32.2210873, 'lon': -110.94121, 'zip_code': 85719, 'web_site': 'http://www.sagatucson.org/', 'days': None, 'hours': None, 'id': '632259bda6904810a4353d29', 'mongo_id': '632259bda6904810a4353d29', 'main_tag': 'LGBTQ'}

Questions Data 

{'tag_nodes': 30, 'nodes': 28, 'rel': 106}
{'id': 1, 'question': 'Is anyone scaring, threatening or hurting you or your children?', 'tags': ['Domestic Violence', 'Shelter', 'Family'], 'main_tag': 'Family', 'mongo_id': 1, 'name': 'Is anyone scaring, threatening or hurting you or your children?'}


## Use API to Import to Neo4j

In [5]:
def _report_api_import(importer, heading):
    """Print ``heading``, run ``importer``, then print its first record and its tags."""
    print(heading)
    importer.run()
    print(importer.data[0])
    print(importer.tags)


if not RUN_SPACES:
    # Note: to target a local API, the MHP docker services must be running
    # locally (on port 80); otherwise use the production server:
    # https://mhpportal.app
    api_path = 'https://mhpportal.app'
    print(f'Using API: {api_path}')

    q = API2NeoImporter(node_type='Questions', api_path=api_path)
    _report_api_import(q, 'Questions Data \n')

    s = API2NeoImporter(node_type='Services', api_path=api_path)
    _report_api_import(s, '\nServices Data \n')


In [6]:
# QA wrong node type
# impo_services = BaseNeoImporter(node_type='Service')

# Cypher Queries
<img src="static/graph.png">

* https://neo4j.com/docs/graph-data-science/current/algorithms/node-similarity/
* https://neo4j.com/docs/graph-data-science/current/algorithms/knn/
* https://neo4j.com/docs/graph-data-science/current/algorithms/bfs/
* https://neo4j.com/docs/graph-data-science/current/machine-learning/linkprediction-pipelines/link-prediction/

## Show all TAGGED relationships
<hr>

```cypher
MATCH p=()-[r:TAGGED]-() RETURN p;
```

## Get Node Similarities
<hr>

```cypher
CALL gds.graph.project(
    'myGraph',
    ['Tags', 'Services', 'Questions'],
    {
        TAGGED: {
        }
    }
);
```


```cypher
CALL gds.nodeSimilarity.write('myGraph', {
    writeRelationshipType: 'SIMILAR',
    writeProperty: 'score'
})
YIELD nodesCompared, relationshipsWritten;
```


```cypher
MATCH p=()-[r:SIMILAR]-() WHERE r.score > 0.75 RETURN p;
```

## Link Prediction
<hr>

```cypher

CALL gds.beta.pipeline.linkPrediction.create('pipe');

                                             
CALL gds.beta.pipeline.linkPrediction.configureSplit('pipe', {
  testFraction: 0.25,
  trainFraction: 0.6,
  validationFolds: 3
})
YIELD splitConfig;


CALL gds.alpha.pipeline.linkPrediction.addMLP('pipe',
{hiddenLayerSizes: [4, 2], penalty: 1, patience: 2})
YIELD parameterSpace;


CALL gds.alpha.pipeline.linkPrediction.configureAutoTuning('pipe', {
  maxTrials: 2
}) YIELD autoTuningConfig;



CALL gds.graph.project(
  'testGraph',
  {
    Services: {
      properties: ['created']
    }
  },
  {
    TAGGED: {
      orientation: 'UNDIRECTED'
    }
  }
);


CALL gds.beta.pipeline.linkPrediction.train('testGraph', {
  pipeline: 'pipe',
  modelName: 'lp-pipeline-model',
  metrics: ['AUCPR', 'OUT_OF_BAG_ERROR'],
  targetRelationshipType: 'TAGGED',
  randomSeed: 73
}) YIELD modelInfo, modelSelectionStats
RETURN
  modelInfo.bestParameters AS winningModel,
  modelInfo.metrics.AUCPR.train.avg AS avgTrainScore,
  modelInfo.metrics.AUCPR.outerTrain AS outerTrainScore,
  modelInfo.metrics.AUCPR.test AS testScore,
  [cand IN modelSelectionStats.modelCandidates | cand.metrics.AUCPR.validation.avg] AS validationScores;
    
    
CALL gds.beta.pipeline.linkPrediction.predict.stream('testGraph', {
  modelName: 'lp-pipeline-model',
  topN: 5,
  threshold: 0.5
})
 YIELD node1, node2, probability
 RETURN gds.util.asNode(node1).name AS person1, gds.util.asNode(node2).name AS person2, probability
 ORDER BY probability DESC, person1;
```

## Get Services and Tags not tied to a Question
<hr>

### Get Services not tied to Question

```cypher
MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
WHERE NOT (n1)-[:TAGGED]-(:Questions) 
// Young Adult Resources is tied by Age Question
      AND n1.name <> 'Young Adult Resources' 
      AND NOT (n)-[:TAGGED]-(n1)-[:TAGGED]-(:Questions)
RETURN n.id as service_id, n.name as service, COLLECT(n1.name) as tags 
ORDER BY tags;
```


### Get Tags not tied to Question

```cypher
MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
WHERE NOT (n1)-[:TAGGED]-(:Questions) 
// Young Adult Resources is tied by Age Question
      AND n1.name <> 'Young Adult Resources' 
WITH n1.name as tags 
RETURN DISTINCT tags as tag
ORDER BY tag;
```

In [22]:
neo = BaseNeoImporter(node_type='Services')

# Services carrying a tag that no Question is TAGGED with (the
# 'Young Adult Resources' tag is excluded); rows are grouped by service id
# and name, with the matching tag names collected into a list.
services_without_question_query = """
        MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
    WHERE NOT (n1)-[:TAGGED]-(:Questions) 
    // Young Adult Resources is tied by Age Question
          AND n1.name <> 'Young Adult Resources' 
          AND NOT (n)-[:TAGGED]-(n1)-[:TAGGED]-(:Questions)
    RETURN n.id as service_id, n.name as service, COLLECT(n1.name) as tags 
    ORDER BY tags;
    """

with neo.driver.session() as session:
    data = session.run(services_without_question_query)
    # Convert the result to a DataFrame inside the session block.
    df = data.to_df()

In [31]:
# Display the query result as a list of per-row dicts.
df.to_dict(orient='records')

[{'service_id': '632259bda6904810a4353dc2',
  'service': 'COPE Community Services, Inc.',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dc1',
  'service': 'COPE Community Services',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dc0',
  'service': 'Cope Behavioral Services',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dbf',
  'service': 'COPE Community Services',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dbe',
  'service': 'COPE Community Services, Inc.',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dbd',
  'service': 'Cope Behavioral Services',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dbc',
  'service': 'COPE Community Services, Inc.',
  'tags': ['Addiction Resources']},
 {'service_id': '632259bda6904810a4353dbb',
  'service': 'COPE Community Services, Inc. -Youth And Family Services',
  'tags': ['Addiction R

In [24]:
# Distinct names of Tags that are attached to a Service but that no Question
# is TAGGED with ('Young Adult Resources' is excluded). Uses the `neo`
# importer created in an earlier cell.
tags_without_question_query = """
        MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
        WHERE NOT (n1)-[:TAGGED]-(:Questions) 
        // Young Adult Resources is tied by Age Question
              AND n1.name <> 'Young Adult Resources' 
        WITH n1.name as tags 
        RETURN DISTINCT tags as tag
        ORDER BY tag;
        """

with neo.driver.session() as session:
    data = session.run(tags_without_question_query)
    tag = data.values()

In [30]:
# Flatten the rows held in `tag` into one flat list of values.
[value for row in tag for value in row]

['Addiction Resources',
 'Dental',
 'Health Care Resources',
 'Immigrant/Refugee Resources',
 'Prescription Medication Assistance',
 'Substance Abuse',
 'Utility Assistance']

# Get User Data

In [11]:
import requests

zip_codes_url = 'https://mhpportal.app/api/v1/platform/zip_codes'
data_zip_url = 'https://mhpportal.app/api/v1/platform/data/%s'

# Seconds to wait on each request. Without an explicit timeout a stalled
# server would block this cell indefinitely.
REQUEST_TIMEOUT = 30

# Fetch the zip-code listing; each entry is indexed by its 'id' below.
resp = requests.get(zip_codes_url, timeout=REQUEST_TIMEOUT)
# Fail loudly on HTTP errors instead of JSON-decoding an error payload.
resp.raise_for_status()
z_c = resp.json()

# Gather the records returned for every zip code into one flat list.
json_data = []
for z in z_c:
    resp = requests.get(data_zip_url % z['id'], timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    records = resp.json()
    # list.extend would silently accept a dict (adding its keys), so keep the
    # strictness of the original list concatenation.
    if not isinstance(records, list):
        raise TypeError(
            f"expected a list of records for zip code id {z['id']!r}, "
            f"got {type(records).__name__}"
        )
    # Extend in place rather than rebuilding the list on every iteration.
    json_data.extend(records)

In [12]:
# Preview the first fetched record.
json_data[0]

{'dob': 1994,
 'zip_code': 85732,
 'answers': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'top_services': ['632259bda6904810a4353d4b',
  '632259bda6904810a4353d4e',
  '632259bda6904810a4353d4d',
  '632259bda6904810a4353de5',
  '632259bda6904810a4353d4c',
  '632259bda6904810a4353d60',
  '632259bda6904810a4353d48',
  '632259bda6904810a4353d5e',
  '632259bda6904810a4353d61',
  '632259bda6904810a4353d50',
  '632259bda6904810a4353d4a',
  '632259bda6904810a4353d2f',
  '632259bda6904810a4353d47',
  '632259bda6904810a4353d2a',
  '632259bda6904810a4353d58'],
 'time': '2023-01-19T03:27:56.937000',
 'name': '4fadbd1eaac34ca88c4c9d69dd249b87',
 'id': '63c8b8bc6abf8b7271145307'}

In [13]:
from import_neo.analytics import Analytics2NeoImporter

# Build the analytics importer with no arguments and run it.
a = Analytics2NeoImporter()
a.run()

{'nodes': 19, 'rel': 237}
