In [17]:
from dotenv import load_dotenv
from datetime import datetime
load_dotenv()

from function.packages.neo4jdb.generate.utils.api2neo import Analytics2NeoImporter, API2NeoImporter
from function.packages.neo4jdb.generate.utils.neo import BaseNeoImporter

## Make sure to run Local Neo4j

```shell
docker-compose up -d --build
```

In [3]:
neo = BaseNeoImporter(node_type='Services')
with neo.driver.session() as session:
    session.run("""MATCH (n) DETACH DELETE n;""")

# Use Backup in Digital Ocean Spaces

In [4]:
static_date = int(datetime.now().timestamp())
impo_services = BaseNeoImporter(node_type='Services', space='api', static_date=static_date)
impo_services.execute_date_node()
print('Services Data \n')
impo_services.run()
print(impo_services.data[0])


impo_q = BaseNeoImporter(node_type='Questions', space='api', static_date=static_date)
print('\nQuestions Data \n')
impo_q.run()
print(impo_q.data[0])

Services Data 

{'tag_nodes': 187, 'nodes': 187}
{'name': 'S. AZ Gender Alliance', 'phone': 5204777096, 'address': '2030 EAST BROADWAY', 'general_topic': 'LGBTQ', 'tags': ['LGBTQ'], 'city': 'Tuscon', 'state': 'AZ', 'lat': 32.2210873, 'lon': -110.94121, 'zip_code': 85719, 'web_site': 'http://www.sagatucson.org/', 'days': None, 'hours': None, 'id': '632259bda6904810a4353d29', 'mongo_id': '632259bda6904810a4353d29', 'main_tag': 'LGBTQ'}

Questions Data 

{'tag_nodes': 30, 'nodes': 30}
{'id': 1, 'question': 'Is anyone scaring, threatening or hurting you or your children?', 'tags': ['Domestic Violence', 'Shelter', 'Family'], 'main_tag': 'Family', 'mongo_id': 1, 'name': 'Is anyone scaring, threatening or hurting you or your children?'}


## Use API to Import to Neo4j

In [5]:
# note mhp docker services must be running locally which will be a port 80
# if not use production server: https://mhpportal.app
static_date = int(datetime.now().timestamp())
api_path = 'https://mhpportal.app'
print(f'Using API: {api_path}')
q = API2NeoImporter(node_type='Questions', api_path=api_path, static_date=static_date)
q.execute_date_node()
print('Questions Data \n')
q.run()
print(q.data[0])
print(q.tags)

s = API2NeoImporter(node_type='Services', api_path=api_path, static_date=static_date)
print('\nServices Data \n')
s.run()
print(s.data[0])
print(s.tags)


Using API: https://mhpportal.app
Questions Data 

{'tag_nodes': 30, 'nodes': 30}
{'id': 1, 'question': 'Is anyone scaring, threatening or hurting you or your children?', 'tags': ['Domestic Violence', 'Shelter', 'Family'], 'main_tag': 'Family', 'mongo_id': 1, 'name': 'Is anyone scaring, threatening or hurting you or your children?'}
['Addiction Resources', 'Adolescent', 'Child Support', 'Children', 'Disability', 'Domestic Violence', 'Education', 'Elder', 'Employment', 'Family', 'Food Insecurity', 'Food and Nutrition', 'Health Care Resources', 'Health Insurance', 'Home', 'Housing', 'Identity', 'Income', 'Indigent', 'LGBTQ', 'Legal Assistance', 'Legal Services', 'Low Income', 'Mental Health', 'Prescription Medication Assistance', 'Public Benefits', 'Shelter', 'Social Security', 'Special Education', 'Substance Abuse', 'Transportation', 'Utility Assistance', 'Women Health', 'Young Adult']

Services Data 

{'tag_nodes': 177, 'nodes': 177}
{'name': 'S. AZ Gender Alliance', 'phone': 5204777096

In [6]:
# QA wrong node type
# impo_services = BaseNeoImporter(node_type='Service')

# Cypher Queries
<img src="static/graph.png">

* https://neo4j.com/docs/graph-data-science/current/algorithms/node-similarity/
* https://neo4j.com/docs/graph-data-science/current/algorithms/knn/
* https://neo4j.com/docs/graph-data-science/current/algorithms/bfs/
* https://neo4j.com/docs/graph-data-science/current/machine-learning/linkprediction-pipelines/link-prediction/

## Show all relationships 
<hr>

```cypher
MATCH p=()-[r:TAGGED]-() RETURN p;
```

## Get Node Similiarties
<hr>

```cypher
CALL gds.graph.project(
    'myGraph',
    ['Tags', 'Services', 'Questions'],
    {
        TAGGED: {
        }
    }
);
```


```cypher
CALL gds.nodeSimilarity.write('myGraph', {
    writeRelationshipType: 'SIMILAR',
    writeProperty: 'score'
})
YIELD nodesCompared, relationshipsWritten;
```


```cypher
MATCH p=()-[r:SIMILAR]-() WHERE r.score > 0.75 RETURN p;
```

## Link Prediction
<hr>

```cypher

CALL gds.beta.pipeline.linkPrediction.create('pipe');

                                             
CALL gds.beta.pipeline.linkPrediction.configureSplit('pipe', {
  testFraction: 0.25,
  trainFraction: 0.6,
  validationFolds: 3
})
YIELD splitConfig;


CALL gds.alpha.pipeline.linkPrediction.addMLP('pipe',
{hiddenLayerSizes: [4, 2], penalty: 1, patience: 2})
YIELD parameterSpace;


CALL gds.alpha.pipeline.linkPrediction.configureAutoTuning('pipe', {
  maxTrials: 2
}) YIELD autoTuningConfig;



CALL gds.graph.project(
  'testGraph',
  {
    Services: {
      properties: ['created']
    }
  },
  {
    TAGGED: {
      orientation: 'UNDIRECTED'
    }
  }
);


CALL gds.beta.pipeline.linkPrediction.train('testGraph', {
  pipeline: 'pipe',
  modelName: 'lp-pipeline-model',
  metrics: ['AUCPR', 'OUT_OF_BAG_ERROR'],
  targetRelationshipType: 'TAGGED',
  randomSeed: 73
}) YIELD modelInfo, modelSelectionStats
RETURN
  modelInfo.bestParameters AS winningModel,
  modelInfo.metrics.AUCPR.train.avg AS avgTrainScore,
  modelInfo.metrics.AUCPR.outerTrain AS outerTrainScore,
  modelInfo.metrics.AUCPR.test AS testScore,
  [cand IN modelSelectionStats.modelCandidates | cand.metrics.AUCPR.validation.avg] AS validationScores;
    
    
CALL gds.beta.pipeline.linkPrediction.predict.stream('testGraph', {
  modelName: 'lp-pipeline-model',
  topN: 5,
  threshold: 0.5
})
 YIELD node1, node2, probability
 RETURN gds.util.asNode(node1).name AS person1, gds.util.asNode(node2).name AS person2, probability
 ORDER BY probability DESC, person1;
```

## Get Services not tied to Question
<hr>

### Get Services not tied to Question

```cypher
MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
WHERE NOT (n1)-[:TAGGED]-(:Questions) 
// Young Adult Resources is tied by Age Question
      AND n1.name <> 'Young Adult Resources' 
      AND NOT (n)-[:TAGGED]-(n1)-[:TAGGED]-(:Questions)
RETURN n.id as service_id, n.name as service, COLLECT(n1.name) as tags 
ORDER BY tags;
```


### Get Tags not tied to Question

```cypher
MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
WHERE NOT (n1)-[:TAGGED]-(:Questions) 
// Young Adult Resources is tied by Age Question
      AND n1.name <> 'Young Adult Resources' 
WITH n1.name as tags 
RETURN DISTINCT tags as tag
ORDER BY tag;
```

In [7]:
neo = BaseNeoImporter(node_type='Services')

with neo.driver.session() as session:
    data = session.run("""
        MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
    WHERE NOT (n1)-[:TAGGED]-(:Questions) 
    // Young Adult Resources is tied by Age Question
          AND n1.name <> 'Young Adult Resources' 
          AND NOT (n)-[:TAGGED]-(n1)-[:TAGGED]-(:Questions)
    RETURN n.id as service_id, n.name as service, COLLECT(n1.name) as tags 
    ORDER BY tags;
    """)
    df = data.to_df()

In [8]:
df.to_dict(orient='records')

[{'service_id': '632259bda6904810a4353d7c',
  'service': 'El Rio Community Center-Northwest Dental',
  'tags': ['Dental', 'Dental']},
 {'service_id': '632259bda6904810a4353d81',
  'service': 'El Rio Southwest Dental ',
  'tags': ['Dental', 'Dental']},
 {'service_id': '632259bda6904810a4353d83',
  'service': 'El Rio Community Health Center-Congress Dental Center',
  'tags': ['Dental', 'Dental']},
 {'service_id': '632259bda6904810a4353d7d',
  'service': 'El Rio-Congress Dental Center',
  'tags': ['Dental', 'Dental']},
 {'service_id': '632259bda6904810a4353d82',
  'service': 'El Rio Community Health Center-Northwest Dental ',
  'tags': ['Dental', 'Dental']},
 {'service_id': '632259bda6904810a4353d97',
  'service': 'International Rescue Committee',
  'tags': ['Immigrant/Refugee Resources']},
 {'service_id': '632259bda6904810a4353db1',
  'service': 'International Rescue Committee',
  'tags': ['Immigrant/Refugee Resources']},
 {'service_id': '632259bda6904810a4353d9a',
  'service': 'Merciful

In [9]:
with neo.driver.session() as session:
    data = session.run("""
        MATCH (n:Services)-[:TAGGED]->(n1:Tags) 
        WHERE NOT (n1)-[:TAGGED]-(:Questions) 
        // Young Adult Resources is tied by Age Question
              AND n1.name <> 'Young Adult Resources' 
        WITH n1.name as tags 
        RETURN DISTINCT tags as tag
        ORDER BY tag;
        """)
    tag = data.values()

In [10]:
[elem for sublist in tag for elem in sublist]

['Dental', 'Immigrant/Refugee Resources', 'Prescription Medication Assistance']

# Get User Data

In [14]:
a = Analytics2NeoImporter(static_date=static_date)
a.execute_date_node(finished=True)
a.run()

{'nodes': 21}
