In [None]:
import json
import os

import httpx
import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from neomodel import (
    DateProperty,
    IntegerProperty,
    Relationship,
    RelationshipFrom,
    RelationshipTo,
    StringProperty,
    StructuredNode,
    UniqueIdProperty,
    clear_neo4j_database,
    config,
    db,
)
from tqdm.notebook import tqdm

In [None]:
# Read the Bolt URL once, then use it for both the neomodel config value
# and the active database connection.
neo4j_bolt_url = os.environ["NEO4J_BOLT_URL"]
config.DATABASE_URL = neo4j_bolt_url
db.set_connection(neo4j_bolt_url)

In [None]:
class Concept(StructuredNode):
    """Graph node for a concept, identified by a unique, required ``name``.

    Both relationships are declared as outgoing from this node:
    ``AKA`` edges to ``VariantName`` nodes and ``HAS_CONCEPT`` edges to
    ``Story`` nodes.
    """
    uid = UniqueIdProperty()
    name = StringProperty(unique_index=True, required=True)
    # Concept -[AKA]-> VariantName
    variant_name = RelationshipTo("VariantName", "AKA")
    # Concept -[HAS_CONCEPT]-> Story (the edge points from the concept to the story)
    stories = RelationshipTo("Story", "HAS_CONCEPT")


class Contributor(StructuredNode):
    """Graph node for a contributor, identified by a unique, required ``name``.

    Both relationships are declared as outgoing from this node:
    ``AKA`` edges to ``VariantName`` nodes and ``CONTRIBUTED_TO`` edges to
    ``Story`` nodes.
    """
    uid = UniqueIdProperty()
    name = StringProperty(unique_index=True, required=True)
    # Contributor -[AKA]-> VariantName
    variant_name = RelationshipTo("VariantName", "AKA")
    # Contributor -[CONTRIBUTED_TO]-> Story
    stories = RelationshipTo("Story", "CONTRIBUTED_TO")


class Story(StructuredNode):
    """Graph node for a story, identified by a unique, required ``title``.

    ``published`` is a date property with no ``required`` flag. Both
    relationships are declared as incoming: ``CONTRIBUTED_TO`` edges from
    ``Contributor`` nodes and ``HAS_CONCEPT`` edges from ``Concept`` nodes.
    """
    uid = UniqueIdProperty()
    title = StringProperty(unique_index=True, required=True)
    published = DateProperty()
    # Contributor -[CONTRIBUTED_TO]-> Story, traversed from the story side
    contributor = RelationshipFrom("Contributor", "CONTRIBUTED_TO")
    # Concept -[HAS_CONCEPT]-> Story, traversed from the story side
    concept = RelationshipFrom("Concept", "HAS_CONCEPT")


class VariantName(StructuredNode):
    """Graph node for an alternative name, identified by a unique, required ``name``.

    Declares the incoming side of the ``AKA`` edges that ``Concept`` nodes
    point at variant names.
    """
    uid = UniqueIdProperty()
    name = StringProperty(unique_index=True, required=True)
    # Concept -[AKA]-> VariantName, traversed from the variant side.
    # NOTE(review): Contributor also declares AKA edges to VariantName, but no
    # matching reverse relationship is declared here - confirm whether that
    # omission is intentional.
    concept = RelationshipFrom("Concept", "AKA")

In [None]:
# Preview the first five stories returned by the graph.
story_preview = Story.nodes.all()[:5]
story_preview

In [None]:
# Pick a random *position* in the list instead of handing the node objects to
# np.random.choice, which would first have to coerce them into a NumPy array.
all_stories = Story.nodes.all()
if not all_stories:
    # Same exception type np.random.choice raises for an empty input,
    # but with a message that points at the actual cause.
    raise ValueError("No Story nodes found; load the graph before sampling.")
random_story_index = np.random.default_rng().integers(len(all_stories))
random_story_uid = all_stories[random_story_index].uid
random_story_uid

In [None]:
# Look the sampled story up again by its uid; later cells read `node`.
node = Story.nodes.get(uid=random_story_uid)
node

In [None]:
# Collect the name of every concept linked to the sampled story.
concepts_on_node = []
for linked_concept in node.concept.all():
    concepts_on_node.append(linked_concept.name)
concepts_on_node

In [None]:
# For each concept on the sampled story, gather the names of its variants
# into one flat list (concept order first, then variant order).
variants_on_concepts_on_node = []
for linked_concept in node.concept.all():
    variants_on_concepts_on_node.extend(
        linked_variant.name for linked_variant in linked_concept.variant_name.all()
    )
variants_on_concepts_on_node

# Index into Elasticsearch using data from the graph

In [None]:
import os
from elasticsearch import Elasticsearch
from tqdm.notebook import tqdm

In [None]:
# Connection details come from the environment so no credentials live in
# the notebook. The host is read first, then username and password.
elastic_host = os.environ["ELASTIC_HOST"]
elastic_credentials = (
    os.environ["ELASTIC_USERNAME"],
    os.environ["ELASTIC_PASSWORD"],
)
es = Elasticsearch(elastic_host, http_auth=elastic_credentials)

In [None]:
# Name of the Elasticsearch index that the following cells delete, create and fill.
INDEX_NAME = "graph-enriched"

In [None]:
# Drop any existing index so the cells below start from an empty one;
# ignore=404 is passed so that a missing index is not treated as an error.
es.indices.delete(index=INDEX_NAME, ignore=404)

In [None]:
# Create the index; only the name is passed, no explicit settings or mappings.
es.indices.create(index=INDEX_NAME)

In [None]:
def _story_to_document(story):
    """Build the Elasticsearch document for a single ``Story`` node.

    Args:
        story: A ``Story`` node; its ``title``, ``published`` and ``concept``
            relationship are read.

    Returns:
        A dict with the keys ``title``, ``published``, ``concepts`` and
        ``variants``. ``concepts`` and ``variants`` are comma-joined strings
        of the names of the story's concepts and of those concepts' variant
        names (an empty string when there are none).
    """
    # Fetch the story's concepts once and reuse them for both fields.
    story_concepts = story.concept.all()
    concept_names = [concept.name for concept in story_concepts]
    variant_names = [
        variant.name
        for concept in story_concepts
        for variant in concept.variant_name.all()
    ]
    return {
        "title": story.title,
        "published": story.published,
        "concepts": ",".join(concept_names),
        "variants": ",".join(variant_names),
    }


for story in tqdm(Story.nodes.all()):
    # Use the concepts of the story being indexed. The earlier version read
    # them from `node` (the randomly sampled story), so every document ended
    # up with the same concepts and variants.
    document = _story_to_document(story)
    es.index(index=INDEX_NAME, document=document)