# Import CSVs into Neo4j

In [1]:
import os
import re
import shutil

from glob import glob

## Download and unpack Neo4j

In [2]:
# Neo4j release to install; the archive name and download URL both derive from it.
version = "neo4j-community-3.1.1"
archive_file = version + "-unix.tar.gz"
name = "neo4j/" + archive_file  # local path the download is written to
url = "http://neo4j.com/artifact.php?name=" + archive_file

In [3]:
# Download the Neo4j archive from `url` and save it at the local path `name`.
!wget -O {name} {url}

--2017-03-07 19:40:12--  http://neo4j.com/artifact.php?name=neo4j-community-3.1.1-unix.tar.gz
Resolving neo4j.com (neo4j.com)... 54.197.241.3, 54.243.71.145
Connecting to neo4j.com (neo4j.com)|54.197.241.3|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 77401077 (74M) [application/x-gzip]
Saving to: ‘neo4j/neo4j-community-3.1.1-unix.tar.gz’


2017-03-07 19:40:13 (87.4 MB/s) - ‘neo4j/neo4j-community-3.1.1-unix.tar.gz’ saved [77401077/77401077]



In [4]:
# Unpack the downloaded archive inside the neo4j/ directory.
# NOTE(review): create_instance later copies from neo4j/<version>, so the
# archive is presumably expected to unpack to a directory of that name.
!tar -xf {name} -C neo4j

---

## Create a Neo4j instance for each hetnet we want to import

In [5]:
def replace_text(path, find, repl):
    """
    Replace every literal occurrence of `find` in a text file with `repl`.

    The file at `path` is read in full, modified in memory, and then
    overwritten in place. Both `find` and `repl` are treated as plain text:
    no regular-expression or backslash-escape processing is applied to
    either. A file that does not contain `find` is rewritten unchanged.
    """
    with open(path) as read_file:
        text = read_file.read()
    # str.replace keeps `repl` literal. re.sub would interpret backslashes in
    # the replacement (e.g. "\1", "\g<0>", "\n") as group references or escapes.
    text = text.replace(find, repl)
    with open(path, 'wt') as write_file:
        write_file.write(text)


def create_instance(version, db_id, port):
    """
    Build a fresh Neo4j instance directory from the unpacked distribution.

    Copies neo4j/<version> to neo4j/<version>_<db_id>, deleting any directory
    already present at the destination, then edits the copy's conf/neo4j.conf
    so that:

    - the `dbms.security.auth_enabled=false` line is uncommented,
    - the Bolt connector listens on `port`,
    - the HTTP connector listens on `port + 1`,
    - `dbms.connector.https.enabled` is set to false.

    Each edit is an exact-text substitution on the shipped config, so a
    setting whose default line is worded differently is left untouched.
    """
    template_dir = os.path.join("neo4j", version)
    instance_dir = os.path.join("neo4j", "{}_{}".format(version, db_id))

    # Always start from a clean copy of the distribution.
    if os.path.exists(instance_dir):
        shutil.rmtree(instance_dir)
    shutil.copytree(template_dir, instance_dir)

    # All settings below live in the instance's conf/neo4j.conf
    conf_path = os.path.join(instance_dir, 'conf', 'neo4j.conf')

    replace_text(
        conf_path,
        find="#dbms.security.auth_enabled=false",
        repl="dbms.security.auth_enabled=false",
    )
    replace_text(
        conf_path,
        find="#dbms.connector.bolt.listen_address=:7687",
        repl="dbms.connector.bolt.listen_address=:{}".format(port),
    )
    replace_text(
        conf_path,
        find="#dbms.connector.http.listen_address=:7474",
        repl="dbms.connector.http.listen_address=:{}".format(port + 1),
    )
    replace_text(
        conf_path,
        find="dbms.connector.https.enabled=true",
        repl="dbms.connector.https.enabled=false",
    )

In [6]:
# ../crossval_idx.txt holds a single integer: the index of this cross-validation run.
with open("../crossval_idx.txt") as fin:
    crossval_idx = int(fin.read().strip())

db_name = "rephetio-v2.0"

# Base port for this run: 7500 plus a block of 100 ports per cross-validation index.
port_0 = 100 * crossval_idx + 7500

create_instance(version, db_id=db_name, port=port_0)

In [7]:
# One Neo4j instance per permuted network file, numbered 1..N in sorted filename order.
perms = sorted(glob("data/permuted/hetnet_perm*"))
print(perms)

# Instance k gets ports starting at port_0 + 10*k.
# NOTE(review): with 10 or more files, port_0 + 10*k reaches port_0 + 100,
# which is the base port of the next cross-validation index.
for perm_idx, perm_net in enumerate(perms, 1):
    db_id = "{name}_perm-{idx}".format(name=db_name, idx=perm_idx)
    port = port_0 + perm_idx * 10
    create_instance(version, db_id, port)

['data/permuted/hetnet_perm-1.json.bz2', 'data/permuted/hetnet_perm-2.json.bz2', 'data/permuted/hetnet_perm-3.json.bz2', 'data/permuted/hetnet_perm-4.json.bz2', 'data/permuted/hetnet_perm-5.json.bz2']


---

In [8]:
# Delete the unpacked neo4j/<version> directory that create_instance copied from.
# The per-database copies (neo4j/<version>_<db_id>) and the downloaded archive
# are not touched by this command.
!rm -rf neo4j/{version}