## Random Node Edge
### Usage
Looks for the data file `web-Stanford.txt` in the project subdirectory `Data`; unzip `web-Stanford.txt.zip` there before running.

```
Project
├── Data
│   ├── *unzip datafile here*
│   └── web-Stanford.txt.zip
├── Sampling Code
│   └── this file  
└── Samples
    └── results
```

The sample size is set to one of `{5%, 10%, 15%, 20%}` of the nodes. Five samples are created as separate `.csv` files, which are then moved (manually) to the `Samples` folder.

In [1]:
import pandas as pd
import numpy as np
import random

In [3]:
# Load the tab-separated edge list; later cells read its "FromNode" and
# "ToNode" columns.
data = pd.read_csv(
    filepath_or_buffer="../Data/web-Stanford.txt",
    sep="\t",
)

In [4]:
# Candidate source nodes: every distinct value of the FromNode column
# (np.unique also sorts them). Nodes that occur only in ToNode are not included.
list_of_nodes = np.unique(data["FromNode"].to_numpy())

# Fraction of the candidate nodes each sample should cover. The documented
# choices are 5%, 10%, 15% and 20%; this run uses 5%.
sample_fraction = 0.05

# Node-count threshold used by the sampler. This is a (float) number of
# nodes, not a percentage.
sample_size = sample_fraction * len(list_of_nodes)

In [5]:
# Actual run of random node random edge
def randomNodeEdge(edge_table=None, node_pool=None, target_size=None):
    """Draw one Random Node Edge sample from a directed edge list.

    Repeatedly picks a source node uniformly at random from ``node_pool``,
    then one of that node's outgoing edges uniformly at random, and keeps
    going while the number of distinct nodes touched so far is
    ``<= target_size``.

    Args:
        edge_table: DataFrame with ``FromNode`` and ``ToNode`` columns.
            Defaults to the notebook-level ``data``.
        node_pool: Array-like of candidate source nodes. Every entry must
            occur in ``edge_table["FromNode"]``. Defaults to the
            notebook-level ``list_of_nodes``.
        target_size: Node-count threshold. Defaults to the notebook-level
            ``sample_size``.

    Returns:
        A set of ``(from_node, to_node)`` tuples of plain Python ints.
    """
    # Fall back to the notebook globals so the zero-argument call keeps working.
    if edge_table is None:
        edge_table = data
    if node_pool is None:
        node_pool = list_of_nodes
    if target_size is None:
        target_size = sample_size

    # Group the row positions by source node once, instead of scanning the
    # whole table with a boolean mask on every draw.
    rows_by_source = edge_table.groupby("FromNode").indices
    targets = edge_table["ToNode"].to_numpy()

    nodes_seen = set()  # distinct nodes touched by the sampled edges
    edges = set()       # sampled (from_node, to_node) pairs
    while len(nodes_seen) <= target_size:
        from_node = int(np.random.choice(node_pool))
        # Uniform choice among this node's outgoing edges (by row position).
        row = np.random.choice(rows_by_source[from_node])
        # Read the scalar from the array; int() on a one-element Series is
        # deprecated in recent pandas.
        to_node = int(targets[row])
        edges.add((from_node, to_node))
        nodes_seen.update((from_node, to_node))
    return edges

In [6]:
# Draw five independent samples and write each one to its own CSV file.

# sample_size is a node count, so the percentage shown in the file name has
# to be recovered relative to the number of candidate nodes.
sample_percent = int(round(100 * sample_size / len(list_of_nodes)))

for i in range(1, 6):
    e = randomNodeEdge()
    # Build the table in one step rather than concatenating one row at a time.
    edgeHolder = pd.DataFrame(list(e), columns=["FromNode", "ToNode"])
    outputname = "OutputRNE{}%{}.csv".format(sample_percent, i)
    edgeHolder.to_csv(outputname, index=False)

In [7]:
# Single run example

# Turn the edge set `e` into a two-column table in one step rather than
# concatenating one row at a time.
edgeHolder = pd.DataFrame(list(e), columns=["FromNode", "ToNode"])

# No index=False here, so the row index is written as the first CSV column.
edgeHolder.to_csv("RandomNodeEdge.csv")