In [1]:
from uscode import USCode
from search import SearchEngine

In [2]:
def id_to_location(sec_id):
    """Derive a two-element location tuple from a '/'-separated section id.

    The id is split on '/', the last two components are taken, and the first
    character of each is dropped.
    (Presumably those dropped characters are one-letter prefixes marking the
    title and the section -- confirm against the id format.)

    Returns:
        A tuple ``(second_to_last[1:], last[1:])`` of strings.

    Raises:
        IndexError: if the id has fewer than two '/'-separated components.
    """
    segments = sec_id.split('/')
    title_segment = segments[-2]
    section_segment = segments[-1]
    return (title_segment[1:], section_segment[1:])

def location_info(loc):
    """Format a location as a fixed-width "Title ... Section ..." label.

    ``loc`` is unpacked positionally: its first item fills a field of minimum
    width 3 and its second a field of minimum width 5. The returned string
    ends with a single trailing space.
    """
    template = "Title {:3} Section {:5} "
    return template.format(*loc)

In [3]:
# Build the USCode object from the 'data/usc201307/' path and a SearchEngine
# on top of it.
usc = USCode('data/usc201307/')
se = SearchEngine(usc)
# Short alias for the search engine's citation network; the cells below read
# its `.graph` attribute.
cn = se.citation_network

In [None]:
# NOTE(review): `CitationNetwork` is not imported in any cell visible here, so
# this line presumably raises NameError on a fresh kernel -- confirm where the
# class lives and import it. It also rebinds `cn`, replacing the earlier
# `se.citation_network` alias.
cn = CitationNetwork(usc.sections())

# Report the size of the citation graph: node count, edge count, and the sum
# of the 'weight' attribute over all edges.
print("Nodes", len(cn.graph.nodes))
print("Edges", len(cn.graph.edges))
total_weight = sum(
    edge_weight
    for _source, _target, edge_weight in cn.graph.edges.data('weight')
)
print("Weight", total_weight)

In [None]:
# Two rankings of the graph's nodes, highest degree first: one by in-degree
# and one by out-degree. The degree accessors are used directly as sort keys.
sorted_indeg = sorted(cn.graph.nodes, key=cn.graph.in_degree, reverse=True)
sorted_outdeg = sorted(cn.graph.nodes, key=cn.graph.out_degree, reverse=True)

def show_node_info(node):
    """Print one line with a node's location label and its in/out degree.

    ``node`` must expose a ``.location`` attribute accepted by
    ``location_info`` and must be a node of the global ``cn.graph``.
    NOTE(review): the search cell in this notebook looks degrees up by
    ``sec.id`` rather than by the section object -- confirm which of the two
    the graph's nodes actually are.
    """
    location_text = location_info(node.location)
    indegree_text = "In: {:>3}".format(cn.graph.in_degree(node))
    outdegree_text = "Out: {:>3}".format(cn.graph.out_degree(node))
    print(location_text, indegree_text, outdegree_text)

# Show the first five nodes of each ranking under its own heading.
for heading, ranked_nodes in (("Highest Indegree:", sorted_indeg),
                              ("\nHighest Outdegree:", sorted_outdeg)):
    print(heading)
    for node in ranked_nodes[:5]:
        show_node_info(node)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def _count_with_degree_at_least(ranked_nodes, degree_of, thresholds):
    """Return, for each threshold, how many nodes have degree >= threshold.

    ``ranked_nodes`` must be ordered by descending degree and ``thresholds``
    must be descending, so one forward scan of the ranking serves every
    threshold in turn.
    """
    counts = []
    cursor = 0
    total = len(ranked_nodes)
    for threshold in thresholds:
        while cursor < total and degree_of(ranked_nodes[cursor]) >= threshold:
            cursor += 1
        counts.append(cursor)
    return counts


# Degree thresholds 1000, 999, ..., 0.
x_range = np.arange(1000, -1, -1)
y_indeg = _count_with_degree_at_least(sorted_indeg, cn.graph.in_degree, x_range)
y_outdeg = _count_with_degree_at_least(sorted_outdeg, cn.graph.out_degree, x_range)

# Move to log10 axes. Each y value becomes the log of the fraction of nodes
# whose degree is at least x. Zero thresholds and zero counts hit log10(0),
# so the divide warning is silenced for this step.
with np.errstate(divide='ignore'):
    node_count = len(cn.graph.nodes)
    x_range = np.log10(x_range)
    y_indeg = np.log10(np.array(y_indeg) / node_count)
    y_outdeg = np.log10(np.array(y_outdeg) / node_count)

# One scatter panel per degree kind, side by side, with identical ticks.
fig, axs = plt.subplots(1, 2, figsize=(15, 7.5))
panels = zip(axs, [y_indeg, y_outdeg], ["Indegree", "Outdegree"])
for ax, y_range, name in panels:
    ax.scatter(x_range, y_range, marker='x')
    ax.set_title("Log-Log {} Distribution".format(name))
    ax.set_xlabel("Log {}".format(name))
    ax.set_ylabel("Log Probability")
    ax.set_xticks(range(4))
    ax.set_yticks(range(-5, 1))
plt.show()

In [8]:
# Run a boolean query ranked by PageRank and list the first five results with
# their location label, in-degree in the citation graph, and PageRank score.
res = se.search('copyright AND property AND NOT legal', mode='boolean', rank='pagerank')
for sec in res[:5]:
    location_text = location_info(sec.location)
    indegree_text = "Indegree: {:4}".format(cn.graph.in_degree(sec.id))
    pagerank_text = "PR: {:.4e}".format(se.pagerank[sec.id])
    print(location_text, indegree_text, pagerank_text)

Title 35  Section 2      Indegree:   55 PR: 1.4889e-04
Title 19  Section 1337   Indegree:   24 PR: 1.2442e-04
Title 19  Section 2412   Indegree:   18 PR: 9.8308e-05
Title 19  Section 2462   Indegree:   15 PR: 9.2216e-05
Title 19  Section 2702   Indegree:   13 PR: 8.3319e-05
