In [None]:
import itertools
from collections import Counter

import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py

# Enable Plotly's offline rendering inside the notebook.
# NOTE(review): per the Plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it in the notebook, so figures need internet access to
# render — confirm this is intended.
py.init_notebook_mode(connected=True)

Re-run the cell below whenever Python code in the repository changes; it reloads the `fismatic` modules so the notebook picks up the latest code.

In [None]:
%%capture
import importlib
import fismatic.core as fismatic
import fismatic.helpers as helpers
importlib.reload(fismatic)
importlib.reload(helpers)

# Load files

In [None]:
# Ask fismatic for the files under "." and build one control set per file,
# keyed by the file it came from.
files = fismatic.get_files(".")
control_set_by_file = dict(
    (path, fismatic.control_set_for(path)) for path in files
)
# Dict view over the control sets; it reflects any later change to
# control_set_by_file.
control_sets = control_set_by_file.values()

# Compare files

In [None]:
# One stats record per (file, control set) pair, shown as a table whose rows
# are indexed by the "Filename" column.
stats = [
    fismatic.stats_for(path, file_control_set)
    for path, file_control_set in control_set_by_file.items()
]
df = pd.DataFrame(stats).set_index("Filename")
df

In [None]:
# Gather the token counts reported by every control set, flatten them into a
# single collection, and plot their distribution as a histogram.
token_counts_by_set = [
    loaded_set.implementation_token_counts()
    for loaded_set in control_set_by_file.values()
]
control_token_counts = helpers.flatten(token_counts_by_set)

data = [go.Histogram(x=control_token_counts)]
layout = go.Layout(
    title="Control token counts",
    xaxis=dict(title="Number of tokens"),
    yaxis=dict(title="Number of controls"),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename="basic histogram")

In [None]:
# Tally how often each control name appears across all loaded control sets,
# then tabulate the 20 most frequent names with their counts.
control_names = helpers.flatten([cs.control_names() for cs in control_sets])
counter = Counter(control_names)
top_controls = counter.most_common(20)
pd.DataFrame(top_controls, columns=["Control", "# occurrences"])

# Example SSP

## Control similarity matrix

In [None]:
# Use the first loaded control set as the worked example for the cells below.
# next(iter(...)) avoids copying every control set into a list just to take
# the first one.
control_set = next(iter(control_sets), None)
if control_set is None:
    # Fail with an actionable message instead of a bare IndexError when the
    # "Load files" cell found nothing.
    raise ValueError(
        "No control sets were loaded; check the directory passed to "
        "fismatic.get_files()."
    )
control_set.similarity_matrix().head()

## Top entities

In [None]:
# Tabulate what control_set.top_entities() returns
# (presumably (entity, count) pairs, given the two column labels).
entity_columns = ["Entity", "# occurrences"]
top_entities = control_set.top_entities()
pd.DataFrame(data=top_entities, columns=entity_columns)

In [None]:
# Tabulate what control_set.top_proper_noun_chunks() returns
# (presumably (chunk, count) pairs, given the two column labels).
chunk_columns = ["Chunk", "# occurrences"]
top_chunks = control_set.top_proper_noun_chunks()
pd.DataFrame(data=top_chunks, columns=chunk_columns)

# Example implementation

In [None]:
# Use the first implementation of the example control set for the cells below.
# Only the first item is pulled, so the remaining implementations are not
# materialized just to be discarded.
implementations = control_set.get_implementations()
implementation = next(iter(implementations), None)
if implementation is None:
    # Fail with a clear message instead of a bare IndexError on empty input.
    raise ValueError("The example control set has no implementations to display.")
implementation

## Noun chunks

In [None]:
# Materialize the implementation's noun_chunks iterable so the notebook
# displays its items rather than the iterable object itself.
noun_chunks = list(implementation.noun_chunks)
noun_chunks

## Entities

In [None]:
# Display the `ents` attribute of the example implementation
# (presumably its recognized entities, per the "Entities" heading — TODO confirm).
implementation.ents