# Demo of the Convergence Exploratory Data Analysis (CEDA) Tool

In [None]:
import pandas as pd
import numpy as np

## 1. Fit the model

I ran the fitting step entirely via a script on a local machine and then saved a checkpoint for this demo (see `BCL_CEDA.py`). For reference, the code for fitting the model should look like the following.

### 1.1 Importing Data

In [None]:
# --- Configuration: fill in the placeholders before running the cells below ---
CEDA_checkpoint_name = 'CEDA-CKPT.pt'  # name used when saving/loading the checkpoint
meta_data_columns = []                 # columns of `df` turned into graph labels
text_column_name = ''                  # column of `df` whose values are passed to graph.fit
dataset = ''                           # path given to pd.read_csv

In [None]:
# Read the CSV at the path configured in `dataset`, then preview the first rows.
df = pd.read_csv(dataset)
df.head()

### 1.2 Creating a CEDA model object and fitting it

In [None]:
# NOTE(review): a relative import only resolves inside a package; when this
# cell runs directly in a notebook kernel there is presumably no parent
# package -- confirm the intended absolute module path.
from . import ceda_model

# NOTE(review): device='cuda' presumably requires a CUDA-capable GPU -- confirm
# which other device values ceda_model accepts.
graph = ceda_model(sigma=1.5, device='cuda', wv_layers=[8, -1])

In [None]:
# Fit the model on the raw values of the configured text column.
graph.fit(df[text_column_name].values)

Then add additional labels to the graph as metadata.

In [None]:
# orient='records' turns each row's metadata columns into one dict; the same
# per-row records are supplied (as two separately built lists) for both
# x_labels and y_labels.
graph.add_labels(
    x_labels=df[meta_data_columns].to_dict(orient='records'),
    y_labels=df[meta_data_columns].to_dict(orient='records')
)

Finally, save a model checkpoint for later use.

In [None]:
# Save a checkpoint under the name set in CEDA_checkpoint_name.
graph.checkpoint(CEDA_checkpoint_name)

## 2. Visualizer and Exploratory Data Analysis (EDA)

In [None]:
from . import ceda_model, EDA

In [None]:
# Start from a default-constructed model, then load the saved checkpoint.
# NOTE(review): the return value of load_from_checkpoint is not used, so it
# presumably restores state in place -- confirm.
graph = ceda_model()
graph.load_from_checkpoint(CEDA_checkpoint_name)

In [None]:
# Wrap the loaded graph in the EDA helper used by the sections below.
eda = EDA(graph)

### 2.1 Labeled recurrence plot

In [None]:
# Build the recurrence-plot figure and render it.
fig = eda.recurrence_plot()
fig.show()

### 2.2 Looking up linked utterances

Takes an index into `graph.texts` and returns a JSON object containing that graph and k samples of texts whose similarity exceeds the threshold `min_cutoff`.

In [None]:
# Look up the examples linked to index 0, using a similarity cutoff of 2.0.
eda.get_linked_examples(0, min_cutoff=2.)

### 2.3 c-TF-IDF visualizer

In [None]:
# Build the c-TF-IDF figure and render it.
# NOTE(review): presumably 9 topics with 3 words shown per topic -- confirm
# against EDA.TFIDF.
fig = eda.TFIDF(k_topic_words=3, n_topics=9)
fig.show()