In [None]:
import pandas as pd
import json
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
import spacy
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Read the intents definition file. JSON is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform default
# (which is not UTF-8 on every system and would garble non-ASCII text).
with open('intents.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [None]:
intents = data['intents']

# Flatten the nested intent records into one row per training pattern,
# with columns 'tag', 'pattern' and 'response'.
rows = []

for intent in intents:
    tag = intent['tag']
    patterns = intent['patterns']
    responses = intent['responses']

    # Iterate over every pattern instead of zip(patterns, responses):
    # zip() stops at the shorter list, which silently discarded patterns
    # whenever an intent had fewer responses than patterns. Responses are
    # reused cyclically so that each pattern still gets one; when the two
    # lists have equal length (or responses is longer) the pairing is the
    # same as before.
    for position, pattern in enumerate(patterns):
        response = responses[position % len(responses)] if responses else None
        rows.append({'tag': tag, 'pattern': pattern, 'response': response})

# Naming the columns keeps the schema stable even if no rows were produced.
df = pd.DataFrame(rows, columns=['tag', 'pattern', 'response'])

In [None]:
# Preview the first rows of the flattened tag / pattern / response frame.
df.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
df.info()

In [None]:
# Data-quality check: missing values per column and count of fully
# duplicated rows.
missing_per_column = df.isnull().sum()
duplicate_row_count = df.duplicated().sum()

print("Null Values:", missing_per_column)
print("Duplicate Values:", duplicate_row_count)

In [None]:
# Bar chart of how many rows each intent tag has.
#
# Labels and bar heights are both taken from the same value_counts() result.
# Previously the labels came from unique() (order of first appearance) while
# the heights came from value_counts() (sorted by frequency), so bars could
# be shown under the wrong tag.
tag_distribution = df['tag'].value_counts()
labels = tag_distribution.index.to_numpy()
values = tag_distribution.tolist()

fig = go.Figure(data=[go.Bar(x=labels, y=values)])
fig.update_layout(
    title_text='Intent Distribution',
    xaxis_title='Intent tag',
    yaxis_title='Number of patterns',
)
fig.show()

### Preprocessing

In [None]:
# Load the spaCy "en_core_web_sm" pipeline once; it is reused for every
# tokenization call below.
nlp = spacy.load("en_core_web_sm")

def spacy_tokenize(text):
    """Tokenize ``text`` with spaCy and return it as a normalized string.

    Only the tokenizer is run (``nlp.make_doc``) rather than the whole
    pipeline: this function reads nothing but each token's text, so the
    tagging / parsing / NER work done by ``nlp(text)`` was wasted.

    Args:
        text: The string to tokenize.

    Returns:
        A single string made of the lower-cased tokens joined by one space.
    """
    doc = nlp.make_doc(text)
    return " ".join(token.text.lower() for token in doc)

# Overwrite each pattern with its lower-cased, space-joined token form.
df['pattern'] = df['pattern'].map(spacy_tokenize)

In [None]:
# Display the frame to inspect the 'pattern' column after tokenization.
df

In [None]:
# Print the row count of every tag without pandas truncating the listing.
# option_context lifts the row limit only for this print and then restores
# whatever value was configured before, even if printing raises, whereas
# set_option / reset_option forced the setting back to the pandas default.
tag_counts = df['tag'].value_counts()
with pd.option_context('display.max_rows', None):
    print(tag_counts)

In [None]:
# Oversample rare intents: every tag with fewer than MIN_ROWS_PER_TAG rows
# gets one extra copy of its rows appended to the frame.
# NOTE(review): a tag with a single row ends up with 2 rows, i.e. still
# below the threshold - confirm whether one duplication pass is intended.
# NOTE(review): re-running this cell duplicates rare tags again.
MIN_ROWS_PER_TAG = 3

rows_per_tag = df['tag'].value_counts()
rare_tags = [
    tag for tag in df['tag'].unique()
    if rows_per_tag.get(tag, 0) < MIN_ROWS_PER_TAG
]

# Append all copies with a single concat instead of re-building the whole
# frame once per rare tag inside a loop. Tags are visited in order of first
# appearance, so the appended rows keep the same order as before.
if rare_tags:
    duplicated_rows = [df[df['tag'] == tag] for tag in rare_tags]
    df = pd.concat([df, *duplicated_rows], ignore_index=True)

tag_counts = df['tag'].value_counts()
print(tag_counts)