# Requisitos previos

In [None]:
!pip install gradio
!pip install neattext
!pip install emoji
!pip install scikit-learn==1.4.2 

!pip install joblib
!pip install nltk

In [None]:
import joblib
import nltk
# NLTK data needed by the text preprocessing further down:
#   stopwords                      -> stopwords.words('english')
#   punkt / punkt_tab              -> word_tokenize
#   averaged_perceptron_tagger(_eng) -> pos_tag
#   wordnet                        -> WordNetLemmatizer
# nltk is installed without a version pin, and NLTK 3.9+ looks up the
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' resources instead of the
# older pickled ones, so both generations are requested. A release that does
# not know one of the ids is expected to report it and carry on rather than
# raise (nltk.download returns False on failure).
for nltk_resource in (
    'stopwords',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
):
    nltk.download(nltk_resource)
# from googletrans import Translator

In [None]:
import gradio as gr

In [None]:
# Helper to find out whether the notebook runs on Colab, so that either the
# Colab path or the GitHub project path can be used.
def is_running_on_colab():
    """Return True if the ``google.colab`` module can be imported, else False."""
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    else:
        return True
    
from enum import Enum
# Enumeration of the available model kinds, meant to allow training several
# models in a single run later on.
class Modelos(Enum):
    """Identifiers of the supported classifiers.

    Each member's value is a lowercase string name for the model.
    """

    LOGISTIC_REGRESSION = "logistic_regression"
    DECISION_TREE = "decision_tree"
    MULTINOMIAL = "multinomial"
    BERNOULLI = "bernoulli"
    # Spelling kept as-is: member name and value are part of the interface.
    GAUSIAN = "gausian"

# Función de procesamiento de texto

In [None]:
import neattext.functions as nfx
from nltk import pos_tag, word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from string import punctuation
import emoji

# Single WordNet lemmatizer instance, created once and reused for every token
# handled by preprocessing_function.
wnl = WordNetLemmatizer()

# Two-letter Penn Treebank tag prefix -> WordNet part-of-speech letter.
# Built once instead of on every call.
_PENN_TO_MORPHY = {'NN': 'n', 'JJ': 'a',
                   'VB': 'v', 'RB': 'r'}


def penn2morphy(penntag):
    """Convert a Penn Treebank tag to a WordNet part-of-speech letter.

    Only the first two characters of ``penntag`` are looked at, so 'NNS',
    'VBD', 'JJR', ... map like their base tag.

    Args:
        penntag: Penn Treebank tag, e.g. ``'NN'`` or ``'VBZ'``.

    Returns:
        ``'n'``, ``'a'``, ``'v'`` or ``'r'``. Unknown tags, and values that
        cannot be sliced or hashed (such as ``None``), fall back to ``'n'``.
    """
    try:
        return _PENN_TO_MORPHY.get(penntag[:2], 'n')
    except TypeError:
        # Not a string-like tag. Keep the noun fallback, but no longer use a
        # bare ``except`` that would also swallow KeyboardInterrupt/SystemExit.
        return 'n'


# Extra tokens to discard: contraction fragments, typographic quotes, runs of
# dots/dashes and zero-width / directional Unicode marks.
my_custom_stopwords = {
    '’', "n't", "'m", "'s", "'ve", '...', 'ca', "''", '``', '\u200d',
    'im', 'na', "'ll", '..', 'u', "'re", "'d", '--', '”', '“',
    '\u200f\u200f\u200e', '....', 'ㅤ', '\u200e\u200f\u200f\u200e',
    'x200b', 'ive', '.-', '\u200e', '‘',
}

# Final stop-word set: NLTK's English list, every ASCII punctuation character
# and the custom tokens above.
stopwords_en = set(stopwords.words('english')) | set(punctuation) | my_custom_stopwords


def preprocessing_function(text):
    """Clean, tokenize and lemmatize ``text``, returning the lemmas to keep.

    The text is cleaned with ``nfx.clean_text``, tokenized and POS-tagged;
    each token is lowercased and lemmatized using its tag. A lemma is dropped
    when it contains a zero-width space, is in ``stopwords_en``, consists only
    of digits, or is purely emoji.

    Args:
        text: Raw input text.

    Returns:
        List of kept lemmas, in their original order.
    """
    tagged_tokens = pos_tag(word_tokenize(nfx.clean_text(text)))

    kept_lemmas = []
    for token, penn_tag in tagged_tokens:
        lemma = wnl.lemmatize(token.lower(), pos=penn2morphy(penn_tag))

        # Skip anything carrying a zero-width space.
        if '\u200b' in lemma:
            continue

        # Skip stop words, plain numbers and emoji-only tokens.
        if lemma in stopwords_en or lemma.isdigit() or emoji.purely_emoji(lemma):
            continue

        kept_lemmas.append(lemma)

    return kept_lemmas

# Carga modelo entrenado

In [None]:
import os

# EDIT THESE PARAMETERS TO CHOOSE WHICH TRAINED MODEL IS LOADED
# --------------------------------------------------------
nombre_modelo_prev_entrenado = Modelos.LOGISTIC_REGRESSION.value
# Row-count tag of the training run, e.g. '25k' for 25,000 rows.
cant_prev_entrenada = '50k'

# Outside Colab the directory is built with os.path.join so the separator
# matches the platform. On Windows this yields '.\tentativa_suicidio\entrenados\'
# as before; on Linux/macOS it yields './tentativa_suicidio/entrenados/'
# instead of a backslash-laden name that does not resolve. The trailing ''
# keeps the final separator so the file name can still be appended directly.
path_base_modelo_generado = (
    '/content/'
    if is_running_on_colab()
    else os.path.join('.', 'tentativa_suicidio', 'entrenados', '')
)
path_modelo_generado = path_base_modelo_generado + nombre_modelo_prev_entrenado + '_' + cant_prev_entrenada
# --------------------------------------------------------

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model/vectorizer files that come from a trusted source.
model = joblib.load(path_modelo_generado + '_model.pkl')
vect = joblib.load(path_modelo_generado + '_vector.pkl')

# Quick sanity check of what was loaded.
print(type(vect))
print(type(model))

# Función de predicción

In [None]:
# translator = Translator()

def get_tentativa_suicidio(text_input, english_text=False):
    """Classify ``text_input`` with the loaded vectorizer and model.

    Args:
        text_input: Text to analyse.
        english_text: Currently ignored. Translation to English is disabled,
            so the text is used as-is whatever this flag says (to re-enable:
            ``translator.translate(text_input, dest='en').text`` when False).

    Returns:
        The first (and only) element of ``model.predict`` for the text.
    """
    lemmas = preprocessing_function(text_input)
    # The vectorizer takes a sequence of documents, so the single text is
    # wrapped in a list.
    features = vect.transform([' '.join(lemmas)])
    predictions = model.predict(features)
    return predictions[0]

# Interfaz

In [None]:
def get_answer(message, history):
    """Chat callback: label ``message`` as "Suicida" or "No suicida".

    Args:
        message: Text typed by the user.
        history: Previous chat turns; accepted but not used.

    Returns:
        "Suicida" when the prediction is truthy, otherwise "No suicida".
    """
    # NOTE(review): this relies on the prediction's truthiness -- presumably
    # the model emits 0/1 labels; confirm, since any non-empty string label
    # would always count as "Suicida".
    if get_tentativa_suicidio(message, False):
        return "Suicida"
    return "No suicida"

import inspect

# Core options of the chat UI.
chat_interface_options = dict(
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Por favor ingrese un texto para analizarlo", container=False, scale=7),
    title="Deteccion suicidio y violencia en textos",
    description="Universidad Nacional de la Matanza - Grupo 2 - IA Aplicada - 1c2024",
    theme="soft",
    examples=["I want to jump from a bridge",
              "I want to suicide me",
              "I hate my parents with all my heart",
              "I hate all about this life",
              "I cry every night",
              "I don't know what is happen to me, but I don't want live anymore",
              "Nose que me esta pasando, pero ya no quiero vivir mas",
              "Non so cosa mi sta succedendo, ma non voglio più vivere."],
    cache_examples=False,
)

# Button options that exist in Gradio 4 but were removed from ChatInterface
# in Gradio 5. gradio is installed without a version pin, so passing them
# unconditionally raises TypeError on newer releases.
legacy_button_options = dict(
    retry_btn=None,
    undo_btn="Limpiar todo",
    clear_btn="Limpiar Historial Charla",
)

# Forward the legacy buttons only when the installed ChatInterface accepts them.
accepted_parameters = inspect.signature(gr.ChatInterface.__init__).parameters
chat_interface_options.update(
    (option, value)
    for option, value in legacy_button_options.items()
    if option in accepted_parameters
)

gr.ChatInterface(get_answer, **chat_interface_options).launch()
