# Função de remoção de outliers

In [3]:
def outliers_indexes(df, ft):
    """
    Return the index labels of the rows of ``df`` that are outliers in column ``ft``.

    Tukey's fence rule is applied to the column:
        IQR         = Q3 - Q1            (interquartile range)
        lower fence = Q1 - 1.5 * IQR
        upper fence = Q3 + 1.5 * IQR
    A row is an outlier when its value is strictly below the lower fence or
    strictly above the upper fence.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data.
    ft : str
        Label of the column to inspect.

    Returns
    -------
    pandas.Index
        Labels (taken from ``df.index``) of the outlier rows, in frame order.
    """
    values = df[ft]

    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    # Distance from each quartile to its fence.
    whisker = 1.5 * (third_quartile - first_quartile)

    too_low = values < first_quartile - whisker
    too_high = values > third_quartile + whisker

    return df.index[too_low | too_high]


# Collects the outlier row labels for one or more columns.
# Only 'Preço' is inspected by default; pass extra column names through
# `features` to also flag rows that are outliers in those columns.

def outliers_index_l(df, features=("Preço",)):
    """
    Return the labels of the rows that are outliers in any of ``features``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data.
    features : str or iterable of str, optional
        Column label(s) to inspect. Defaults to ``("Preço",)``, i.e. only the
        price column is checked.

    Returns
    -------
    pandas.Index
        Labels of the outlier rows. With a single feature this is exactly what
        ``outliers_indexes`` returns; with several features it is their union
        (``Index.union`` removes duplicates and sorts when it can).

    Side effects
    ------------
    Prints the number of outlier rows found.
    """
    # Accept a bare column name as a convenience.
    if isinstance(features, str):
        features = (features,)

    outliers_index_list = None
    for feature in features:
        feature_outliers = outliers_indexes(df, feature)
        if outliers_index_list is None:
            outliers_index_list = feature_outliers
        else:
            outliers_index_list = outliers_index_list.union(feature_outliers)

    # No feature requested: nothing is an outlier (empty slice of the index).
    if outliers_index_list is None:
        outliers_index_list = df.index[:0]

    print("Quantidade de outliers: ",len(outliers_index_list))

    return outliers_index_list
    
    
def remove_outliers(df, index_list):
    """
    Return ``df`` without the rows whose index labels appear in ``index_list``.

    Duplicate labels in ``index_list`` are collapsed and the labels are sorted
    before dropping. The frame passed in is not modified; the result of
    ``DataFrame.drop`` is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean.
    index_list : iterable
        Index labels of the rows to remove.

    Returns
    -------
    pandas.DataFrame
        The frame with the listed rows dropped.
    """
    labels_to_drop = list(set(index_list))
    labels_to_drop.sort()

    cleaned = df.drop(labels_to_drop)
    return cleaned

# Importando dados

In [4]:
import pandas as pd

In [5]:
# Column labels (description, total reviews, price).
# NOTE(review): no code visible in this part of the notebook reads `colunas`;
# presumably these are the labels the product frames are meant to carry — confirm.
colunas = [
    'Descrição',
    'TotalReviews',
    'Preço',
]

In [6]:
# Read the semicolon-separated notebook listing into a DataFrame.
notebook_df = pd.read_csv(
    'output/notebook_CB.csv',
    sep=';',
)
# Bare last expression: the cell renders the frame.
notebook_df

Unnamed: 0,nome,num_avali,avali,preco
0,Notebook Acer Core i5-10210U 8GB 512GB SSD Tel...,297,5.0,3999.00
1,Notebook Samsung Dual Core 4GB 500GB Tela Full...,251,4.5,2349.00
2,Notebook Positivo Intel Atom Quad Core 4GB 64G...,468,4.5,1519.90
3,Notebook 2 em 1 Positivo Dual Core 4GB 64GB eM...,360,4.5,1699.00
4,Notebook Samsung Core i3-1115G4 4GB 1TB Tela F...,272,4.5,3099.00
...,...,...,...,...
251,Notebook Dell Inspiron I5-1035g1 8gb 512ssd+2t...,0,0.0,5199.00
252,Notebook Asus Ux430Un-Ih74 I7 1.8Ghz/ 16Gb/ 51...,0,0.0,5299.00
253,Notebook Dell Inspiron I5-1035g1 32gb 128 Ssd ...,0,0.0,5349.00
254,Notebook Dell Inspiron I5-1035g1 8gb 1tb Ssd T...,0,0.0,5499.00


In [7]:
# Read the semicolon-separated smartphone listing into a DataFrame.
smartphone_df = pd.read_csv(
    'output/smartphone_CB.csv',
    sep=';',
)
# Bare last expression: the cell renders the frame.
smartphone_df

Unnamed: 0,nome,num_avali,avali,preco
0,"Smartphone Samsung Galaxy A32 Violeta 128GB, 4...",1319,5.0,1554.00
1,"Smartphone Samsung Galaxy A01 Azul 32GB, Tela ...",2769,4.5,699.00
2,Smartphone Motorola Moto E7 Power Azul Metálic...,720,5.0,759.00
3,Smartphone Motorola Moto G10 Branco Floral 64G...,1428,5.0,1199.00
4,"Smartphone Samsung Galaxy A12 Branco 64GB, Tel...",1784,4.5,1199.00
...,...,...,...,...
259,"Smartphone G9 Play 4GB RAM 64GB Tela 6,5 Motorola",0,0.0,4654.86
260,"Smartphone G9 Play 4GB RAM 64GB Tela 6,5 Motorola",0,0.0,1873.57
261,"Smartphone Xiaomi Redmi 9C Cinza 64GB, Tela de...",248,4.5,2532.28
262,Smartphone Positivo Twist 2 Go S541 Preto com ...,22,4.0,10906.47


In [8]:
# Read the semicolon-separated smart TV listing into a DataFrame.
smarttv_df = pd.read_csv(
    'output/smart_tv_CB.csv',
    sep=';',
)
# Bare last expression: the cell renders the frame.
smarttv_df

Unnamed: 0,nome,num_avali,avali,preco
0,Smart TV LED 42” Full HD Philco PTV42G70N5CF c...,1449,5.0,1896.00
1,"Smart TV LED 32"" HD Philco PTV32D10N5SKH com D...",485,5.0,1499.00
2,"Smart TV LED 43"" Full HD TCL 43S6500FS Android...",3407,4.5,1999.00
3,"Smart TV LED 32"" HD Samsung T4300 com HDR, Sis...",2882,5.0,1599.00
4,"Smart TV LED 58"" UHD 4K Philco PTV58F80SNS com...",969,4.5,3499.00
...,...,...,...,...
263,"Smart TV Led Panasonic 50"" 4K Ultra HD com Com...",0,0.0,1952.99
264,"Smart TV OLED 55"" LG OLED55CXPSA 4K HDR com Wi...",0,0.0,3512.90
265,"Smart TV Philco 55"" PTV55F62SN 4K LED",0,0.0,12499.00
266,Smart Tv 24Tl520s Lg 24``,0,0.0,2299.90


In [9]:
# Read the semicolon-separated refrigerator listing into a DataFrame.
geladeira_df = pd.read_csv(
    'output/geladeira_CB.csv',
    sep=';',
)
# Bare last expression: the cell renders the frame.
geladeira_df

Unnamed: 0,nome,num_avali,avali,preco
0,Refrigerador Electrolux Duplex DC35A 260L - Br...,3170,4.5,1999.00
1,Refrigerador Consul CRM50HK Frost Free com Esp...,374,5.0,3099.00
2,Refrigerador Consul CRD37EB com Prateleiras Re...,4460,5.0,1999.00
3,Refrigerador Consul CRM56HB Frost Free com Esp...,745,5.0,3299.00
4,Refrigerador Brastemp BRM44HB Frost Free com C...,3660,5.0,2599.00
...,...,...,...,...
258,Refrigerador Samsung Inverter Frost Free RT53K...,4,5.0,3639.00
259,Geladeira Philco Frost Free French Door 3 Port...,0,0.0,2999.00
260,Geladeira Electrolux Automático Duplex 2 Porta...,0,0.0,2793.80
261,Geladeira Electrolux Bottom Freezer 2 Portas F...,1,5.0,5673.79


# Removendo outliers

In [2]:
# NOTE: this cell must run after the cell that defines `outliers_index_l` and
# `remove_outliers`; running it first on a fresh kernel raises NameError.
#
# The frame displayed by the loading cell has the columns nome / num_avali /
# preco, while the outlier helpers look up 'Preço' (and 'TotalReviews').
# Rename to the labels the helpers expect; `rename` ignores keys that are not
# present, so this is a no-op if the frame already uses those labels.
notebook_renamed = notebook_df.rename(
    columns={"nome": "Descrição", "num_avali": "TotalReviews", "preco": "Preço"}
)

outliers_index_list_notebook = outliers_index_l(notebook_renamed)
df_clean_notebook = remove_outliers(notebook_renamed, outliers_index_list_notebook)
df_clean_notebook

NameError: name 'outliers_index_l' is not defined