In [179]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [180]:
mercadodf = pd.read_csv('ml_analysis_df.csv')

In [181]:
mercadodf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 292 entries, 0 to 291
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   292 non-null    object
 1   price   292 non-null    int64 
 2   link    292 non-null    object
dtypes: int64(1), object(2)
memory usage: 7.0+ KB


In [182]:
mercadodf.head(10)

Unnamed: 0,title,price,link
0,Apple iPhone 14 (128 GB) - Estelar - Distribui...,5499,https://www.mercadolivre.com.br/apple-iphone-1...
1,Apple iPhone 13 (128 GB) - Estelar - Distribui...,7599,https://www.mercadolivre.com.br/apple-iphone-1...
2,Apple iPhone 13 (128 GB) Meia-noite - Distribu...,7599,https://www.mercadolivre.com.br/apple-iphone-1...
3,Apple iPhone 14 (128 GB) - Meia noite,5499,https://www.mercadolivre.com.br/apple-iphone-1...
4,Apple iPhone 15 (128 GB) - Preto - Distribuido...,5319,https://www.mercadolivre.com.br/apple-iphone-1...
5,Apple iPhone 14 Plus (128 Gb) - Estelar - Dist...,4670,https://www.mercadolivre.com.br/apple-iphone-1...
6,Apple iPhone 14 (128 GB) - Roxo - Distribuidor...,5499,https://www.mercadolivre.com.br/apple-iphone-1...
7,Apple iPhone 12 (128 GB) - Preto - Distribuido...,3999,https://www.mercadolivre.com.br/apple-iphone-1...
8,Apple iPhone 14 Plus (128 GB) - Meia-noite - D...,4670,https://www.mercadolivre.com.br/apple-iphone-1...
9,Apple iPhone 15 (128 GB) - Amarelo - Distribui...,7438,https://www.mercadolivre.com.br/apple-iphone-1...


In [183]:
model_mapping = {'XS': 16, 'XR': 17, 'SE': 18, 'X': 19}

def extract_version(title):
    for model, version in model_mapping.items():
        if model in title.split(' '):
            return version
    
    numbers = re.findall(r'\d+', title)
    
    if numbers:
        number = int(numbers[0])
        if 2 < number < 16:
            return number
    return None


In [184]:
mercadodf['iphone_version'] = mercadodf['title'].apply(extract_version)

In [185]:
mercadodf['iphone_version'].sort_values().unique()

array([ 3,  4,  5,  6,  7,  8, 11, 12, 13, 14, 15, 16, 17, 18, 19],
      dtype=int64)

In [186]:
mercadodf.describe()

Unnamed: 0,price,iphone_version
count,292.0,292.0
mean,4025.667808,12.688356
std,3397.343982,3.692958
min,69.0,3.0
25%,1326.0,11.0
50%,2999.0,14.0
75%,5863.0,15.0
max,24500.0,19.0


In [187]:
version_by_price = mercadodf.groupby('iphone_version')['price'].agg(['min', 'max']).sort_values(by='max', ascending=False)

In [188]:
version_by_price

Unnamed: 0_level_0,min,max
iphone_version,Unnamed: 1_level_1,Unnamed: 2_level_1
13,1199,24500
15,4889,13559
14,4299,10589
3,339,5200
12,1899,5198
11,1789,3890
16,1419,3499
17,1279,2699
18,363,2589
19,1409,1800


In [189]:
min_prices = []
max_prices = []
versions = []

for version in version_by_price.index:
    min_prices.append(mercadodf.loc[(mercadodf['iphone_version'] == version) & (mercadodf['price'] == version_by_price.loc[version]['min'])]['link'].values)
    max_prices.append(mercadodf.loc[(mercadodf['iphone_version'] == version) & (mercadodf['price'] == version_by_price.loc[version]['max'])]['link'].values)
    versions.append(version)

In [190]:
minmax_pricesdf = pd.DataFrame(data=versions, columns=['version'])
minmax_pricesdf['min_price'] = min_prices
minmax_pricesdf['max_price'] = max_prices

In [191]:
minmax_pricesdf[minmax_pricesdf['version'] == 12]['min_price'].values

array([array(['https://produto.mercadolivre.com.br/MLB-3741163937-vitrine-iphone-12-mini-64gb-blue-5g-_JM?searchVariation=180734340728#searchVariation%3D180734340728%26position%3D29%26search_layout%3Dstack%26type%3Ditem%26tracking_id%3D20b99de1-0bd1-4d52-9160-31a4498d06fe'],
             dtype=object)                                                                                                                                                                                                                                                         ],
      dtype=object)

In [192]:
minmax_pricesdf.to_csv('minmax_pricesdf.csv', index=False)