In [2]:
import pandas as pd
import numpy as np

In [3]:
# Synthetic order data: 12 consecutive days cycling through four regions.
# 'sales' and 'discount' deliberately contain missing values (NaN), and
# 'discount' also contains negative entries, so later cells have something to clean.
data = {
    'index': list(range(12)),
    'order_date': [f'2025-07-{day:02d}' for day in range(1, 13)],
    'region': ['North', 'South', 'East', 'West'] * 3,
    'sales': [
        250.0, np.nan, 150.0, 300.0,
        np.nan, 200.0, 100.0, np.nan,
        180.0, 220.0, np.nan, 275.0,
    ],
    'discount': [
        5.0, 0.0, np.nan, 10.0,
        np.nan, 0.0, -5.0, 20.0,
        5.0, np.nan, -10.0, 15.0,
    ],
}

df = pd.DataFrame(data=data)

In [3]:
# Show the first 12 records (head(12) returns up to 12 rows)

df.head(12)

Unnamed: 0,index,order_date,region,sales,discount
0,0,2025-07-01,North,250.0,5.0
1,1,2025-07-02,South,,0.0
2,2,2025-07-03,East,150.0,
3,3,2025-07-04,West,300.0,10.0
4,4,2025-07-05,North,,
5,5,2025-07-06,South,200.0,0.0
6,6,2025-07-07,East,100.0,-5.0
7,7,2025-07-08,West,,20.0
8,8,2025-07-09,North,180.0,5.0
9,9,2025-07-10,South,220.0,


In [4]:
# Store a zero-imputed copy of 'sales' in a new column; the original 'sales'
# column keeps its NaN values.
sales_without_gaps = df['sales'].fillna(0)
df['sales_filled'] = sales_without_gaps
df.head(12)

Unnamed: 0,index,order_date,region,sales,discount,sales_filled
0,0,2025-07-01,North,250.0,5.0,250.0
1,1,2025-07-02,South,,0.0,0.0
2,2,2025-07-03,East,150.0,,150.0
3,3,2025-07-04,West,300.0,10.0,300.0
4,4,2025-07-05,North,,,0.0
5,5,2025-07-06,South,200.0,0.0,200.0
6,6,2025-07-07,East,100.0,-5.0,100.0
7,7,2025-07-08,West,,20.0,0.0
8,8,2025-07-09,North,180.0,5.0,180.0
9,9,2025-07-10,South,220.0,,220.0


# 2. Cleaning

In [5]:
# Replace every discount with its absolute value (NaN entries stay NaN)
df['discount'] = abs(df['discount'])
# Display only the row at position 6
df.iloc[6, :]

index                    6
order_date      2025-07-07
region                East
sales                100.0
discount               5.0
sales_filled         100.0
Name: 6, dtype: object

In [6]:
# Drop rows with missing data. `subset` names the columns that are checked;
# how='all' removes a row only when every one of those columns is missing.
checked_columns = ['sales', 'discount']
df_clean = df.dropna(how='all', subset=checked_columns)
df_clean.head()

Unnamed: 0,index,order_date,region,sales,discount,sales_filled
0,0,2025-07-01,North,250.0,5.0,250.0
1,1,2025-07-02,South,,0.0,0.0
2,2,2025-07-03,East,150.0,,150.0
3,3,2025-07-04,West,300.0,10.0,300.0
5,5,2025-07-06,South,200.0,0.0,200.0


# Filtering


In [6]:
# isna() returns a boolean mask (True where the value is missing);
# fillna(), by contrast, replaces the missing values with the value you supply.
is_west = df['region'].eq('West')
sales_missing = df['sales'].isna()
# Keep only West-region rows whose sales value is missing
df_filtered = df.loc[is_west & sales_missing]
df_filtered.head()

Unnamed: 0,index,order_date,region,sales,discount
7,7,2025-07-08,West,,20.0


# Filling

In [8]:
# Mean of the 'discount' column (Series.mean skips NaN by default)
mean_discount = df['discount'].mean()
# Series in which every missing discount is replaced by that mean
df_discount = df['discount'].fillna(mean_discount)

df['discount'] = df_discount
# Row at position 2 had a missing discount, so it now shows the mean
df.iloc[2, :]

index                    2
order_date      2025-07-03
region                East
sales                150.0
discount          7.777778
sales_filled         150.0
Name: 2, dtype: object

# New columns


In [13]:
# Net sales = sales reduced by the discount percentage.
# NaN in either 'sales' or 'discount' propagates through the arithmetic, so rows
# missing either value end up with NaN in 'net_sales'.
discount_factor = 1 - df['discount'] / 100
df['net_sales'] = df['sales'] * discount_factor
df

Unnamed: 0,index,order_date,region,sales,discount,sales_filled,net_sales
0,0,2025-07-01,North,250.0,5.0,250.0,237.5
1,1,2025-07-02,South,,0.0,0.0,
2,2,2025-07-03,East,150.0,7.777778,150.0,138.333333
3,3,2025-07-04,West,300.0,10.0,300.0,270.0
4,4,2025-07-05,North,,7.777778,0.0,
5,5,2025-07-06,South,200.0,0.0,200.0,200.0
6,6,2025-07-07,East,100.0,5.0,100.0,95.0
7,7,2025-07-08,West,,20.0,0.0,
8,8,2025-07-09,North,180.0,5.0,180.0,171.0
9,9,2025-07-10,South,220.0,7.777778,220.0,202.888889


In [5]:
# Keep only rows that have a sales value, then order them from the most recent
# order_date to the oldest.
rows_with_sales = df.dropna(subset=['sales'])
df_sorting = rows_with_sales.sort_values('order_date', ascending=False)
df_sorting

Unnamed: 0,index,order_date,region,sales,discount
11,11,2025-07-12,West,275.0,15.0
9,9,2025-07-10,South,220.0,
8,8,2025-07-09,North,180.0,5.0
6,6,2025-07-07,East,100.0,-5.0
5,5,2025-07-06,South,200.0,0.0
3,3,2025-07-04,West,300.0,10.0
2,2,2025-07-03,East,150.0,
0,0,2025-07-01,North,250.0,5.0
