## data loading


In [1]:
import pandas as pd

# Load the dataset
df = pd.read_excel("nutritrack_dataset_600.xlsx")

# Show first 5 rows
print("----- First 5 Rows of Dataset -----")
print(df.head())

# Show dataset shape
print("\n----- Dataset Shape (Rows, Columns) -----")
print(df.shape)

# Show all column names
print("\n----- Column Names -----")
print(df.columns)

----- First 5 Rows of Dataset -----
  meal_id                                   meal_description cooking_method  \
0  meal_1  30g potato; 200g rice; 200g egg; 150g butter; ...        steamed   
1  meal_2                                           50g beef         boiled   
2  meal_3                                         200g beans         boiled   
3  meal_4  30g banana; 50g butter; 200g apple; 100g lenti...        roasted   
4  meal_5                            100g cheese; 30g potato            raw   

   num_ingredients  estimated_calories  
0                6              1720.4  
1                1               127.0  
2                1               717.9  
3                5               768.4  
4                2               423.7  

----- Dataset Shape (Rows, Columns) -----
(600, 5)

----- Column Names -----
Index(['meal_id', 'meal_description', 'cooking_method', 'num_ingredients',
       'estimated_calories'],
      dtype='object')


## data exploration





In [None]:
# ----- Basic Information About Dataset -----
print("----- Dataset Info -----")
print(df.info())


----- Dataset Info -----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   meal_id             600 non-null    object 
 1   meal_description    600 non-null    object 
 2   cooking_method      600 non-null    object 
 3   num_ingredients     600 non-null    int64  
 4   estimated_calories  600 non-null    float64
dtypes: float64(1), int64(1), object(3)
memory usage: 23.6+ KB
None


In [3]:
print("\n----- Missing Values in Each Column -----")
print(df.isnull().sum())


----- Missing Values in Each Column -----
meal_id               0
meal_description      0
cooking_method        0
num_ingredients       0
estimated_calories    0
dtype: int64


In [4]:
print("\n----- Statistical Summary -----")
print(df.describe())


----- Statistical Summary -----
       num_ingredients  estimated_calories
count       600.000000          600.000000
mean          3.415000          769.642667
std           1.718827          661.309217
min           1.000000            7.900000
25%           2.000000          278.375000
50%           3.000000          606.200000
75%           5.000000         1075.600000
max           6.000000         3722.500000


In [5]:
print("\n----- Number of Duplicate Rows -----")
print(df.duplicated().sum())



----- Number of Duplicate Rows -----
0


In [6]:
print("\n----- Unique Values in Categorical Columns -----")
for col in df.select_dtypes(include=['object']).columns:
    print(f"\nColumn: {col}")
    print(df[col].unique())


----- Unique Values in Categorical Columns -----

Column: meal_id
['meal_1' 'meal_2' 'meal_3' 'meal_4' 'meal_5' 'meal_6' 'meal_7' 'meal_8'
 'meal_9' 'meal_10' 'meal_11' 'meal_12' 'meal_13' 'meal_14' 'meal_15'
 'meal_16' 'meal_17' 'meal_18' 'meal_19' 'meal_20' 'meal_21' 'meal_22'
 'meal_23' 'meal_24' 'meal_25' 'meal_26' 'meal_27' 'meal_28' 'meal_29'
 'meal_30' 'meal_31' 'meal_32' 'meal_33' 'meal_34' 'meal_35' 'meal_36'
 'meal_37' 'meal_38' 'meal_39' 'meal_40' 'meal_41' 'meal_42' 'meal_43'
 'meal_44' 'meal_45' 'meal_46' 'meal_47' 'meal_48' 'meal_49' 'meal_50'
 'meal_51' 'meal_52' 'meal_53' 'meal_54' 'meal_55' 'meal_56' 'meal_57'
 'meal_58' 'meal_59' 'meal_60' 'meal_61' 'meal_62' 'meal_63' 'meal_64'
 'meal_65' 'meal_66' 'meal_67' 'meal_68' 'meal_69' 'meal_70' 'meal_71'
 'meal_72' 'meal_73' 'meal_74' 'meal_75' 'meal_76' 'meal_77' 'meal_78'
 'meal_79' 'meal_80' 'meal_81' 'meal_82' 'meal_83' 'meal_84' 'meal_85'
 'meal_86' 'meal_87' 'meal_88' 'meal_89' 'meal_90' 'meal_91' 'meal_92'
 'meal_93