<h1 style = "text-align: center;"><strong>NUTRITION NOTEBOOK</strong></h1>

---
<h2 style = "text-align: center;"><em>State the Problem</em></h2>

<h3 style = "text-align: center;">"5W1H" Method</h3>

<p style = "text-align: center;">I want to analyse food contents, to create healthy menus.</p>

---
<h2 style = "text-align: center;"><em>Prepare the Tools</em></h2>

<h3 style = "text-align: center;">Resources</h3>

<ul>
    <li>Domain Documentation
        <ul>
            <li><a href="https://en.wikipedia.org/wiki/Statistics">Wikipedia: Nutrition</a></li>
        </ul>
    </li>
    <br>
    <li>Data Documentation
        <ul>
            <li><a href="https://www.data.gouv.fr/fr/datasets/table-de-composition-nutritionnelle-des-aliments-ciqual/">Data.gouv: Ciqual Table Information</a></li>
            <li><a href="https://ciqual.anses.fr/#/cms/download/node/20">Ciqual.anses: Ciqual Table Download Page</a></li>
        </ul>
    </li>
    <br>
    <li>Technical Documentation
        <ul>
            <li><a href="https://pandas.pydata.org/docs/">Pandas</a></li>
            <li><a href="https://dev.mysql.com/doc/">MySQL</a></li>
        </ul>
    </li>
</ul>

<h3 style = "text-align: center;">Libraries</h3>

In [9]:
# Import libraries
import os
import pandas as pd

<h3 style = "text-align: center;">Functions</h3>

In [11]:
# Define a function to check the directory
def check_directory():
    """
    Purpose:
        Check the current directory and its contents
    Parameters:
        None
    Returns:
        None: This function prints the current directory and its contents to the console
    """
    try:
        current_directory = os.getcwd()
        print("DIRECTORY")
        print(current_directory)
        print()
        print("FILES IN DIRECTORY")
        files = os.listdir(current_directory)
        if files:
            for file in files:
                print(file)
            print()
        else:
            print("Files not found")
            
    except Exception as e:
        print(f"Error: {e}")
        raise


# Define a function to check a dataframe
def check_dataframe(dataframe: pd.DataFrame):
    """
    Purpose:
        Check a DataFrame for general information
    Parameters:
        dataframe (pd.DataFrame): DataFrame to check
    Returns:
        None: This function prints general information to the console
    """
    try:
        print("GENERAL INFORMATION")
        print(dataframe.info())
        print()
        print("FIRST ROWS")
        print(dataframe.head())
        print()
        print("LAST ROWS")
        print(dataframe.tail())
        print()
        print("DUPLICATED ROWS")
        print(f"Count of duplicated rows: {dataframe.duplicated().sum()}")
        print()
        print("MISSING VALUES BY COLUMN")
        print(dataframe.isna().sum().reset_index(name = "missing_values_count").rename(columns = {"index": "column_name"}))
        print()
        print("UNIQUE VALUES BY COLUMN")
        print(dataframe.nunique().reset_index(name = "unique_values_count").rename(columns = {"index": "column_name"}))
        print()
        print("STATISTICS")
        print(dataframe.describe(include = "all"))
        
    except Exception as e:
        print(f"Error: {e}")
        raise


# Convert a dataframe into a file (CSV, JSON, XLS, XML)
def dataframe_to_file(dataframe: pd.DataFrame, filename: str, file_format: str, delimiter: str = ","):
    """
    Purpose: 
        Convert a DataFrame into a specified file format (CSV, JSON, XLS, or XML)
    Parameters:
        dataframe (pd.DataFrame): The DataFrame to convert into a file
        filename (str): The filename for the output file
        file_format (str): The format of the file ("csv", "json", "xls", "xml")
        delimiter (str): The delimiter to use if the file format is CSV
    Returns:
        str: A message confirming the file creation 
        or 
        None: This funciton prints an error message; if the conversion fails
    """

    try:
        if os.path.isfile(filename):
            print( f"Creation of '{filename}' failed")

        if file_format.lower() == "csv":
            dataframe.to_csv(filename, sep = delimiter, index = False)
        
        elif file_format.lower() == "json":
            dataframe.to_json(filename, orient = "records", lines = True)
        
        elif file_format.lower() == "xls" or file_format.lower() == "xlsx":
            dataframe.to_excel(filename, index = False)
        
        elif file_format.lower() == "xml":
            dataframe.to_xml(filename, index = False)
        
        else:
            print("Error: Unsupported file format")

        print(f"Creation of '{filename}' successful")
            
    except Exception as e:
        print(f"Error: {e}")
        raise

---
<h2 style = "text-align: center;"><em>Collect Data</em></h2>

<h3 style = "text-align: center;">Source(s)</h3>

In [14]:
# Check the directory        
check_directory()

DIRECTORY
C:\Users\willi\data_resources\portfolio\nutrition_project

FILES IN DIRECTORY
.ipynb_checkpoints
ciqual_table.xls
nutrition_database_carbohydrates_queries.sql
nutrition_database_fats_queries.sql
nutrition_database_macronutrients_queries.sql
nutrition_database_minerals_queries.sql
nutrition_database_structure.sql
nutrition_database_vitamins_queries.sql
nutrition_notebook.ipynb
nutrition_notebook_v1.ipynb
table_ciqual_documentation.pdf



In [15]:
# Store the data in a dataframe
data_file_path = r"C:\Users\willi\data_resources\portfolio\nutrition_project\ciqual_table.xls"
nutrition_df = pd.read_excel(data_file_path, engine = "xlrd")

<h3 style = "text-align: center;">Check(s)</h3>

In [17]:
# Check the dataframe
check_dataframe(nutrition_df)

GENERAL INFORMATION
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3186 entries, 0 to 3185
Data columns (total 76 columns):
 #   Column                                                 Non-Null Count  Dtype 
---  ------                                                 --------------  ----- 
 0   alim_grp_code                                          3186 non-null   int64 
 1   alim_ssgrp_code                                        3186 non-null   int64 
 2   alim_ssssgrp_code                                      3186 non-null   int64 
 3   alim_grp_nom_fr                                        3141 non-null   object
 4   alim_ssgrp_nom_fr                                      3141 non-null   object
 5   alim_ssssgrp_nom_fr                                    3141 non-null   object
 6   alim_code                                              3186 non-null   int64 
 7   alim_nom_fr                                            3186 non-null   object
 8   alim_nom_sci                          

<h3 style = "text-align: center;">Comment(s)</h3>

<ul>
    <li>Column "alim_grp_code"
        <ul>Code of the food group (e.g. "11")</ul>
    </li>
    <br>
    <li>Column "alim_ssgrp_code"
        <ul>Code of the food sub-group (e.g. "1103")</ul>
    </li>
    <br>
    <li>Column "alim_ssssgrp_code"
        <ul>Code of the food sub-sub-group (e.g. "0")</ul>
    </li>
    <br>
    <li>Column "alim_grp_nom_fr"
        <ul>French name of the food group (e.g. "aliments infantiles")</ul>
    </li>
    <br>
    <li>Column "alim_ssgrp_nom_fr"
        <ul>French name of the food sub-group (e.g. "desserts infantiles")</ul>
    </li>
    <br>
    <li>Column "alim_ssssgrp_nom_fr"
        <ul>French name of the food sub-sub-group (e.g. "-")</ul>
    </li>
    <br>
    <li>Column "alim_code"
        <ul>Code of the food (e.g. "13166")</ul>
    </li>
    <br>
    <li>Column "alim_nom_fr"
        <ul>French name of the food (e.g. "Dessert lacté infantile nature...")</ul>
    </li>
    <br>
    <li>Column "alim_nom_sci"
        <ul>Scientific name of the food (e.g. "NaN")</ul>
    </li>
    <br>
    <li>Other columns
        <ul>Nutrient content (e.g. "1,5")</ul>
    </li>
</ul>

In [20]:
# Check the unique names of the groups, sub-groups and sub-sub-groups of food
alim_grp_nom_fr_list = nutrition_cleaned_df["alim_grp_nom_fr"].unique().tolist()
print("NAME OF THE FOOD GROUPS")
print(alim_grp_nom_fr_list)
print()

alim_ssgrp_nom_fr_list = nutrition_cleaned_df["alim_ssgrp_nom_fr"].unique().tolist()
print("NAME OF THE FOOD SUB-GROUPS")
print(alim_ssgrp_nom_fr_list)
print()

alim_ssssgrp_nom_fr_list = nutrition_cleaned_df["alim_ssssgrp_nom_fr"].unique().tolist()
print("NAME OF THE FOOD SUB-SUB-GROUPS")
print(alim_ssssgrp_nom_fr_list)
print()

NameError: name 'nutrition_cleaned_df' is not defined

---
<h2 style = "text-align: center;"><em>Clean and Prepare Data</em></h2>

<h3 style = "text-align: center;">Cleaning and Preparation</h3>

<h4 style = "text-align: center;">Backup</h4>

In [None]:
# Save the dataframe
nutrition_cleaned_df = nutrition_df.copy()

<h4 style = "text-align: center;">Food Group Column ("alim_grp_nom_fr")</h4>

In [None]:
# Check missing names of food groups
nutrition_cleaned_df.loc[nutrition_cleaned_df["alim_grp_nom_fr"].isna()]

In [None]:
# Replace missing values by relevant names of food groups
nutrition_cleaned_df.loc[
    (nutrition_cleaned_df["alim_grp_nom_fr"].isna()) & (nutrition_cleaned_df["alim_grp_code"] == 3),
    "alim_grp_nom_fr"
] = "farines, amidons et bases de pâtes"

nutrition_cleaned_df.loc[
    (nutrition_cleaned_df["alim_grp_nom_fr"] == "farines, amidons et bases de pâtes") & (nutrition_cleaned_df["alim_nom_fr"].str.startswith("Farine")),
    ["alim_ssgrp_nom_fr", "alim_ssssgrp_nom_fr"]
] = "farines"

nutrition_cleaned_df.loc[
    (nutrition_cleaned_df["alim_grp_nom_fr"] == "farines, amidons et bases de pâtes") & (nutrition_cleaned_df["alim_nom_fr"].str.startswith("Amidon")),
    ["alim_ssgrp_nom_fr", "alim_ssssgrp_nom_fr"]
] = "amidons"

nutrition_cleaned_df.loc[
    [1097, 1098],
    ["alim_ssgrp_nom_fr", "alim_ssssgrp_nom_fr"]
] = "amidons"

nutrition_cleaned_df.loc[
    (nutrition_cleaned_df["alim_grp_nom_fr"] == "farines, amidons et bases de pâtes") & (nutrition_cleaned_df["alim_nom_fr"].str.startswith("Pâte")),
    ["alim_ssgrp_nom_fr", "alim_ssssgrp_nom_fr"]
] = "bases de pâtes"

nutrition_cleaned_df.loc[
    (nutrition_cleaned_df["alim_grp_nom_fr"] == "farines, amidons et bases de pâtes") & (nutrition_cleaned_df["alim_nom_fr"].str.contains("brick")),
    ["alim_ssgrp_nom_fr", "alim_ssssgrp_nom_fr"]
] = "bases de pâtes"

# Drop irrelevant row
irrelevant_row = nutrition_cleaned_df[(nutrition_cleaned_df["alim_grp_nom_fr"].isna()) & (nutrition_cleaned_df["alim_grp_code"] == 0)].index
nutrition_cleaned_df.drop(irrelevant_row, inplace = True)

# Check the replacements of the names of food groups
nutrition_cleaned_df.loc[nutrition_cleaned_df["alim_grp_nom_fr"] == "farines, amidons et bases de pâtes"]

<h4 style = "text-align: center;">Food Sub-Group Column ("alim_ssgrp_nom_fr")</h4>

In [None]:
# Check "-" name of food sub-groups
nutrition_cleaned_df.loc[nutrition_cleaned_df["alim_ssgrp_nom_fr"] == "-"]

In [None]:
# Replace "-" values by a relevant name of food sub-group
nutrition_cleaned_df.loc[
    nutrition_cleaned_df["alim_ssgrp_nom_fr"] == "-",
    ["alim_ssgrp_nom_fr", "alim_ssssgrp_nom_fr"]
] = "glaces et sorbets"

# Check the replacement of the "-" name of the food group
nutrition_cleaned_df.loc[nutrition_cleaned_df["alim_ssgrp_nom_fr"] == "glaces et sorbets"]

<h4 style = "text-align: center;">Food Sub-Sub-Group Column ("alim_ssssgrp_nom_fr")</h4>

In [None]:
# Check "-" name of food sub-sub-groups
nutrition_cleaned_df.loc[nutrition_cleaned_df["alim_ssssgrp_nom_fr"] == "-"]

In [None]:
# Replace "-" values by a relevant name of food sub-group
nutrition_cleaned_df.loc[
    nutrition_cleaned_df["alim_ssssgrp_nom_fr"] == "-",
    "alim_ssssgrp_nom_fr"
] = "sous-sous-catégories non définie"

# Check the replacement of the name of the food groups
nutrition_cleaned_df.loc[nutrition_cleaned_df["alim_ssssgrp_nom_fr"] == "sous-sous-catégories non définie"]

<h4 style = "text-align: center;">Food Name Column ("alim_nom_fr")</h4>

In [None]:
# Check for duplicated food french names
duplicated_food_french_names_df = nutrition_cleaned_df[nutrition_cleaned_df.duplicated(subset = "alim_nom_fr", keep = False)]
pd.set_option("display.max_columns", None)
duplicated_food_french_names_df

In [None]:
# Fill missing data
nutrition_cleaned_df.iloc[3120, nutrition_cleaned_df.columns.get_loc("Energie, Règlement UE N° 1169/2011 (kJ/100 g)")] = 1160
nutrition_cleaned_df.iloc[[3118, 3119, 3120, 3122], :]

In [None]:
# Drop the duplicated row
pd.reset_option("display.max_columns")
nutrition_cleaned_df.drop(index = 3120, inplace = True)
nutrition_cleaned_df.reset_index(drop = True, inplace = True)
nutrition_cleaned_df.iloc[[3118, 3119, 3120], :]

<h4 style = "text-align: center;">Food Scientific Name Column ("alim_nom_sci")</h4>

In [None]:
# Drop the column of scientific names
nutrition_cleaned_df.drop(columns = ["alim_nom_sci"], inplace = True)

<h4 style = "text-align: center;">Nutrients Columns</h4>

In [None]:
# Store the columns containing nutritional values in a list
nutritional_columns_list = [
    "Energie, Règlement UE N° 1169/2011 (kJ/100 g)",
    "Energie, Règlement UE N° 1169/2011 (kcal/100 g)",
    "Energie, N x facteur Jones, avec fibres  (kJ/100 g)",
    "Energie, N x facteur Jones, avec fibres  (kcal/100 g)",
    "Eau (g/100 g)",
    "Protéines, N x facteur de Jones (g/100 g)",
    "Protéines, N x 6.25 (g/100 g)",
    "Glucides (g/100 g)",
    "Lipides (g/100 g)",
    "Sucres (g/100 g)",
    "Fructose (g/100 g)",
    "Galactose (g/100 g)",
    "Glucose (g/100 g)",
    "Lactose (g/100 g)",
    "Maltose (g/100 g)",
    "Saccharose (g/100 g)",
    "Amidon (g/100 g)",
    "Fibres alimentaires (g/100 g)",
    "Polyols totaux (g/100 g)",
    "Cendres (g/100 g)",
    "Alcool (g/100 g)",
    "Acides organiques (g/100 g)",
    "AG saturés (g/100 g)",
    "AG monoinsaturés (g/100 g)",
    "AG polyinsaturés (g/100 g)",
    "AG 4:0, butyrique (g/100 g)",
    "AG 6:0, caproïque (g/100 g)",
    "AG 8:0, caprylique (g/100 g)",
    "AG 10:0, caprique (g/100 g)",
    "AG 12:0, laurique (g/100 g)",
    "AG 14:0, myristique (g/100 g)",
    "AG 16:0, palmitique (g/100 g)",
    "AG 18:0, stéarique (g/100 g)",
    "AG 18:1 9c (n-9), oléique (g/100 g)",
    "AG 18:2 9c,12c (n-6), linoléique (g/100 g)",
    "AG 18:3 c9,c12,c15 (n-3), alpha-linolénique (g/100 g)",
    "AG 20:4 5c,8c,11c,14c (n-6), arachidonique (g/100 g)",
    "AG 20:5 5c,8c,11c,14c,17c (n-3) EPA (g/100 g)",
    "AG 22:6 4c,7c,10c,13c,16c,19c (n-3) DHA (g/100 g)",
    "Cholestérol (mg/100 g)",
    "Sel chlorure de sodium (g/100 g)",
    "Calcium (mg/100 g)",
    "Chlorure (mg/100 g)",
    "Cuivre (mg/100 g)",
    "Fer (mg/100 g)",
    "Iode (µg/100 g)",
    "Magnésium (mg/100 g)",
    "Manganèse (mg/100 g)",
    "Phosphore (mg/100 g)",
    "Potassium (mg/100 g)",
    "Sélénium (µg/100 g)",
    "Sodium (mg/100 g)",
    "Zinc (mg/100 g)",
    "Rétinol (µg/100 g)",
    "Beta-Carotène (µg/100 g)",
    "Vitamine D (µg/100 g)",
    "Vitamine E (mg/100 g)",
    "Vitamine K1 (µg/100 g)",
    "Vitamine K2 (µg/100 g)",
    "Vitamine C (mg/100 g)",
    "Vitamine B1 ou Thiamine (mg/100 g)",
    "Vitamine B2 ou Riboflavine (mg/100 g)",
    "Vitamine B3 ou PP ou Niacine (mg/100 g)",
    "Vitamine B5 ou Acide pantothénique (mg/100 g)",
    "Vitamine B6 (mg/100 g)",
    "Vitamine B9 ou Folates totaux (µg/100 g)",
    "Vitamine B12 (µg/100 g)"
]

# Iterate through the columns containing nutritional values to format the type to numeric 
for column_name in nutritional_columns_list:
    if column_name in nutrition_cleaned_df.columns:
        nutrition_cleaned_df[column_name] = pd.to_numeric(nutrition_cleaned_df[column_name], errors = "coerce")

# Handle missing values
nutrition_cleaned_df.replace("", pd.NA, inplace = True)

<h3 style = "text-align: center;">Check(s)</h3>

In [None]:
# Check the unique names of the groups, sub-groups and sub-sub-groups
cleaned_alim_grp_nom_fr_list = nutrition_cleaned_df["alim_grp_nom_fr"].unique().tolist()
print("NAME OF THE FOOD GROUPS")
print(cleaned_alim_grp_nom_fr_list)
print()

cleaned_alim_ssgrp_nom_fr_list = nutrition_cleaned_df["alim_ssgrp_nom_fr"].unique().tolist()
print("NAME OF THE FOOD SUB-GROUPS")
print(cleaned_alim_ssgrp_nom_fr_list)
print()

cleaned_alim_ssssgrp_nom_fr_list = nutrition_cleaned_df["alim_ssssgrp_nom_fr"].unique().tolist()
print("NAME OF THE FOOD SUB-SUB-GROUPS")
print(cleaned_alim_ssssgrp_nom_fr_list)
print()

In [None]:
# Check the dataframe
check_dataframe(nutrition_cleaned_df)

<h3 style = "text-align: center;">Comment(s)</h3>

<ul>
    <li>Irrelevant column
        <ul>Drop of Column "alim_nom_sci"</ul>
    </li>
    <br>
    <li>Duplicated rows
        <ul>No duplicated rows but Merge of 2 rows in Column "alim_nom_fr"</ul>
    </li>
    <br>
    <li>Missing values
        <ul>Manual fill of missing values in Column "alim_grp_nom_fr"</ul>
        <ul>Manual fill of missing values in Column "alim_ssgrp_nom_fr"</ul>
        <ul>Manual fill of missing values in Column "alim_ssssgrp_nom_fr"</ul>
    </li>
    <br>
    <li>Data formats
        <ul>No formatting</ul>
    </li>
    <br>
    <li>Data types
        <ul>Conversion of values in Nutrients Columns to numeric</ul>
    </li>
    <br>
    <li>Additional Features
        <ul>No additional features</ul>
    </li>
</ul>

<h2 style = "text-align: center;"><em>Data Wrangling</em></h2>

<h3 style = "text-align: center;">CSV Export</h3>

In [None]:
# Split the dataframe into food content categories and Prepare the columns for SQL

# Store the Macro-nutrients
macro_nutrients_df = nutrition_cleaned_df[[
    "alim_nom_fr",
    "Energie, Règlement UE N° 1169/2011 (kJ/100 g)",
    "Energie, Règlement UE N° 1169/2011 (kcal/100 g)",
    "Energie, N x facteur Jones, avec fibres  (kJ/100 g)",
    "Energie, N x facteur Jones, avec fibres  (kcal/100 g)",
    "Protéines, N x facteur de Jones (g/100 g)",
    "Glucides (g/100 g)",                                                  
    "Lipides (g/100 g)",
    "Eau (g/100 g)"
]]
macro_nutrients_df = macro_nutrients_df.rename(columns = {
    "alim_nom_fr": "food_name_in_french",
    "Energie, Règlement UE N° 1169/2011 (kJ/100 g)": "energy_in_kJ_for_100g",
    "Energie, Règlement UE N° 1169/2011 (kcal/100 g)": "energy_in_kcal_for_100g",
    "Energie, N x facteur Jones, avec fibres  (kJ/100 g)": "energy_with_jones_and_fibers_in_kJ_for_100g",
    "Energie, N x facteur Jones, avec fibres  (kcal/100 g)": "energy_with_jones_and_fibers_in_kcal_for_100g",
    "Protéines, N x facteur de Jones (g/100 g)": "proteins_with_jones_in_g_for_100g",
    "Glucides (g/100 g)": "carbohydrates_in_g_for_100g",
    "Lipides (g/100 g)": "fats_in_g_for_100g",
    "Eau (g/100 g)": "water_in_g_for_100g"
})


# Store the Carbohydrates
carbs_df = nutrition_cleaned_df[[
    "alim_nom_fr",
    "Sucres (g/100 g)",
    "Fructose (g/100 g)",
    "Galactose (g/100 g)",
    "Glucose (g/100 g)",
    "Lactose (g/100 g)",
    "Maltose (g/100 g)",
    "Saccharose (g/100 g)",
    "Amidon (g/100 g)",
    "Fibres alimentaires (g/100 g)",
    "Polyols totaux (g/100 g)"
]]
carbs_df = carbs_df.rename(columns = {
    "alim_nom_fr": "food_name_in_french",
    "Sucres (g/100 g)": "sugars_in_g_for_100g",
    "Fructose (g/100 g)": "fructose_in_g_for_100g",
    "Galactose (g/100 g)": "galactose_in_g_for_100g",
    "Glucose (g/100 g)": "glucose_in_g_for_100g",
    "Lactose (g/100 g)": "lactose_in_g_for_100g",
    "Maltose (g/100 g)": "maltose_in_g_for_100g",
    "Saccharose (g/100 g)": "sucrose_in_g_for_100g",
    "Amidon (g/100 g)": "starch_in_g_for_100g",
    "Fibres alimentaires (g/100 g)": "dietary_fibers_in_g_for_100g",
    "Polyols totaux (g/100 g)": "total_polyols_in_g_for_100g"
})


# Store the Fats
fats_df = nutrition_cleaned_df[[
    "alim_nom_fr",
    "AG saturés (g/100 g)",
    "AG monoinsaturés (g/100 g)",
    "AG polyinsaturés (g/100 g)",
    "AG 4:0, butyrique (g/100 g)",
    "AG 6:0, caproïque (g/100 g)",
    "AG 8:0, caprylique (g/100 g)",
    "AG 10:0, caprique (g/100 g)",
    "AG 12:0, laurique (g/100 g)",
    "AG 14:0, myristique (g/100 g)",
    "AG 16:0, palmitique (g/100 g)",
    "AG 18:0, stéarique (g/100 g)",
    "AG 18:1 9c (n-9), oléique (g/100 g)",
    "AG 18:2 9c,12c (n-6), linoléique (g/100 g)",
    "AG 18:3 c9,c12,c15 (n-3), alpha-linolénique (g/100 g)",
    "AG 20:4 5c,8c,11c,14c (n-6), arachidonique (g/100 g)",
    "AG 20:5 5c,8c,11c,14c,17c (n-3) EPA (g/100 g)",
    "AG 22:6 4c,7c,10c,13c,16c,19c (n-3) DHA (g/100 g)",
    "Cholestérol (mg/100 g)"
]]
fats_df = fats_df.rename(columns = {
    "alim_nom_fr": "food_name_in_french",
    "AG saturés (g/100 g)": "saturated_fats_in_g_for_100g",
    "AG monoinsaturés (g/100 g)": "monounsaturated_fats_in_g_for_100g",
    "AG polyinsaturés (g/100 g)": "polyunsaturated_fats_in_g_for_100g",
    "AG 4:0, butyrique (g/100 g)": "butyric_acid_4_0_in_g_for_100g",
    "AG 6:0, caproïque (g/100 g)": "caproic_acid_6_0_in_g_for_100g",
    "AG 8:0, caprylique (g/100 g)": "caprylic_acid_8_0_in_g_for_100g",
    "AG 10:0, caprique (g/100 g)": "capric_acid_10_0_in_g_for_100g",
    "AG 12:0, laurique (g/100 g)": "lauric_acid_12_0_in_g_for_100g",
    "AG 14:0, myristique (g/100 g)": "myristic_acid_14_0_in_g_for_100g",
    "AG 16:0, palmitique (g/100 g)": "palmitic_acid_16_0_in_g_for_100g",
    "AG 18:0, stéarique (g/100 g)": "stearic_acid_18_0_in_g_for_100g",
    "AG 18:1 9c (n-9), oléique (g/100 g)": "oleic_acid_18_1_n_9_in_g_for_100g",
    "AG 18:2 9c,12c (n-6), linoléique (g/100 g)": "linoleic_acid_18_2_n_6_in_g_for_100g",
    "AG 18:3 c9,c12,c15 (n-3), alpha-linolénique (g/100 g)": "alpha_linolenic_acid_18_3_n_3_in_g_for_100g",
    "AG 20:4 5c,8c,11c,14c (n-6), arachidonique (g/100 g)": "arachidonic_acid_20_4_n_6_in_g_for_100g",
    "AG 20:5 5c,8c,11c,14c,17c (n-3) EPA (g/100 g)": "eicosapentaenoic_acid_20_5_n_3_in_g_for_100g",
    "AG 22:6 4c,7c,10c,13c,16c,19c (n-3) DHA (g/100 g)": "docosahexaenoic_acid_22_6_n_3_in_g_for_100g",
    "Cholestérol (mg/100 g)": "cholesterol_in_mg_for_100g"
})


# Store the Minerals
minerals_df = nutrition_cleaned_df[[
    "alim_nom_fr",
    "Sel chlorure de sodium (g/100 g)",
    "Calcium (mg/100 g)",
    "Chlorure (mg/100 g)",
    "Cuivre (mg/100 g)",
    "Fer (mg/100 g)",
    "Iode (µg/100 g)",
    "Magnésium (mg/100 g)",
    "Manganèse (mg/100 g)",
    "Phosphore (mg/100 g)",
    "Potassium (mg/100 g)",
    "Sélénium (µg/100 g)",
    "Sodium (mg/100 g)",
    "Zinc (mg/100 g)"
]]
minerals_df = minerals_df.rename(columns = {
    "alim_nom_fr": "food_name_in_french",
    "Sel chlorure de sodium (g/100 g)": "sodium_chloride_in_g_for_100g",
    "Calcium (mg/100 g)": "calcium_in_mg_for_100g",
    "Chlorure (mg/100 g)": "chloride_in_mg_for_100g",
    "Cuivre (mg/100 g)": "copper_in_mg_for_100g",
    "Fer (mg/100 g)": "iron_in_mg_for_100g",
    "Iode (µg/100 g)": "iodine_in_µg_for_100g",
    "Magnésium (mg/100 g)": "magnesium_in_mg_for_100g",
    "Manganèse (mg/100 g)": "manganese_in_mg_for_100g",
    "Phosphore (mg/100 g)": "phosphorus_in_mg_for_100g",
    "Potassium (mg/100 g)": "potassium_in_mg_for_100g",
    "Sélénium (µg/100 g)": "selenium_in_µg_for_100g",
    "Sodium (mg/100 g)": "sodium_in_mg_for_100g",
    "Zinc (mg/100 g)": "zinc_in_mg_for_100g"
})


# Store the Vitamins
vitamins_df = nutrition_cleaned_df[[
    "alim_nom_fr",
    "Rétinol (µg/100 g)",
    "Beta-Carotène (µg/100 g)",
    "Vitamine D (µg/100 g)",
    "Vitamine E (mg/100 g)",
    "Vitamine K1 (µg/100 g)",
    "Vitamine K2 (µg/100 g)",
    "Vitamine C (mg/100 g)",
    "Vitamine B1 ou Thiamine (mg/100 g)",
    "Vitamine B2 ou Riboflavine (mg/100 g)",
    "Vitamine B3 ou PP ou Niacine (mg/100 g)",
    "Vitamine B5 ou Acide pantothénique (mg/100 g)",
    "Vitamine B6 (mg/100 g)",
    "Vitamine B9 ou Folates totaux (µg/100 g)",
    "Vitamine B12 (µg/100 g)"
]]
vitamins_df = vitamins_df.rename(columns = {
    "alim_nom_fr": "food_name_in_french",
    "Rétinol (µg/100 g)": "retinol_in_µg_for_100g",
    "Beta-Carotène (µg/100 g)": "beta_carotene_in_µg_for_100g",
    "Vitamine D (µg/100 g)": "vitamin_d_in_µg_for_100g",
    "Vitamine E (mg/100 g)": "vitamin_e_in_mg_for_100g",
    "Vitamine K1 (µg/100 g)": "vitamin_k1_in_µg_for_100g",
    "Vitamine K2 (µg/100 g)": "vitamin_k2_in_µg_for_100g",
    "Vitamine C (mg/100 g)": "vitamin_c_in_mg_for_100g",
    "Vitamine B1 ou Thiamine (mg/100 g)": "vitamin_b1_thiamine_in_mg_for_100g",
    "Vitamine B2 ou Riboflavine (mg/100 g)": "vitamin_b2_riboflavin_in_mg_for_100g",
    "Vitamine B3 ou PP ou Niacine (mg/100 g)": "vitamin_b3_niacin_in_mg_for_100g",
    "Vitamine B5 ou Acide pantothénique (mg/100 g)": "vitamin_b5_pantothenic_acid_in_mg_for_100g",
    "Vitamine B6 (mg/100 g)": "vitamin_b6_in_mg_for_100g",
    "Vitamine B9 ou Folates totaux (µg/100 g)": "vitamin_b9_total_folate_in_µg_for_100g",
    "Vitamine B12 (µg/100 g)": "vitamin_b12_in_µg_for_100g"
})


# Store the Additional contents
additional_nutrition_data_df = nutrition_cleaned_df[[
    "alim_nom_fr",
    "Cendres (g/100 g)",
    "Alcool (g/100 g)",
    "Acides organiques (g/100 g)"
]]
additional_nutrition_data_df = additional_nutrition_data_df.rename(columns = {
    "alim_nom_fr": "food_name_in_french",
    "Cendres (g/100 g)": "ashes_in_g_for_100g",
    "Alcool (g/100 g)": "alcohol_in_g_for_100g",
    "Acides organiques (g/100 g)": "organic_acids_in_g_for_100g"
})

In [None]:
# Convert dataframes to CSV files
dataframes_dict = {
    "macro_nutrients_df": macro_nutrients_df,
    "carbs_df": carbs_df,
    "fats_df": fats_df,
    "minerals_df": minerals_df,
    "vitamins_df": vitamins_df,
    "additional_nutrition_data_df": additional_nutrition_data_df
}
csv_filenames = {
    "macro_nutrients_df": "macro_nutrients_csv.csv",
    "carbs_df": "carbohydrates_csv.csv",
    "fats_df": "fats_csv.csv",
    "minerals_df": "minerals_csv.csv",
    "vitamins_df": "vitamins_csv.csv",
    "additional_nutrition_data_df": "additional_nutrition_data_csv.csv"
}

for key, dataframe in dataframes_dict.items():
    csv_filename = csv_filenames[key]
    dataframe_to_file(dataframe, csv_filename, "csv")