<a href="https://colab.research.google.com/github/umiSirya/General-Data-analysis/blob/main/foodconsumption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# TidyTuesday (2020-02-18) food consumption / CO₂ emissions CSV, read straight
# from GitHub. The adjacent string literals are joined into a single URL.
url = (
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/'
    'master/data/2020/2020-02-18/food_consumption.csv'
)
df = pd.read_csv(url)
df

Unnamed: 0,country,food_category,consumption,co2_emmission
0,Argentina,Pork,10.51,37.20
1,Argentina,Poultry,38.66,41.53
2,Argentina,Beef,55.48,1712.00
3,Argentina,Lamb & Goat,1.56,54.63
4,Argentina,Fish,4.36,6.96
...,...,...,...,...
1425,Bangladesh,Milk - inc. cheese,21.91,31.21
1426,Bangladesh,Wheat and Wheat Products,17.47,3.33
1427,Bangladesh,Rice,171.73,219.76
1428,Bangladesh,Soybeans,0.61,0.27


In [None]:
# Count of missing values in each column (isna is the same method as isnull).
df.isna().sum()

Unnamed: 0,0
country,0
food_category,0
consumption,0
co2_emmission,0


In [None]:
# Count fully duplicated records.
# The CSV carries its exported row number as the 'Unnamed: 0' column; because
# that value is unique per row, including it would make every row look
# distinct and the check would always report 0. It is therefore excluded
# (errors='ignore' keeps the cell working if the column is absent).
df.drop(columns=['Unnamed: 0'], errors='ignore').duplicated().sum()

0

**Average CO₂ emissions per person by food category across five East African countries (Kenya, Uganda, Tanzania, Rwanda and Ethiopia).**

In [None]:

import plotly.express as px

# Keep only the rows belonging to the East African countries of interest.
east_african_countries = ["Kenya", "Uganda", "Tanzania", "Rwanda", "Ethiopia"]
in_east_africa = df['country'].isin(east_african_countries)
df_east_africa = df[in_east_africa]

# Mean CO₂ emissions per food category, ordered from largest to smallest.
average_emissions = (
    df_east_africa
    .groupby('food_category')['co2_emmission']
    .mean()
    .reset_index()
    .sort_values(by='co2_emmission', ascending=False)
)

# Bar chart with emissions on x and food categories on y; colouring by
# category gives every bar its own colour.
axis_labels = {
    'co2_emmission': 'Average CO₂ Emissions (kg/person/year)',
    'food_category': 'Food Category',
}
fig = px.bar(
    average_emissions,
    x='co2_emmission',
    y='food_category',
    color='food_category',
    title="Average CO₂ Emissions per Person by Food Category in East Africa (Kenya, Uganda, Tanzania, Rwanda, Ethiopia)",
    labels=axis_labels,
)

# Layout tweaks: blank y-axis title, white background, light vertical grid,
# and no legend (the y-axis already names each category).
fig.update_layout(
    yaxis_title="",
    xaxis_title="CO₂ Emissions (kg/person/year)",
    plot_bgcolor="white",
    xaxis={"showgrid": True, "gridcolor": "lightgrey"},
    showlegend=False,
)

fig.show()


**Highlight the marked difference between consumption and emissions for selected food products of your choice**

In [None]:
import plotly.graph_objects as go

# Restrict the data to one country and a handful of food categories.
selected_country = "Kenya"
selected_foods = ["Beef", "Lamb & Goat", "Rice", "Fish", "Pork"]
df_kenya = df[(df['country'] == selected_country) & (df['food_category'].isin(selected_foods))]

# Mean consumption and CO₂ emissions per selected category, ordered by
# emissions from largest to smallest for plotting.
average_metrics = (
    df_kenya
    .groupby('food_category')[['consumption', 'co2_emmission']]
    .mean()
    .reset_index()
    .sort_values(by='co2_emmission', ascending=False)
)

# Grouped bars: one trace for emissions, one for consumption.
fig = go.Figure()

fig.add_trace(go.Bar(
    x=average_metrics['food_category'],
    y=average_metrics['co2_emmission'],
    name='CO₂ Emissions (kg/person/year)',
    marker_color='indianred'
))

fig.add_trace(go.Bar(
    x=average_metrics['food_category'],
    y=average_metrics['consumption'],
    name='Consumption (kg/person/year)',
    marker_color='lightseagreen'
))

# Annotate a category only when its emissions really do exceed its
# consumption, so the "Higher emissions" label is never attached to a bar
# pair for which the statement is false.
for row in average_metrics.itertuples(index=False):
    if row.co2_emmission > row.consumption:
        fig.add_annotation(
            x=row.food_category,
            y=row.co2_emmission,
            text=f"{row.food_category}: Higher emissions vs. consumption",
            showarrow=True,
            arrowhead=2,
            ax=0,
            ay=-40,
            font=dict(color="red"),
            bgcolor="lightyellow"
        )

# Titles, grouped-bar mode and a white plot background.
fig.update_layout(
    title="Comparison of Average Consumption and CO₂ Emissions for Selected Foods in Kenya",
    xaxis_title="Food Category",
    yaxis_title="Amount (kg/person/year)",
    barmode='group',
    plot_bgcolor="white"
)

fig.show()


**The food consumption of animal and non-animal products and the impact on carbon footprint**

In [None]:

# Food categories grouped by origin. The labels must match the values of
# df['food_category'] exactly: the data spells milk as "Milk - inc. cheese",
# so the earlier "Milk (incl. cheese)" label never matched and milk was
# counted as plant-based.
animal_based = ["Beef", "Pork", "Poultry", "Fish", "Lamb & Goat", "Milk - inc. cheese", "Eggs"]
# NOTE(review): the nuts label is assumed to be "Nuts inc. Peanut Butter" —
# confirm against df['food_category'].unique().
plant_based = ["Rice", "Wheat and Wheat Products", "Nuts inc. Peanut Butter", "Soybeans"]

# Surface any category that is in neither list; such rows fall through to
# 'Plant' below, which is how a label mismatch can silently skew the totals.
known_categories = set(animal_based) | set(plant_based)
unclassified = sorted(set(df['food_category']) - known_categories)
if unclassified:
    print(f"Warning: food categories not in either list (treated as 'Plant'): {unclassified}")

# Tag every row as 'Animal' or 'Plant' (anything not animal-based is 'Plant').
df['food_type'] = np.where(df['food_category'].isin(animal_based), 'Animal', 'Plant')

# Restrict to Kenya and total consumption / emissions per food type.
df_kenya = df[df['country'] == 'Kenya']
summary = df_kenya.groupby('food_type')[['consumption', 'co2_emmission']].sum().reset_index()

# Grouped bars: one trace for emissions, one for consumption.
fig = go.Figure()

fig.add_trace(go.Bar(
    x=summary['food_type'],
    y=summary['co2_emmission'],
    name='Total CO₂ Emissions (kg/person/year)',
    marker_color='indianred'
))

fig.add_trace(go.Bar(
    x=summary['food_type'],
    y=summary['consumption'],
    name='Total Consumption (kg/person/year)',
    marker_color='lightseagreen'
))

# Titles, grouped-bar mode and a white plot background.
fig.update_layout(
    title="Total Consumption and CO₂ Emissions of Animal vs. Plant-Based Foods in Kenya",
    xaxis_title="Food Type",
    yaxis_title="Amount (kg/person/year)",
    barmode='group',
    plot_bgcolor="white"
)

fig.show()


***Beef's contribution to CO₂ emissions (kg/person/year)***

In [None]:

# Rows for the 'Beef' food category, across every country in the data.
df_beef = df[df['food_category'] == 'Beef']

# Mean CO₂ emissions for Beef; falls back to 0 when there are no Beef rows.
if df_beef.empty:
    average_beef_co2_emissions = 0
else:
    average_beef_co2_emissions = df_beef['co2_emmission'].mean()

print(f"Average CO₂ emissions from Beef across all countries: {average_beef_co2_emissions:.2f} kg/person/year")

# Single-bar chart showing the Beef average.
beef_bar = go.Bar(
    x=['Beef'],
    y=[average_beef_co2_emissions],
    marker_color='indianred',
    name='Average CO₂ Emissions (kg/person/year)',
)
fig = go.Figure(beef_bar)

# Titles and a white plot background.
fig.update_layout(
    title="Average CO₂ Emissions from Beef Across All Countries",
    xaxis_title="Food Category",
    yaxis_title="CO₂ Emissions (kg/person/year)",
    plot_bgcolor="white",
)

fig.show()


Average CO₂ emissions from Beef across all countries: 374.10 kg/person/year


***A box plot and interpretation of the CO₂ contributions of each food category.***

In [None]:
# One box per food category showing the spread of CO₂ emissions over all
# rows of df; only outlier points are drawn individually.
box_labels = {
    "co2_emmission": "CO₂ Emissions (kg CO₂eq)",
    "food_category": "Food Category",
}
fig = px.box(
    df,
    x="food_category",
    y="co2_emmission",
    color="food_category",
    points="outliers",
    labels=box_labels,
    title="CO₂ Emissions by Food Category",
)

fig.show()

**Top 10 Countries with highest emissions.**

In [None]:
 #Group by country and sum CO₂ emissions
country_emissions = (
    df
    .groupby("country")["co2_emmission"]
    .sum()
    .reset_index()
)

# Ten countries with the largest summed emissions, biggest first.
top_countries = (
    country_emissions
    .sort_values(by="co2_emmission", ascending=False)
    .head(10)
)

# One bar per country, coloured per country and labelled with its total.
bar_labels = {
    "country": "Country",
    "co2_emmission": "Total CO₂ Emissions (kg CO₂eq)",
}
fig = px.bar(
    top_countries,
    x="country",
    y="co2_emmission",
    color="country",
    text="co2_emmission",
    labels=bar_labels,
    title="Top 10 Countries with Highest CO₂ Emissions",
)

# Print each total with two decimals, placed outside its bar.
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')

fig.show()

# **Treemap**

In [None]:
# Sum CO₂ emissions per country (recomputed here so this cell does not rely
# on the bar-chart cell having been run).
country_emissions = df.groupby("country")["co2_emmission"].sum().reset_index()

# Ten countries with the largest summed emissions, biggest first.
top_countries = country_emissions.sort_values(by="co2_emmission", ascending=False).head(10)

# Treemap with one tile per country, sized and coloured by total emissions.
# The reversed scale ("RdYlGn_r") maps the highest totals to red and the
# lowest to green; the plain "RdYlGn" scale painted the largest emitters
# green, which reads as the opposite of what the chart is showing.
fig = px.treemap(
    top_countries,
    path=["country"],  # Single-level hierarchy: one tile per country
    values="co2_emmission",
    title="Top 10 Countries with Highest CO₂ Emissions",
    color="co2_emmission",
    color_continuous_scale="RdYlGn_r",  # Green (low) to red (high)
    labels={"country": "Country", "co2_emmission": "Total CO₂ Emissions (kg CO₂eq)"},
)

fig.show()