In [25]:
import pandas as pd
import altair as alt
from vega_datasets import data

# Load the dataset
df = pd.read_csv('https://raw.githubusercontent.com/yuanma0427/BMI706-Project/main/Cardiovascular_diseases.csv')
df = df[['Region Name', 'Country Name', 'Year', 'Sex', 'Age Group', 'Number', 'Death rate per 100 000 population']]
df["Number"] = df.groupby(["Region Name", "Country Name", "Sex", "Age Group"])["Number"].fillna(method="bfill")
df["Death rate per 100 000 population"] = df.groupby(["Region Name", "Country Name", "Sex", "Age Group", "Number"])["Death rate per 100 000 population"].fillna(method="bfill")
df.dropna(inplace=True)
df.reset_index()

Unnamed: 0,index,Region Name,Country Name,Year,Sex,Age Group,Number,Death rate per 100 000 population
0,0,Europe,Albania,1987,All,[All],6388.0,11.172916
1,1,Europe,Albania,1987,All,[0],5.0,2.301761
2,2,Europe,Albania,1987,All,[1-4],39.0,12.802521
3,3,Europe,Albania,1987,All,[5-9],6.0,24.649287
4,4,Europe,Albania,1987,All,[10-14],6.0,1.336097
...,...,...,...,...,...,...,...,...
285043,310480,Asia,Saudi Arabia,2021,Female,[70-74],1274.0,6.628571
285044,310481,Asia,Saudi Arabia,2021,Female,[75-79],1656.0,14.579537
285045,310482,Asia,Saudi Arabia,2021,Female,[80-84],1559.0,31.637826
285046,310483,Asia,Saudi Arabia,2021,Female,[85+],2225.0,52.527646


In [31]:
df = df.rename(columns={"Country Name": "Country"})

In [32]:
country_df = pd.read_csv('https://raw.githubusercontent.com/hms-dbmi/bmi706-2022/main/cancer_data/country_codes.csv', dtype = {'conuntry-code': str})
country_df['country-code'] = country_df['country-code'].astype(str)
df_new = df.merge(country_df[['Country', 'country-code']], how="left", on="Country")
df_new

Unnamed: 0,Region Name,Country,Year,Sex,Age Group,Number,Death rate per 100 000 population,country-code
0,Europe,Albania,1987,All,[All],6388.0,11.172916,8
1,Europe,Albania,1987,All,[0],5.0,2.301761,8
2,Europe,Albania,1987,All,[1-4],39.0,12.802521,8
3,Europe,Albania,1987,All,[5-9],6.0,24.649287,8
4,Europe,Albania,1987,All,[10-14],6.0,1.336097,8
...,...,...,...,...,...,...,...,...
285043,Asia,Saudi Arabia,2021,Female,[70-74],1274.0,6.628571,682
285044,Asia,Saudi Arabia,2021,Female,[75-79],1656.0,14.579537,682
285045,Asia,Saudi Arabia,2021,Female,[80-84],1559.0,31.637826,682
285046,Asia,Saudi Arabia,2021,Female,[85+],2225.0,52.527646,682


In [37]:

# Aggregate data to get sums of total deaths per region, per year
aggregated_data = df_new.groupby(["Country", "Year", "country-code"]).agg(
    total_deaths=('Number', 'sum')
).reset_index()

aggregated_data

Unnamed: 0,Country,Year,country-code,total_deaths
0,Albania,1987,8,24772.0
1,Albania,1988,8,25937.0
2,Albania,1989,8,26228.0
3,Albania,1992,8,24302.0
4,Albania,1993,8,21720.0
...,...,...,...,...
4701,Venezuela (Bolivarian Republic of),2012,862,178284.0
4702,Venezuela (Bolivarian Republic of),2013,862,180820.0
4703,Venezuela (Bolivarian Republic of),2014,862,194452.0
4704,Venezuela (Bolivarian Republic of),2015,862,198988.0


In [38]:
world = alt.topo_feature(data.world_110m.url, 'countries')
year_data = aggregated_data[aggregated_data['Year'] == 2002]
year_data

Unnamed: 0,Country,Year,country-code,total_deaths
13,Albania,2002,8,32248.0
53,Antigua and Barbuda,2002,28,588.0
103,Argentina,2002,32,853.0
142,Armenia,2002,51,55815.0
213,Australia,2002,36,123944.0
...,...,...,...,...
4462,United Kingdom of Great Britain and Northern I...,2002,826,910935.0
4533,United States of America,2002,840,3567843.0
4595,Uruguay,2002,858,40184.0
4632,Uzbekistan,2002,860,236438.0


In [42]:
import altair as alt
import pandas as pd
from vega_datasets import data

width = 600
height  = 300
project = 'equirectangular'
source = alt.topo_feature(data.world_110m.url, 'countries')
# a gray map using as the visualization background
background = alt.Chart(source
).mark_geoshape(
    fill='#aaa',
    stroke='white'
).properties(
    width=width,
    height=height
).project(project)

######################
# P3.4 create a selector to link two map visualizations
selector = alt.selection_multi(
    # add your code here
    fields = ['id'],
    on = 'click',
    )

chart_base = alt.Chart(source
    ).properties(
        width=width,
        height=height
    ).project(project
    ).add_selection(selector
    ).transform_lookup(
        lookup="id",
        from_=alt.LookupData(year_data, "country-code", ["total_deaths", 'Country']),
    )

# fix the color schema so that it will not change upon user selection
death_scale = alt.Scale(domain=[year_data['total_deaths'].min(), year_data['total_deaths'].max()], scheme='oranges')
death_color = alt.Color(field="total_deaths", type="quantitative", scale=death_scale)

chart_rate = chart_base.mark_geoshape().encode(
    ######################
    # P3.1 map visualization showing the mortality rate
    # add your code here
    color = alt.condition(
        selector,
        death_color,
        alt.value('#ddd')
    ),
    ######################
    # P3.3 tooltip
    # add your code here
    tooltip = [alt.Tooltip('Country:N', title='Country'),
               alt.Tooltip('total_deaths:Q', title='Mortality')]
    ).transform_filter(
    selector
    )
background+chart_rate

  selector = alt.selection_multi(
  ).add_selection(selector


In [21]:

import pandas as pd
import altair as alt
from vega_datasets import data

# Load the dataset
df = pd.read_csv('https://raw.githubusercontent.com/yuanma0427/BMI706-Project/main/Cardiovascular_diseases.csv')
df = df[['Country Name', 'Year', 'Number']]  # Using only the necessary columns

# Aggregate data to get sums of total deaths per country, per year
df2 = df.groupby(['Country Name', 'Year']).agg({
    'Number': 'sum'  # Total deaths
}).reset_index()

# Filter the data based on the selected year
df_filtered = df2[df2['Year'] == 2002]

# Load world map geometry
source = alt.topo_feature(data.world_110m.url, 'countries')

# Visualization dimensions and projection
width = 600
height = 300
project = 'equirectangular'

# Gray background map for visualization
background = alt.Chart(source).mark_geoshape(
    fill='#aaa',
    stroke='white'
).properties(
    width=width,
    height=height
).project(project)

# Selector for linking the two maps
selector = alt.selection_single(fields=['properties.name'])

# Base chart for linking data
chart_base = alt.Chart(source).properties(
    width=width,
    height=height
).project(project).add_selection(selector).transform_lookup(
    lookup="properties.name",
    from_=alt.LookupData(df_filtered, "Country Name", ["Number", 'Country Name']),
)

# Fixed color scale for total death map
death_scale = alt.Scale(domain=[df_filtered['Number'].min(), df_filtered['Number'].max()], scheme='reds')

chart_deaths = chart_base.mark_geoshape().encode(
    # Total death map
    color=alt.Color('Number:Q', scale=death_scale, title='Total Deaths'),
    tooltip=[alt.Tooltip('Country Name:N', title='Country'),
             alt.Tooltip('Number:Q', title='Total Deaths')]
).transform_filter(
    selector
)
background+chart_deaths

  selector = alt.selection_single(fields=['properties.name'])
  ).project(project).add_selection(selector).transform_lookup(


In [None]:
	import altair as alt
import pandas as pd

df = pd.DataFrame([['Action', 5, 'F'], 
                   ['Crime', 10, 'F'], 
                   ['Action', 3, 'M'], 
                   ['Crime', 9, 'M']], 
                  columns=['Genre', 'Rating', 'Gender'])

chart = alt.Chart(df).mark_bar().encode(
   x=alt.X('Genre', axis=alt.Axis(labelAngle=0)),
   xOffset='Gender',
   y=alt.Y('Rating', axis=alt.Axis(grid=False)),
   color='Gender'
).configure_view(
    stroke=None,
)

chart
