In [167]:
import requests, re
from bs4 import BeautifulSoup as bs
import pandas as pd
import altair as alt

In [73]:
def scrape_page(www, timeout=30):
    """Download one campaign page and extract its title, summary and EU budget rows.

    Parameters
    ----------
    www : str
        URL of the campaign page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` can block forever.

    Returns
    -------
    dict
        ``title``          -- stripped text of the first ``<h1>``.
        ``summary``        -- stripped text of the first element matching
                              ``.field-type-text-with-summary``.
        ``countries``      -- beneficiary countries, joined with ``;``.
        ``support``        -- cleaned EU co-financing amounts (as strings),
                              joined with ``;`` in the same order as ``countries``.
        ``overal_support`` -- float sum of all amounts on the page.
        ``link``           -- the URL that was scraped.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when ``timeout`` is exceeded.
    AttributeError, IndexError
        If the page does not contain the expected elements.
    ValueError
        If an amount cannot be converted to ``float`` after cleaning.
    """
    response = requests.get(www, timeout=timeout)
    # Fail here on an error status instead of parsing an error page and
    # crashing later with an unrelated AttributeError/IndexError.
    response.raise_for_status()
    soup = bs(response.text, "lxml")

    page = {}

    page["title"] = soup.h1.text.strip()
    page["summary"] = soup.select(".field-type-text-with-summary")[0].text.strip()

    # Find the <div> whose text starts with "Budget(s)"; the budget rows are the
    # "content" divs under its parent. The pattern is a raw string so that
    # "\(" is not treated as an (invalid) string escape sequence.
    budget_label = soup.find("div", string=re.compile(r"Budget\(s\).*"))
    budgets = budget_label.parent.find_all("div", class_="content")

    countries = []
    support_split = []
    overall_support = 0

    for row in budgets:
        country = row.select(".field-name-field-country-beneficiary li")[0].text
        eu_support = row.select(".field-name-field-eu-cofinance .field-items")[0].text
        # Drop spaces and the euro sign and turn the decimal comma into a dot
        # so that float() can parse the amount.
        eu_support = eu_support.replace(" ", "").replace(",", ".").replace("€", "")

        overall_support += float(eu_support)
        support_split.append(eu_support)
        countries.append(country)

    page["countries"] = ";".join(countries)
    page["support"] = ";".join(support_split)
    # The key keeps its original spelling because later cells read it by name.
    page["overal_support"] = overall_support

    page["link"] = www

    return page

In [61]:
# Campaign page URLs are taken from the "link" column of links.csv.
link_table = pd.read_csv("links.csv")
links = link_table["link"].to_list()

# Scrape every page from index 5 onwards, collecting one result dict per page.
# NOTE(review): the slice skips the first five links -- confirm this is
# intentional and not a leftover from resuming an interrupted run.
data = []
for link in links[5:]:
    page = scrape_page(link)
    data.append(page)

In [63]:
# Persist / reload the scraped results so later cells do not need the network.
# The commented-out line writes the scraped records to data.csv; it is left
# disabled so that running this cell only reads the existing file.
# NOTE(review): as written, to_csv would also write the DataFrame index --
# presumably the source of the "Unnamed: 0" column seen after loading; confirm.
#pd.DataFrame(data).to_csv("data.csv")
# From here on `data` is the DataFrame read from data.csv (the name is rebound).
data = pd.read_csv("data.csv")

In [152]:
# Put campaign names in Title Case; the keys of `category` below use that form.
data["title"] = data["title"].str.title()

# Display label for the bars: total support in millions of euros, two decimals.
data["overal_support_str"] = (data["overal_support"] / 1000000).map(
    "{:.2f} mio eur".format
)

# Hand-assigned product category for each (title-cased) campaign name.
category = {
    "European Veal Meat": "beef",
    "Pork Lovers Europe": "pork",
    "Emf Milk": "dairy",
    "Proud Of Eu Beef": "beef",
    "Letstalkaboutpork": "pork",
    "Love Pork": "pork",
    "Assolattemi": "dairy",
    "Eu Lamb Campaign": "lamb",
    "Carne De Conejo": "rabbit",
    "Carnes Con Estilo": "beef/lamb",
}

# Direct dict indexing: a campaign title missing from `category` raises KeyError.
data["category"] = [category[name] for name in data["title"]]

data.head()

Unnamed: 0,title,summary,countries,support,overal_support,link,overal_support_str,category
0,Love Pork,A European movement for more taste\nThe action...,Denmark;Sweden,2451165.76;97254.40,2548420.16,https://ec.europa.eu/chafea/agri/en/campaigns/...,2.55 mio eur,pork
1,Assolattemi,"The program “The shapes of milk, discover the ...",Italy,700606.9,700606.9,https://ec.europa.eu/chafea/agri/en/campaigns/...,0.70 mio eur,dairy
2,Eu Lamb Campaign,Over 80% of sheep production in the EU is prod...,France;Ireland;United Kingdom,3242850.98;1187071.56;3794138.25,8224060.79,https://ec.europa.eu/chafea/agri/en/campaigns/...,8.22 mio eur,lamb
3,Carne De Conejo,"INTERCUN and ASPOC, two state-recognised organ...",Spain;Portugal,3021432.58;834454.69,3855887.27,https://ec.europa.eu/chafea/agri/en/campaigns/...,3.86 mio eur,rabbit
4,Carnes Con Estilo,The Ternera Gallega and Ternasco de Aragón PGI...,Spain,1022809.9,1022809.9,https://ec.europa.eu/chafea/agri/en/campaigns/...,1.02 mio eur,beef/lamb
5,European Veal Meat,Promotional campaign for European veal meat in...,France;Italy;Netherlands;Belgium,2957489.00;836998.00;1156247.00;1044741.00,5995475.0,https://ec.europa.eu/chafea/agri/en/campaigns/...,6.00 mio eur,beef
6,Pork Lovers Europe,"""The consumption of pork meat in Europe has de...",Spain,1366348.2,1366348.2,https://ec.europa.eu/chafea/agri/campaigns/por...,1.37 mio eur,pork
7,Emf Milk,A European multi-programme on milk targeting t...,France;France;Denmark;Netherlands;Ireland;Belgium,82272.00;9647616.00;35500.80;72288.00;14400.00...,9900000.0,https://ec.europa.eu/chafea/agri/en/campaigns/...,9.90 mio eur,dairy
8,Proud Of Eu Beef,The objective of Proud of European Beef is to ...,Spain;Belgium,2406032.78;1199237.94,3605270.72,https://ec.europa.eu/chafea/agri/en/campaigns/...,3.61 mio eur,beef
9,Letstalkaboutpork,Pork is the most consumed meat in all Europe. ...,Spain;France;Portugal;Portugal,2451428.64;2428053.20;587983.72,5467465.56,https://ec.europa.eu/chafea/agri/en/campaigns/...,5.47 mio eur,pork


In [166]:
# Both panels order their bars by summed support, largest first.
support_desc = alt.EncodingSortField(
    field="overal_support",  # field the ordering is computed from
    op="sum",                # aggregate applied to that field before sorting
    order="descending",
)

# Text-mark settings shared by both label layers; dx nudges the text to the
# right so it does not appear on top of the bar.
label_style = dict(align="left", baseline="middle", dx=3)

support_axis_title = "EU campaign support in eur"

# Left panel: one bar per campaign, labelled with the pre-formatted amount.
bars = alt.Chart(data).mark_bar().encode(
    x=alt.X("overal_support", title=support_axis_title),
    y=alt.Y("title:N", title="Campaign name", sort=support_desc),
)
text = bars.mark_text(**label_style).encode(
    text=alt.Text("overal_support_str")
)

# Right panel: support summed per category, coloured by category.
cummul = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("sum(overal_support):Q", title=support_axis_title),
        y=alt.Y("category:N", sort=support_desc),
        color="category:N",
    )
    .properties(height=300)
)
text_cumm = cummul.mark_text(**label_style).encode(
    text=alt.Text("sum(overal_support):Q", format=",")
)

# Layer each bar chart with its labels, then place the two panels side by side.
(bars + text) | (cummul + text_cumm)