In [1]:
import requests
from bs4 import BeautifulSoup
import re

tweet_re = re.compile(r'#[a-zA-z]+[0-9]+')

goals_re = re.compile(r'Goal [0-9]+')

des_separator_re = re.compile(r'<div id="subHeadline">')
htmltag_re = re.compile(r'<[^>]*>')
newline_re = re.compile(r'[\n\r]+')

# Title
def get_title():
    return soup.find(id = 'headline').get_text().strip()

# Goals
def get_goals():
    if len(r.history) and 'the ocean conference' in soup.title.text.lower():


        other_sgd_ind = home_right_raw.index('Other SDGs')

        other_goals = re.findall(goals_re,home_right_raw[other_sgd_ind:])

        goals_lst = ['Goal 14'] + other_goals

    else:
        goals_raw = soup.find(id='targets')
        goals_lst = [goal.get_text() for goal in goals_raw.findAll('strong')]
        
    return ','.join(goals_lst)

# Partners
def get_partners():
    partner_index = home_right_raw_lst.index('Partners') + 1
    next_index = home_right_raw_lst.index('Ocean Basins') \
                    if len(r.history) and 'the ocean conference' in soup.title.text.lower() \
                    else home_right_raw_lst.index('Countries')
            
    partners = [p.strip() for p in home_right_raw_lst[partner_index:next_index]]
    partners = list(filter(None, partners))
    
    return ' || '.join(partners)

# Description
def get_description():
    des_raw = soup.find(id='intro').find('div', attrs={'class':'wrap'})

    temp = str(des_raw)

    if '<div id="subHeadline">' in temp:
        temp = re.sub(des_separator_re, ' : ', temp)
        temp = re.sub(htmltag_re, '', temp)
        temp = re.sub(newline_re, '\n', temp)
        return temp.strip()
    else: 
        return des_raw.get_text().strip()
    
def get_resources():
    resources_raw = soup.find(id='resources')
    resources_lst = []

    for resource in resources_raw.findAll('div', recursive = False):
        temp = resource.get_text()

        if temp != '':
            temp = re.sub(r'\n+', ' : ', temp.strip())
#             temp = re.sub(r'[\x91\x92]', '\'', temp)
            resources_lst.append(temp.strip())
    return ' || '.join(resources_lst)


def get_timeframe():
    time_frame_index  = [i for i, item in enumerate(home_right_raw_lst) if re.search('Time-frame', item)]
    return home_right_raw_lst[time_frame_index[0]] if len(time_frame_index) == 1 else 'Time-frame: '

def get_countries():
    try:
        countries_index = home_right_raw_lst.index('Countries') + 1
        next_index = home_right_raw_lst.index('Contact information')
        countries = home_right_raw_lst[countries_index:next_index]
        return ",".join(countries)
    except:
        return ''
    
def get_hashtag():
    try:
        return list(filter(tweet_re.match, home_right_raw_lst))[0]
    except:
        return ''

In [4]:
base_url = 'https://sustainabledevelopment.un.org/partnership/?p='

functions = [get_title, get_goals, get_partners, get_description,\
             get_resources, get_timeframe, get_countries, get_hashtag]

In [5]:
ids = open('good_ids.txt').read().split()

from random import sample
sub_ids = sample(ids, 50)

In [6]:
for ide in sub_ids:
    url = base_url + ide
    
    r = requests.get(url)
    raw_data = r.text
    
    print(' ******************************************  ')
    print(url)
    
    soup = BeautifulSoup(raw_data, 'html.parser')

    home_right = soup.find('div', attrs={'class':'homeRight'})
    
    home_right_raw = str(home_right)

    home_right_raw_lst = home_right.getText().split('\n')
    home_right_raw_lst = list(filter(None, home_right_raw_lst))
    

    for f in functions:
        print('\n --------- \n')
        print(f.__name__)
        print('\n')
        print(repr(f()))
        
    

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=18241

 --------- 

get_title


'To improve the regulatory framework for decommissioning offshore oil and gas installations and pipelines'

 --------- 

get_goals


'Goal 14'

 --------- 

get_partners


'New Zealand Ministry for the Environment (Government), Ministry for Business, Innovation and Employment (Government); Environmental Protection Authority (Government); Private Sector (Private Sector)'

 --------- 

get_description


"The regulatory framework for decommissioning offshore oil and gas installations and pipelines will be improved through changes to legislation and regulations. These changes will help to ensure that the entire life cycle of New Zealand's offshore oil and gas operations are managed appropriately to ensure sustainable use of the marine environment; and that relevant obligations under the UN Convention on the Law of the Sea are met.  The means of implementation i

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=17626

 --------- 

get_title


'Marine Spatial Planning in Argentina'

 --------- 

get_goals


'Goal 14'

 --------- 

get_partners


'Ministerio de Ambiente y Desarrollo Sustentable (MAyDS); Pampa Azul Ministerio de Ciencia, Tecnologa e innovacin productiva; Forum for the Conservation of the Patagonian sea and area of influence'

 --------- 

get_description


'Marine spatial planning is a tool that facilitates the development of marine areas and the balance between the demands of development with the need to protect marine ecosystems, achieving environmental, social and economic benefits. Also it provides elements to improve government management in marine areas through integrated knowledge on Oceans sciences, biodiversity, natural resources and environmental services; a better ability of coordination between sectors, institutions and jurisdictions; anticipation of potential conflicts

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=801

 --------- 

get_title


'Business Contributions to the Promotion of a Green and Inclusive Economy'

 --------- 

get_goals


''

 --------- 

get_partners


'Rede Brasileira do Pacto Global'

 --------- 

get_description


': Description/achievement of initiative\nBrazilian Network is leading the submission of 10 Commitments by CEOs of Brazilian Companies/Organizations for the next 20 years.\n \n \n : Implementation methodologies\nBy define specific and measurable targets for Commitments. For further information, Please refer to the deliverables. \n : Arrangements for Capacity-Building and Technology Transfer \n : Coordination mechanisms/governance structure\n : Partner(s)\nRede Brasileira do Pacto Global'

 --------- 

get_resources


''

 --------- 

get_timeframe


'Time-frame:  - 2013'

 --------- 

get_countries


''

 --------- 

get_hashtag


'#SDGAction801'
 ****************

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=2206

 --------- 

get_title


'Practical Application of the Earth Charter'

 --------- 

get_goals


'Goal 8'

 --------- 

get_partners


''

 --------- 

get_description


': Description/achievement of initiative\nPreparation of development programmes, focusing on high standards of life quality and healthy environment, on the basis of the Earth Charter\nSource: Ministry of Ecology and Natural Resources of the Republic of Tatarstan\nIn April 2001, the State Council of the Republic of Tatarstan  in Russia voted unanimously to adopt the Earth Charter in the Republic. Since the adoption of the Decree of the State Council of the Republic of Tatarstan on 27 April 2001 ¹ 722 "About the Earth Charter" its principles were observed in preparation of programes of development, focusing on high standards of life quality and healthy environment.\n \n \n : Implementation methodologies\n : Arrangement

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=17262

 --------- 

get_title


'Plantar 6,000 plantas de manglar en la Isla de Roatn'

 --------- 

get_goals


'Goal 14'

 --------- 

get_partners


'Bay Islands Conservation Association (BICA) Roatn'

 --------- 

get_description


'Plantar 6,000 propgulos de manglar, que se encuentran en viveros, en diferentes zonas de la Isla de Roatan.'

 --------- 

get_resources


'Financing (in USD) : 6,200 USD'

 --------- 

get_timeframe


'Time-frame: 7/2017 - 12/2018'

 --------- 

get_countries


''

 --------- 

get_hashtag


'#OceanAction17262'
 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=21296

 --------- 

get_title


'Awareness campaigns on the marine mammals in the Pelagos Santuary'

 --------- 

get_goals


'Goal 14'

 --------- 

get_partners


'ACCOBAMS (IGO), Italian and French Municipalities (Local administrations), Souffleurs

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=443

 --------- 

get_title


"One Planet Living - a commitment to deliver China's most advanced sustainable building centres of excellence"

 --------- 

get_goals


''

 --------- 

get_partners


'China Merchant Property Development, a BioRegional One Planet partner\rBioRegional and partners are working together to implement one planet living. One planet living is a practical implementation framework based on sustainable consumption and sustainable development'

 --------- 

get_description


': Description/achievement of initiative\nBy 2016, we commit to:\n Develop Jinshan into a Low Carbon community\n Build the Zero Carbon Office Building within Jinshan\n Develop Jinshan into the Green Building Demonstration Zone Southern China Green Building Demonstration Zone\n : Implementation methodologies\nWe will build 5 platforms to facilitate the realisation of these commitments:\n ·         

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=19649

 --------- 

get_title


'Development of effective area-based conservation measures in marine areas'

 --------- 

get_goals


'Goal 14'

 --------- 

get_partners


'Brazilian Ministries of Defense; Science, Technology, Innovations and Communications; Industry, Trade and Services; Agriculture and other members of the Inter-ministerial Commission for Sea Resources - CIRM; Brazilian Navy (Government); World Bank (International Financial Institution), United Nations Development Programme (United Nations Entity), BirdLife International, SAVE Brazil, Conservation International, WWF, SOS Atlantic Forest, CONFREM - (National Commission of Peoples in Coastal-Marine Extractive Reserves and partner artisanal fishermen and grassroots associations, Brazilian Fund for Biodiversity FUNBIO (Non-governmental Organizations).'

 --------- 

get_description


'The development of effective area-based

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=278

 --------- 

get_title


'Université Catholique de Louvain'

 --------- 

get_goals


'Goal 4'

 --------- 

get_partners


'Université Catholique de Louvain || Université Catholique de Louvain'

 --------- 

get_description


": Description/achievement of initiative\n UCL is committed to continuing its management and its development by assuming its societal responsibility in terms of sustainable development: energy management, soft mobility for students and staff, sustainable installations, materials and consumables.  To this end, it is engaged in a process of strategic reflection on the subject, so as to devise a vision, an inventory of themes, actions and players, with transparent monitoring based on objective results indicators.\n \n \n : Implementation methodologies\nThe Louvain School of Management (LSM) - the UCL's Faculty of Management leads a pilot study about the integratio

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=193

 --------- 

get_title


'Baikal International Business School'

 --------- 

get_goals


'Goal 4'

 --------- 

get_partners


'Baikal International Business School || Baikal International Business School'

 --------- 

get_description


": Description/achievement of initiative\nOur students take part in the local government's projects of sustainable development. We teach some disciplines such as Knowledge Management, Sustainable Ecomomies, Ecological Quality Management, etc.All student activities are reporting in the official web cite including students' rating with annual results.\n \n \n : Implementation methodologies\nThrough continued efforts already underway.\n : Arrangements for Capacity-Building and Technology Transfer \n : Coordination mechanisms/governance structure\n : Partner(s)\nBaikal International Business School"

 --------- 

get_resources


'Staff / Technical exper

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=20244

 --------- 

get_title


'Marine Conservation Masterplan - Vizhinjam/Kovalam, India'

 --------- 

get_goals


'Goal 14,Goal 1,Goal 3,Goal 4,Goal 6,Goal 8,Goal 9,Goal 10,Goal 11,Goal 12,Goal 13,Goal 15,Goal 16,Goal 17'

 --------- 

get_partners


'Positive Change for Marine Life (PCFML) || Sebastian Social Indian Projects (SISP) || Eco Preserve || Bond Safari Kovalam || Kovalam Surf Club'

 --------- 

get_description


'As part of our Global Programs, Positive Change for Marine Life (PCFML) will be implementing our Marine Conservation Masterplan in the Kovalam and Vizhinjam regions in India. The plan will be rolled-out by our local full-time team on the ground in India, consisting of two international and one local coordinator, with the plan to train and skill-up more local people to eventually take over and run the program on their own, with resources and assistance from PCFML. 

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=371

 --------- 

get_title


'UNDP Governance support: capacity development, assessments and integrated strategies'

 --------- 

get_goals


''

 --------- 

get_partners


'UNDP'

 --------- 

get_description


': Description/achievement of initiative\nVisible progress in addressing equity and inclusion concerns in MDGs acceleration and sustainable development strategies. \n The integration of the three strands of sustainable development gets operationalized at the national and local levels through strengthening the enabling environment, governing institutions and non-state actors. \n : Implementation methodologies\nThrough policy support and development, technical advice, provision of alternative technological and investment choices, capacity development of state and non-state actors, knowledge management, vulnerability and win-win assessments, stakeholder involvement, and support to 

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=22141

 --------- 

get_title


'Sustainable Tourism Education and Demonstration'

 --------- 

get_goals


'Goal 3,Goal 6,Goal 7,Goal 9,Goal 11,Goal 12,Goal 13,Goal 14,Goal 15,Goal 16,Goal 17'

 --------- 

get_partners


'Blue Community Consortium. Thirty Members See: http://www.bluecommunity.info/topics/view/55a922bd0cf20feb892c987e/'

 --------- 

get_description


': Description/achievement of initiative\nThis initiative is committed to ongoing research, education, demonstration, and capacity building within the tourism industry to promote sustainable tourism that will:\n1.  Build on the foundation of the Global Sustainable Tourism Council criteria\n2.  Build capacity for sustainable social structures\n3.  Utilize the 12 Blue Community Strategies for research, education and demonstration http://www.bluecommunity.info/topics/view/51cbfc99f702fc2ba812ed8f/\n4.  Work towards a safe and 

 ******************************************  
https://sustainabledevelopment.un.org/partnership/?p=657

 --------- 

get_title


'P&G; 2012 Sustainability Goals'

 --------- 

get_goals


''

 --------- 

get_partners


"Our key partners for the P&G; Children's Safe Drinking Water Goals can be found here : http://www.csdw.org/csdw/global-partners.shtml"

 --------- 

get_description


": Description/achievement of initiative\nAt P&amp;G;, we're committed to delivering products and services that make everyday life better for people around the world.\n \n : Implementation methodologies\nIn 2007, P&amp;G; established a series of five-year sustainability goals, ending on June 30th, 2012.The progress of these goals can be viewed in our annual sustainability reports, available at : P&amp;G; Sustainability Report  Our sustainability goals span both products and operations as well as our social responsibility program \x93Live, Learn and Thrive\x94.P&amp;G;'s Purpose is to touch and improve liv