In [1]:
import glob
import os
import pandas as pd

from langdetect import detect
import nltk
#nltk.download()   # comment after first download
from nltk.tokenize import wordpunct_tokenize, MWETokenizer
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import gensim
from gensim import corpora
import string
from numbers import Number
from pprint import pprint
import logging
import operator

pd.options.display.max_rows = 30

In [2]:
keywords_chosen = '13_FSDS_Goals_Keywords_EN.csv'

data_folder = './Accounts/output_chelsea/*.csv'

CSV_COLUMNS = ['caption_cleaned', 'hashtags']

In [3]:
# create output directory
outputDir = os.path.dirname(data_folder).replace('output_chelsea', 'q3q4_output_chelsea') + '/'
if not os.path.exists(outputDir):
    os.makedirs(outputDir)

In [4]:
# set of punctuations to remove from text
exclude = set(string.punctuation)

In [5]:
stopWords = set(stopwords.words('english'))
stopWords.add('theyre')   # an informal spelling

lemma = WordNetLemmatizer()    # NLTK English lemmatizer

# detect_lang function can be use to check the percentage of non English posts
# note that missing value NaN can be detected as many different languages such english, spanish or italian
def detect_lang(text):
    try:
        lang = detect(text)
    except:
        return 'error'
    return lang

# lemmatize_keywords also clears 'nan' from input keyword list file
# lemmatization is conducted based on context, some words may not get lemmatized, 
# e.g. "local eating" does not get lemmatized to "local eat"
def lemmatize_keywords(col):
    if str(col).lower() == 'nan':
        return ''
    return '_'.join(lemma.lemmatize(word).lower() for word in col.replace('’', '\'').replace('.', '').split()) #


In [6]:
# load keywords list
pd.options.display.max_rows = 100
keywords_df = pd.read_csv(keywords_chosen, encoding='latin-1')   # "ISO-8859-1"
KEYWORDS_COLS = keywords_df.columns
lemma_keywords_df = pd.DataFrame(columns=KEYWORDS_COLS)
category_dict = {}
keywords_list = set()
for col in KEYWORDS_COLS:
    lemma_keywords_df[col] = keywords_df[col].astype(str).apply(lemmatize_keywords)
    category_dict[col] = set(lemma_keywords_df[col].tolist())
    category_dict[col].remove('')
    keywords_list = keywords_list.union(category_dict[col])
display(lemma_keywords_df)

# if there are punctuations in the keywords list, these punctuation will be kept regardless of puncturation removal step
for word in keywords_list:
    for char in word:
        if char in exclude:
            exclude.remove(char)
            
# Add all words in the given keyword list to pre-defined token dictionary
multi_word = [w.split('_') for w in keywords_list ]   #if '_' in w 
tokenizer = MWETokenizer(multi_word)

Unnamed: 0,Effective action on climate change,Modern and resilient infrastructure,Pristine lakes and rivers,Clean drinking water,Safe and healthy communities,Low-carbon government,Clean energy,Sustainably managed lands and forests,Sustainable food,Clean growth,Healthy coasts and oceans,Healthy wildlife populations,Connecting Canadians with nature
0,climate-related_hazard,infrastructure,clean_lake,water,city,energy,energy,terrestrial_ecosystem,end_hunger,clean_enironment,ocean_resource,wildlife,nature
1,natural_disaster,resilient_infrastructure,healthy,clean,settlement,low-carbon,reliable,sustainably_manage_forest,nutritious,economy,ocean,wilderness,canadian
2,climate_change,facility,river,accessable,upgrade_slum,economy,sustainable,forest,sufficient,modern_world,sea,population,connecting
3,national_policy,economic_development,fraser,affordable,transport_system,clean_tech,renewable_energy,desertification,food,clean_technology,lake,plant,benefit
4,mitigation_action,industry,mackenzie,drinking,road_safety,cost_saving,global_energy_mix,reverse_landà_,malnutrition,greenhaouse_gas,coast,animal,community
5,green_climate,national_circumstance,great_lakes,hygiene,public_transport,carbon,energy_efficiency,land,stunting,healthier_community,water,environment,mental_health
6,climate_change_mitigation,small-scale_industrial,st_lawrence,water_pollution,urbanization,energy_efficiency,cleaner_fossil-fuel,degradation,wasting,reduce_ghg,coast,ecosystem,physical
7,change-related_planning,internet,churchill,dumping,human_settlement,efficient_product,investment_in_energy,biodiversity_loss,adolescent_girl,water_pollution,sea_level,food,improve
8,human_and_institutional_capacity_on_climate,wifi,variety,untreated_wastewater,natural_heritage,vehicle,clean_energy,inland_freshwaterà_,pregnant,clean_job,marine,medicine,child
9,critical_global_problem,information_access,plant,water-use,cultural_and_natural_heritage,equipment,land-locked_country,terrestrial,lactating_woman,growth,habibtat,flood,nature-based


In [7]:
def lemmatize_text(row): 
    text = str(row['caption_original'])
    #print(text)
    text = text.replace('’', '\'')
    tokens = tokenizer.tokenize(text.split())   
    # remove stop words
    stop_free = ' '.join(w for w in tokens if w not in stopWords and len(w) > 1)
    # remove punctuation
    punc_free = ''.join(ch for ch in stop_free if ch not in exclude)
    # lemmatize
    lemmas = ' '.join(lemma.lemmatize(word).lower() for word in punc_free.split() if len(lemma.lemmatize(word)) > 1)
    # remove stop words that appear after lemmatization
    stop_free_2 = ' '.join(w for w in lemmas.split() if w not in stopWords and len(w) > 1)
    #print(stop_free_2)
    return stop_free_2.split()

# assign a category based the max number of keywords found in each category
def find_category(row):
    text = row['lemmatized_text']
    keywords_found = []
    counter = {}
    for col in KEYWORDS_COLS:
        counter[col] = 0
    #print(counter)
    category = 'unknown'
    for word in text:
        for col in KEYWORDS_COLS:
            if word in category_dict[col]:
                #print(word)
                #print(category_dict[col])
                keywords_found.append(word)
                counter[col] += 1
    if len(keywords_found) > 0:
        category = max(counter.items(), key=operator.itemgetter(1))[0]    
    return keywords_found, category

In [8]:
pd.options.display.max_rows = 10
# read csv files and save targt columns to dataframe
filePaths = glob.glob(data_folder)  
for filename in filePaths:
    print(filename)
    basename = os.path.basename(filename)
    outputFileName = outputDir + basename
    data_df = pd.read_csv(filename, encoding = 'utf-8')
    if data_df.shape[0] < 1:
        data_df.to_csv(outputFileName, index=None) 
        continue
    #data_df['lang'] = data_df['caption_cleaned'].astype(str).apply(detect_lang)
    #data_df = data_df.drop(['category', 'words_matched_list'], axis=1)
    data_df.fillna('')
    #wrong_lang = data_df[data_df['lang'] != 'en'].shape[0]
    try:
        data_df['lemmatized_text'] = data_df.apply(lemmatize_text, axis=1)
    except:
        print('cannot process file: ' + basename)
        continue
    data_df['FSDS_matched_keywords'], data_df['FSDS_category'] = zip(*data_df.apply(find_category, axis=1))
    output_list = data_df.columns.tolist()
    output_list.remove('lemmatized_text')
    output_df = data_df[output_list]
    output_df.to_csv(outputFileName, index=None)    
    test_df = data_df[data_df['FSDS_category'] != 'unknown']
    display(test_df[['FSDS_matched_keywords', 'FSDS_category']])

./Accounts/output_chelsea/ParksCanada_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
2,[park],Connecting Canadians with nature
5,[park],Connecting Canadians with nature
8,"[care, ecosystem, ecosystem, ecosystem, ecosys...",Healthy wildlife populations
10,"[forest, forest, park]",Clean drinking water
15,"[canadian, canadian, forest, forest]",Clean drinking water
...,...,...
752,"[canadian, canadian]",Sustainable food
755,[camping],Connecting Canadians with nature
762,"[development, development]",Modern and resilient infrastructure
764,"[safe, park]",Safe and healthy communities


./Accounts/output_chelsea/CanadianAgriculture_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[canadian, canadian, greenhouse_gas, emission]",Low-carbon government
1,"[canadian, canadian]",Sustainable food
2,[crop],Healthy wildlife populations
3,"[canadian, canadian]",Sustainable food
6,"[canadian, canadian]",Sustainable food
...,...,...
1077,"[health, health, health]",Effective action on climate change
1079,[agri-food],Sustainable food
1080,"[canadian, canadian, food, food]",Sustainable food
1081,[agriculture],Pristine lakes and rivers


./Accounts/output_chelsea/CCG_GCC_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
3,"[mental_health, mental_health]",Safe and healthy communities
4,"[canadian, canadian, water, water, oil, gas]",Clean energy
7,[marine],Healthy coasts and oceans
13,"[health, health, health]",Effective action on climate change
18,[death],Safe and healthy communities
...,...,...
3070,"[pollution, pollution, pollution, pollution]",Pristine lakes and rivers
3074,"[safe, marine]",Safe and healthy communities
3076,[coast],Healthy coasts and oceans
3077,"[conservation, conservation, protection]",Sustainably managed lands and forests


./Accounts/output_chelsea/FisheriesOceansCanada_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[fish, food, food, fishery, fishery, food, foo...",Sustainable food
1,"[water, water, specie, specie, risk, water, wa...",Healthy coasts and oceans
2,"[fishery, fishery, canadian, canadian, water, ...",Healthy coasts and oceans
3,"[ocean, national_marine, conservation, conserv...",Healthy coasts and oceans
4,"[risk, death, specie, specie, fishery, fishery...",Healthy coasts and oceans
...,...,...
336,[fish],Healthy coasts and oceans
337,"[ocean, ecosystem, ecosystem, ecosystem, ecosy...",Healthy coasts and oceans
339,[research],Modern and resilient infrastructure
340,"[water, water, water, water, water, water]",Clean drinking water


./Accounts/output_chelsea/CanadianCoastGuard_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[canadian, canadian, coast, canadian, canadian...",Sustainable food
1,[growth],Clean growth
3,[safe],Safe and healthy communities
4,"[improve, canadian, canadian, coast]",Connecting Canadians with nature
5,[safe],Safe and healthy communities
...,...,...
248,"[river, river, water, water, city, water, wate...",Healthy coasts and oceans
249,"[water, water]",Clean drinking water
253,"[safe, water, water]",Clean drinking water
258,"[operation, community, community, coast, canad...",Healthy coasts and oceans


./Accounts/output_chelsea/YourMoneyMattersCanada_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,"[mountain, mountain]",Clean drinking water
4,[cost],Safe and healthy communities
5,[risk],Healthy coasts and oceans
6,"[canadian, canadian]",Sustainable food
10,[park],Connecting Canadians with nature
...,...,...
977,"[community, community]",Safe and healthy communities
991,"[canadian, canadian, variety]",Pristine lakes and rivers
992,[cost],Safe and healthy communities
993,"[benefit, cost]",Safe and healthy communities


./Accounts/output_chelsea/EnvironmentandClimateChange_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[flooding, environment, environment]",Sustainable food
1,"[environment, environment]",Healthy wildlife populations
2,"[clean, growth, carbon, economy, economy]",Low-carbon government
3,"[environment, environment]",Healthy wildlife populations
4,"[energy, energy, energy, quality]",Clean drinking water
...,...,...
152,"[clean_technology, clean_technology, clean_tec...",Modern and resilient infrastructure
153,"[coal-fired, electricity, electricity, carbon,...",Low-carbon government
154,"[gas, clean, energy, energy, energy]",Clean energy
155,"[pollution, pollution, pollution, pollution, m...",Pristine lakes and rivers


./Accounts/output_chelsea/parks.canada_posts.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,[park],Connecting Canadians with nature
3,"[park, animal, animal, animal, wildlife, wildl...",Healthy wildlife populations
4,[park],Connecting Canadians with nature
5,"[park, coast, forest, forest, lake, lake]",Clean drinking water
6,"[park, reserve, reserve, reserve, wilderness, ...",Healthy wildlife populations
...,...,...
399,"[park, reserve, reserve, reserve]",Connecting Canadians with nature
400,"[park, reserve, reserve, reserve]",Connecting Canadians with nature
404,[park],Connecting Canadians with nature
408,"[care, specie, specie, risk]",Healthy coasts and oceans


./Accounts/output_chelsea/TransportandInfrastructureinCanada_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[water, water, vehicle]",Clean drinking water
2,"[diesel, water, water]",Clean drinking water
8,[vehicle],Low-carbon government
12,[panel],Clean energy
14,"[clean, child, child, food, food]",Clean drinking water
...,...,...
1311,"[infrastructure, community, community]",Modern and resilient infrastructure
1317,[infrastructure],Modern and resilient infrastructure
1320,[infrastructure],Modern and resilient infrastructure
1322,"[pollution, pollution, pollution, pollution, g...",Safe and healthy communities


./Accounts/output_chelsea/environmentca_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
2,[sea],Healthy coasts and oceans
3,"[specie, specie]",Healthy coasts and oceans
4,"[child, child]",Safe and healthy communities
6,"[water, water, food, food, energy, energy, ene...",Clean drinking water
13,"[wildlife, wildlife]",Sustainably managed lands and forests
...,...,...
2935,"[energy, energy, energy, energy, energy, energy]",Low-carbon government
2936,[commitment],Effective action on climate change
2937,"[well-being, well-being, environment, environm...",Healthy wildlife populations
2939,"[energy, energy, energy]",Low-carbon government


./Accounts/output_chelsea/NRCan_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,"[canadian, canadian]",Sustainable food
3,[benefit],Connecting Canadians with nature
6,"[coast, coast, coast, community, community]",Healthy coasts and oceans
7,"[injury, industry]",Modern and resilient infrastructure
8,"[market, canadian, canadian]",Sustainable food
...,...,...
2977,"[development, development]",Modern and resilient infrastructure
2978,[sea],Healthy coasts and oceans
2980,"[electricity, electricity]",Safe and healthy communities
2981,[settlement],Safe and healthy communities


./Accounts/output_chelsea/ENERGYSTAR_CAN_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,"[energy, energy, energy]",Low-carbon government
2,[ocean],Healthy coasts and oceans
3,"[energy, energy, energy]",Low-carbon government
4,"[energy, energy, energy]",Low-carbon government
8,"[energy, energy, energy]",Low-carbon government
...,...,...
1786,"[energy, energy, energy, energy, energy, energy]",Low-carbon government
1787,"[energy, energy, energy, energy, energy, energy]",Low-carbon government
1789,"[energy, energy, energy]",Low-carbon government
1790,"[canadian, canadian]",Sustainable food


./Accounts/output_chelsea/DFO_Gulf_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[feed],Sustainable food
1,"[water, water, water, water]",Clean drinking water
2,[fish],Healthy coasts and oceans
5,"[economy, economy]",Low-carbon government
6,[ocean],Healthy coasts and oceans
...,...,...
532,"[fishery, fishery]",Sustainable food
533,[sea],Healthy coasts and oceans
536,"[fish, specie, specie]",Healthy coasts and oceans
537,"[fishery, fishery, water, water]",Healthy coasts and oceans


./Accounts/output_chelsea/ec_minister_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
3,[feed],Sustainable food
4,[sea],Healthy coasts and oceans
5,"[child, child]",Safe and healthy communities
7,"[water, water, food, food, energy, energy, ene...",Clean drinking water
8,[ocean],Healthy coasts and oceans
...,...,...
3090,"[child, child, national_park, national_park]",Connecting Canadians with nature
3094,"[environment, environment, economy, economy]",Low-carbon government
3095,"[climate_change, community, community, ice]",Safe and healthy communities
3096,"[energy, energy, energy]",Low-carbon government


./Accounts/output_chelsea/DFO_Central_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[feed],Sustainable food
6,[coast],Healthy coasts and oceans
7,[improve],Connecting Canadians with nature
8,"[national_marine, conservation, conservation]",Connecting Canadians with nature
9,[fish],Healthy coasts and oceans
...,...,...
1529,"[water, water]",Clean drinking water
1533,"[pollution, pollution, pollution, pollution, o...",Healthy coasts and oceans
1539,[operation],Low-carbon government
1543,"[fishery, fishery]",Sustainable food


./Accounts/output_chelsea/DFO_Pacific_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,[feed],Sustainable food
4,"[health, health, health]",Effective action on climate change
7,[improve],Connecting Canadians with nature
8,[fish],Healthy coasts and oceans
9,"[community, community, safe]",Safe and healthy communities
...,...,...
1499,[shellfish],Healthy coasts and oceans
1500,"[conservation, conservation]",Sustainably managed lands and forests
1502,"[conservation, conservation]",Sustainably managed lands and forests
1503,[coast],Healthy coasts and oceans


./Accounts/output_chelsea/DFO_NL_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[feed],Sustainable food
1,[improve],Connecting Canadians with nature
2,[fish],Healthy coasts and oceans
3,[ocean],Healthy coasts and oceans
4,"[protection, marine]",Healthy coasts and oceans
...,...,...
2878,"[water, water]",Clean drinking water
2881,"[sustainable, sustainable, marine]",Healthy coasts and oceans
2882,[operation],Low-carbon government
2883,[operation],Low-carbon government


./Accounts/output_chelsea/DFO_CCG_Quebec_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[death],Safe and healthy communities
2,"[marine, coast, coast, coast]",Healthy coasts and oceans
3,"[canadian, canadian, coast]",Sustainable food
4,"[economy, economy]",Low-carbon government
5,[ocean],Healthy coasts and oceans
...,...,...
905,"[development, development, marine, ecosystem, ...",Healthy coasts and oceans
907,[ice],Safe and healthy communities
909,"[food, food]",Sustainable food
910,"[invasive, invasive, specie, specie, marine, e...",Healthy coasts and oceans


./Accounts/output_chelsea/DFO_Science_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[feed],Sustainable food
1,[fish],Healthy coasts and oceans
3,"[health, health, health]",Effective action on climate change
8,"[water, water, safe, healthy, healthy]",Pristine lakes and rivers
9,"[community, community, safe]",Safe and healthy communities
...,...,...
1527,[climate_change],Effective action on climate change
1528,[sea],Healthy coasts and oceans
1529,"[fishery, fishery, research, freshwater, fresh...",Sustainable food
1531,"[water, water]",Clean drinking water


./Accounts/output_chelsea/Transport_gc_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,"[canadian, canadian]",Sustainable food
2,"[canadian, canadian, canadian, canadian]",Sustainable food
4,"[canadian, canadian, canadian, canadian]",Sustainable food
7,[resilience],Safe and healthy communities
8,[improve],Connecting Canadians with nature
...,...,...
2716,[city],Safe and healthy communities
2717,[marine],Healthy coasts and oceans
2720,"[river, river, community, community]",Pristine lakes and rivers
2722,"[child, child]",Safe and healthy communities


./Accounts/output_chelsea/GiantMine_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[fish, lake, lake]",Healthy coasts and oceans
2,[research],Modern and resilient infrastructure
3,"[industry, industry]",Modern and resilient infrastructure
4,[safe],Safe and healthy communities
5,"[health, health, health, ecological, risk]",Effective action on climate change
...,...,...
27,[flooding],Sustainable food
29,"[engagement, engagement]",Connecting Canadians with nature
30,[industry],Modern and resilient infrastructure
33,[industry],Modern and resilient infrastructure


./Accounts/output_chelsea/TSBCanada_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,[industry],Modern and resilient infrastructure
2,[risk],Healthy coasts and oceans
9,[aviation],Effective action on climate change
14,[flow],Clean drinking water
15,"[lake, lake]",Clean drinking water
...,...,...
1419,[risk],Healthy coasts and oceans
1423,[ocean],Healthy coasts and oceans
1426,[feed],Sustainable food
1433,[marine],Healthy coasts and oceans


./Accounts/output_chelsea/CTA_gc_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
5,"[animal, animal, animal]",Pristine lakes and rivers
6,"[community, community]",Safe and healthy communities
11,"[child, child, child, child]",Safe and healthy communities
13,"[canadian, canadian]",Sustainable food
15,[aviation],Effective action on climate change
...,...,...
721,"[shipping, shipping]",Pristine lakes and rivers
725,[marine],Healthy coasts and oceans
726,"[shipping, shipping]",Pristine lakes and rivers
736,"[shipping, shipping]",Pristine lakes and rivers


./Accounts/output_chelsea/AskISED_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category


./Accounts/output_chelsea/DFO_MPO_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,[feed],Sustainable food
2,[death],Safe and healthy communities
4,"[marine, protected_area]",Healthy coasts and oceans
5,"[health, health, health]",Effective action on climate change
7,[sea],Healthy coasts and oceans
...,...,...
2254,[research],Modern and resilient infrastructure
2255,"[ocean, freshwater, freshwater]",Clean drinking water
2256,"[specie, specie]",Healthy coasts and oceans
2259,"[specie, specie]",Healthy coasts and oceans


./Accounts/output_chelsea/GovCanNorth_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[canadian, canadian, research, research, resea...",Modern and resilient infrastructure
1,[land],Sustainably managed lands and forests
2,[land],Sustainably managed lands and forests
3,"[well-being, well-being]",Safe and healthy communities
5,"[lake, lake, engagement, lake, lake]",Clean drinking water
...,...,...
343,"[contaminant, food, food]",Clean drinking water
346,"[engagement, improve]",Connecting Canadians with nature
348,"[engagement, improve]",Connecting Canadians with nature
351,"[water, water, quality, land]",Clean drinking water


./Accounts/output_chelsea/DFO_MAR_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[ocean],Healthy coasts and oceans
2,[marine],Healthy coasts and oceans
3,"[economy, economy]",Low-carbon government
4,[coast],Healthy coasts and oceans
5,[research],Modern and resilient infrastructure
...,...,...
1341,"[community, community]",Safe and healthy communities
1343,"[lake, lake, population, population]",Clean drinking water
1345,[research],Modern and resilient infrastructure
1347,"[fishery, fishery]",Sustainable food


./Accounts/output_chelsea/GCIndigenous_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
15,"[population, population]",Sustainably managed lands and forests
23,"[health, health, health]",Effective action on climate change
24,[preserve],Healthy wildlife populations
25,[commitment],Effective action on climate change
34,"[lake, lake, water, water]",Clean drinking water
...,...,...
3094,[churchill],Pristine lakes and rivers
3099,[urban],Safe and healthy communities
3106,[urban],Safe and healthy communities
3114,"[river, river]",Pristine lakes and rivers


./Accounts/output_chelsea/canenvironment_posts.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[prairie],Sustainably managed lands and forests
2,"[water, water, atmosphere]",Pristine lakes and rivers
3,"[wildlife, wildlife, habitat, habitat, specie,...",Healthy wildlife populations
4,"[canadian, canadian, wildlife, wildlife]",Sustainably managed lands and forests
5,[park],Connecting Canadians with nature
...,...,...
237,"[water, water, harmful, canadian, canadian, co...",Healthy coasts and oceans
240,"[nature, feed]",Sustainable food
241,"[environment, environment, environment, enviro...",Healthy wildlife populations
245,"[canadian, canadian, climate_change, community...",Connecting Canadians with nature


./Accounts/output_chelsea/CanadianTransportationAgency_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
1,"[community, community, flow]",Clean drinking water
2,"[canadian, canadian, variety]",Pristine lakes and rivers
6,"[canadian, canadian]",Sustainable food
7,"[canadian, canadian]",Sustainable food
10,"[stewardship, stewardship]",Clean drinking water
...,...,...
351,"[canadian, canadian]",Sustainable food
354,"[canadian, canadian]",Sustainable food
355,"[medicine, medicine, equipment]",Safe and healthy communities
356,[equipment],Low-carbon government


./Accounts/output_chelsea/AAFC_Canada_tweets.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
6,"[community, community]",Safe and healthy communities
8,[resilience],Safe and healthy communities
13,"[canadian, canadian, agriculture]",Pristine lakes and rivers
15,"[community, community]",Safe and healthy communities
19,"[agriculture, agri-food, industry]",Modern and resilient infrastructure
...,...,...
3094,[market],Sustainable food
3095,[benefit],Connecting Canadians with nature
3100,"[market, industry]",Modern and resilient infrastructure
3101,[research],Modern and resilient infrastructure


./Accounts/output_chelsea/ParksCanada_facebook_statuses.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,[park],Connecting Canadians with nature
2,"[lake, lake, park]",Clean drinking water
3,[variety],Pristine lakes and rivers
4,"[marine, conservation, conservation]",Sustainably managed lands and forests
5,[park],Connecting Canadians with nature
...,...,...
1204,"[camping, camping, camping, food, food, camping]",Connecting Canadians with nature
1206,"[camping, park]",Connecting Canadians with nature
1215,"[camping, park, mountain, mountain, park]",Connecting Canadians with nature
1219,[park],Connecting Canadians with nature


./Accounts/output_chelsea/naturalresourcescanada_posts.csv


Unnamed: 0,FSDS_matched_keywords,FSDS_category
0,"[forest, forest, productivity, canadian, canad...",Sustainable food
1,"[land, infrastructure]",Modern and resilient infrastructure
6,"[ice, environment, environment]",Safe and healthy communities
7,"[healthy, healthy]",Pristine lakes and rivers
8,"[energy, energy, energy]",Low-carbon government
...,...,...
230,"[energy, energy, energy]",Low-carbon government
231,"[energy, energy, energy]",Low-carbon government
233,"[infrastructure, gas, vehicle]",Modern and resilient infrastructure
235,"[mountain, mountain]",Clean drinking water


In [10]:
print(exclude)

{'`', '}', '*', '=', '%', ':', '#', ']', '|', '@', ',', '$', ')', '.', '\\', '!', '[', "'", '^', '~', '{', '?', '(', '+', '&', '<', '>', ';', '"'}
