In [1]:
import pandas as pd

twitter_df = pd.read_csv('tweets_data4.csv', encoding='utf-8')

display(twitter_df)

Unnamed: 0,Keyword,Tweet Text
0,buurthub OR deelvervoer OR deelauto OR deelfie...,@PekePeter @Nieuwsblad_be @groen Dan is het we...
1,buurthub OR deelvervoer OR deelauto OR deelfie...,Wéér trekt een aanbieder van deelvervoer zich ...
2,buurthub OR deelvervoer OR deelauto OR deelfie...,#Tinyhouses #Westpark #Groningen Er gebeurt hi...
3,buurthub OR deelvervoer OR deelauto OR deelfie...,Autodelen of een deelauto. Ideaal te combinere...
4,buurthub OR deelvervoer OR deelauto OR deelfie...,Vandaag is tevens de start van de landelijke c...
...,...,...
1035,buurthub OR deelvervoer OR deelauto OR deelfie...,@TNYBN123 @z0roProfit Een deelauto wordt door ...
1036,buurthub OR deelvervoer OR deelauto OR deelfie...,Deelfiets in brand bij bushalte in Gorinchem: ...
1037,buurthub OR deelvervoer OR deelauto OR deelfie...,Deelfiets in brand bij bushalte in Gorinchem: ...
1038,buurthub OR deelvervoer OR deelauto OR deelfie...,Nieuws: Deelfiets in brand bij bushalte in Gor...


In [7]:
from deep_translator import GoogleTranslator

twitter_df['Translated Text'] = twitter_df['Tweet Text'].apply(
    lambda x: GoogleTranslator(source='auto', target='en').translate(x)
)

In [8]:
display(twitter_df['Translated Text'])

0       @PekePeter @Nieuwsblad_be @groen Then it is st...
1       Once again a shared transport provider is with...
2       #Tinyhouses #Westpark #Groningen There is much...
3       Car sharing or a shared car. Ideal to combine ...
4       Today is also the start of the national campai...
                              ...                        
1035    @TNYBN123 @z0roProfit A shared car is used by ...
1036    Shared bicycle on fire at bus stop in Gorinche...
1037    Shared bicycle on fire at bus stop in Gorinche...
1038    News: Bicycle sharing on fire at bus stop in G...
1039    This morning I parked my shared bicycle powere...
Name: Translated Text, Length: 1040, dtype: object

In [9]:
twitter_df.to_csv('translated_tweets.csv', index=False)

In [15]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import re

from nltk.stem import WordNetLemmatizer
from nltk.corpus import words
from gensim import corpora
from gensim.models.ldamodel import LdaModel

english_words = set(words.words())

lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    text = str(text) if isinstance(text, (str, float)) else ''

    #remove Twitter handles
    text = re.sub(r'@[\w_]+', '', text)

    #remove URLs
    text = re.sub(r'http\S+', '', text)

    text = text.translate(str.maketrans('', '', string.punctuation))

    #remove special characters
    text= re.sub(r'[^A-Za-z\s]', '', text)

    #case normalisation
    text = text.lower()

    #tokenisation
    tokens = word_tokenize(text)

    #removal of stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words and token in english_words]
        
    #lemmatisation
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    
    return tokens


texts = twitter_df['Translated Text'].tolist()

processed_texts = [preprocess_text(text) for text in texts]

corpus = corpora.Dictionary(processed_texts)

doc_term_matrix = [corpus.doc2bow(text) for text in processed_texts]

print(processed_texts)




In [27]:
from bertopic import BERTopic
from bertopic.vectorizers import ClassTfidfTransformer

def analyze_topics(texts):
    concatenated_texts = [' '.join(tweet_list) for tweet_list in texts]

    ctfidf_model = ClassTfidfTransformer(bm25_weighting=True)

    model = BERTopic(verbose=True, embedding_model='paraphrase-MiniLM-L3-v2', min_topic_size=6, ctfidf_model=ctfidf_model, n_gram_range=(1, 2))

    topics, _ = model.fit_transform(concatenated_texts)
    
    freq = model.get_topic_info()

    
    print("Number of topics: {}".format(len(freq)))
    display(freq.head(50))

    freq = model.get_topic_info()
    freq.to_csv('output5.csv', index=False)

    return model, topics

model, topics = analyze_topics(processed_texts)


NameError: name 'nr_keywords' is not defined

In [16]:
topics_df = pd.read_csv('output5.csv', sep=',')

display(topics_df)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs,Keep?,Combine?,Description
0,-1,259,-1_also_private_electric_take,"['also', 'private', 'electric', 'take', 'time'...",['mobility known time building city accessibil...,-,,
1,0,93,0_scooter_bicycle scooter_bicycle_scooter car,"['scooter', 'bicycle scooter', 'bicycle', 'sco...",['bicycle scooter car think let u know via sho...,-,,
2,1,83,1_car car_like_rid car_dont,"['car car', 'like', 'rid car', 'dont', 'still'...",['hi ask pick billy yet received response whet...,-,,
3,2,78,2_transport_public_public transport_future,"['transport', 'public', 'public transport', 'f...",['affordable reliable public transport electri...,-,,
4,3,52,3_bicycle_public transport_public_transport,"['bicycle', 'public transport', 'public', 'tra...",['plenty available within walking distance alt...,+,,Expressing their please for shared mobility be...
5,4,45,4_hub_neighborhood_neighborhood hub_district,"['hub', 'neighborhood', 'neighborhood hub', 'd...",['consultation supervision enforcement another...,-,,
6,5,34,5_electric car_electric_pump_already electric,"['electric car', 'electric', 'pump', 'already ...",['trying electric car easy get family petrol c...,-,,
7,6,30,6_public transport_public_transport_travel,"['public transport', 'public', 'transport', 't...",['never happen car always full often need loca...,-,,
8,7,29,7_electric_premium_electric electric_electric ...,"['electric', 'premium', 'electric electric', '...",['already possible rent electric may also poss...,-,,
9,8,28,8_bicycle_bike_person injured_fall bicycle,"['bicycle', 'bike', 'person injured', 'fall bi...","['person injured fall bicycle', 'person injure...",-,,


In [17]:
topics_df = topics_df[['Count', 'Name', 'Representation', 'Representative_Docs']]
display(topics_df)

Unnamed: 0,Count,Name,Representation,Representative_Docs
0,259,-1_also_private_electric_take,"['also', 'private', 'electric', 'take', 'time'...",['mobility known time building city accessibil...
1,93,0_scooter_bicycle scooter_bicycle_scooter car,"['scooter', 'bicycle scooter', 'bicycle', 'sco...",['bicycle scooter car think let u know via sho...
2,83,1_car car_like_rid car_dont,"['car car', 'like', 'rid car', 'dont', 'still'...",['hi ask pick billy yet received response whet...
3,78,2_transport_public_public transport_future,"['transport', 'public', 'public transport', 'f...",['affordable reliable public transport electri...
4,52,3_bicycle_public transport_public_transport,"['bicycle', 'public transport', 'public', 'tra...",['plenty available within walking distance alt...
5,45,4_hub_neighborhood_neighborhood hub_district,"['hub', 'neighborhood', 'neighborhood hub', 'd...",['consultation supervision enforcement another...
6,34,5_electric car_electric_pump_already electric,"['electric car', 'electric', 'pump', 'already ...",['trying electric car easy get family petrol c...
7,30,6_public transport_public_transport_travel,"['public transport', 'public', 'transport', 't...",['never happen car always full often need loca...
8,29,7_electric_premium_electric electric_electric ...,"['electric', 'premium', 'electric electric', '...",['already possible rent electric may also poss...
9,28,8_bicycle_bike_person injured_fall bicycle,"['bicycle', 'bike', 'person injured', 'fall bi...","['person injured fall bicycle', 'person injure..."


In [18]:
topics_df = topics_df.drop([0, 1, 2 , 3, 5, 6, 7, 8, 9, 12, 13, 14, 16, 17, 18, 19, 22, 23, 24, 25, 27, 28, 29, 30, 32])

In [19]:
display(topics_df)

Unnamed: 0,Count,Name,Representation,Representative_Docs
4,52,3_bicycle_public transport_public_transport,"['bicycle', 'public transport', 'public', 'tra...",['plenty available within walking distance alt...
10,26,9_parking_permit_parking space_parking permit,"['parking', 'permit', 'parking space', 'parkin...",['also difficult find parking space neighborho...
11,25,10_ownership_rental_rent_car ownership,"['ownership', 'rental', 'rent', 'car ownership...",['car principle work well speaking experience ...
15,17,14_expensive_every day_use_every,"['expensive', 'every day', 'use', 'every', 'da...",['dont people share caravan example use much l...
20,13,19_mobility_car mobility_sustainable_people need,"['mobility', 'car mobility', 'sustainable', 'p...",['correct understandable also sustainable situ...
21,11,20_solar_drive solar_drive_energy,"['solar', 'drive solar', 'drive', 'energy', 'c...","['drive solar collaborate', 'drive solar charg..."
26,10,25_car none_none_never_car never,"['car none', 'none', 'never', 'car never', 'no...",['leave nothing happy car maybe car unaffordab...
31,7,30_private_private car_private ownership_honest,"['private', 'private car', 'private ownership'...",['part discouragement strategy want ban privat...


In [20]:
import requests

def analyze_sentiment(text):
    url = "http://text-processing.com/api/sentiment/"
    data = {'text': text, 'language': 'dutch'}

    response = requests.post(url, data=data)
    result = response.json()

    sentiment = result['label']
    confidence = result['probability'][sentiment]

    return sentiment, confidence


topics_df['sentiment'], topics_df['confidence'] = zip(*topics_df['Representative_Docs'].apply(analyze_sentiment))

display(topics_df)

Unnamed: 0,Count,Name,Representation,Representative_Docs,sentiment,confidence
4,52,3_bicycle_public transport_public_transport,"['bicycle', 'public transport', 'public', 'tra...",['plenty available within walking distance alt...,neutral,1.0
10,26,9_parking_permit_parking space_parking permit,"['parking', 'permit', 'parking space', 'parkin...",['also difficult find parking space neighborho...,neutral,1.0
11,25,10_ownership_rental_rent_car ownership,"['ownership', 'rental', 'rent', 'car ownership...",['car principle work well speaking experience ...,neg,0.6
15,17,14_expensive_every day_use_every,"['expensive', 'every day', 'use', 'every', 'da...",['dont people share caravan example use much l...,neutral,1.0
20,13,19_mobility_car mobility_sustainable_people need,"['mobility', 'car mobility', 'sustainable', 'p...",['correct understandable also sustainable situ...,neg,0.6
21,11,20_solar_drive solar_drive_energy,"['solar', 'drive solar', 'drive', 'energy', 'c...","['drive solar collaborate', 'drive solar charg...",neutral,1.0
26,10,25_car none_none_never_car never,"['car none', 'none', 'never', 'car never', 'no...",['leave nothing happy car maybe car unaffordab...,neutral,1.0
31,7,30_private_private car_private ownership_honest,"['private', 'private car', 'private ownership'...",['part discouragement strategy want ban privat...,neg,0.6


In [21]:
topics_df['Description'] = ['Expressing appreciation for the accessibility of shared mobility solutions, conveniently located within a short walking distance.', 'Expressing dissatisfaction with the challenge of finding parking spaces in the neighborhood, contemplating the decision not to renew their parking permit.', 'Advocacy for convenient shared mobility to reduce reliance on traditional car ownership models, making urban living more sustainable and affordable.', 'Promoting the idea of using shared cars for everyday needs to minimise expenses', 'Advocating for governments to play a crucial role by implementing policies to reduce the number of cars and encourage alternative modes of transportation', 'Embracing solar-powered driving', 'Reflecting on the challenges of car ownership, contemplating alternatives due to rising fuel prices, and considering family safety in the decision-making process.', 'Advocating for a discouragement strategy to ban private car ownership in favor of public transportation, bicycles, and walking within accessible distances.']

In [22]:
display(topics_df)

Unnamed: 0,Count,Name,Representation,Representative_Docs,sentiment,confidence,Description
4,52,3_bicycle_public transport_public_transport,"['bicycle', 'public transport', 'public', 'tra...",['plenty available within walking distance alt...,neutral,1.0,Expressing appreciation for the accessibility ...
10,26,9_parking_permit_parking space_parking permit,"['parking', 'permit', 'parking space', 'parkin...",['also difficult find parking space neighborho...,neutral,1.0,Expressing dissatisfaction with the challenge ...
11,25,10_ownership_rental_rent_car ownership,"['ownership', 'rental', 'rent', 'car ownership...",['car principle work well speaking experience ...,neg,0.6,Advocacy for convenient shared mobility to red...
15,17,14_expensive_every day_use_every,"['expensive', 'every day', 'use', 'every', 'da...",['dont people share caravan example use much l...,neutral,1.0,Promoting the idea of using shared cars for ev...
20,13,19_mobility_car mobility_sustainable_people need,"['mobility', 'car mobility', 'sustainable', 'p...",['correct understandable also sustainable situ...,neg,0.6,Advocating for governments to play a crucial r...
21,11,20_solar_drive solar_drive_energy,"['solar', 'drive solar', 'drive', 'energy', 'c...","['drive solar collaborate', 'drive solar charg...",neutral,1.0,Embracing solar-powered driving
26,10,25_car none_none_never_car never,"['car none', 'none', 'never', 'car never', 'no...",['leave nothing happy car maybe car unaffordab...,neutral,1.0,"Reflecting on the challenges of car ownership,..."
31,7,30_private_private car_private ownership_honest,"['private', 'private car', 'private ownership'...",['part discouragement strategy want ban privat...,neg,0.6,Advocating for a discouragement strategy to ba...


In [23]:
topics_df.to_csv('final_analysed_topics.csv', index=False)