In [8]:
def parse_lyrics(file_path):
    """Parse a plain-text lyrics file into a mapping keyed by song title.

    The file is read line by line:

    * a line starting with ``'Title: '`` opens a new song,
    * a line starting with ``'Artist: '`` sets the artist of the current song,
    * a line that is exactly ``'Lyrics:'`` (ignoring surrounding whitespace)
      is a marker and is skipped,
    * every other line is appended to the current song's lyrics.

    Args:
        file_path: Path of the UTF-8 encoded lyrics file.

    Returns:
        dict: Maps each title to ``{'artist': str, 'lyrics': str}``. The
        lyrics have leading/trailing whitespace stripped. A song without an
        ``'Artist: '`` line gets an empty artist. If a title occurs more than
        once, the last occurrence wins. Text that is not under a non-empty
        title (e.g. a preamble before the first ``'Title: '`` line) is
        discarded.
    """
    lyrics_data = {}
    current_title = ''
    current_artist = ''
    lyric_lines = []

    with open(file_path, 'r', encoding='UTF-8') as file:
        for line in file:
            if line.startswith('Title: '):
                # A new title closes the song collected so far.
                if current_title:
                    lyrics_data[current_title] = {
                        'artist': current_artist,
                        'lyrics': ''.join(lyric_lines).strip(),
                    }
                current_title = line[len('Title: '):].strip()
                # Reset per-song state unconditionally so that neither a
                # preamble nor the previous song's artist leaks into this song.
                current_artist = ''
                lyric_lines = []
            elif line.startswith('Artist: '):
                current_artist = line[len('Artist: '):].strip()
            elif line.strip() == 'Lyrics:':
                continue
            else:
                lyric_lines.append(line)

    # The last song has no following 'Title: ' line to flush it.
    if current_title:
        lyrics_data[current_title] = {
            'artist': current_artist,
            'lyrics': ''.join(lyric_lines).strip(),
        }
    return lyrics_data


# Parse the lyrics file once; later cells reuse the resulting
# {title: {'artist': ..., 'lyrics': ...}} mapping.
song_lyrics = parse_lyrics('song_lyrics.txt') 

In [16]:
# Display the parsed mapping of title -> {'artist', 'lyrics'} for inspection.
song_lyrics

{'Flowers': {'artist': 'Miley Cyrus',
  'lyrics': "We were good, we were gold\nKinda dream that can't be sold\nWe were right 'til we weren't\nBuilt a home and watched it burn\n\nMm, I didn't wanna leave you\nI didn't wanna lie\nStarted to cry, but then remembered I\n\nI can buy myself flowers\nWrite my name in the sand\nTalk to myself for hours\nSay things you don't understand\nI can take myself dancing\nAnd I can hold my own hand\nYeah, I can love me better than you can\n\nCan love me better\nI can love me better, baby\nCan love me better\nI can love me better, baby\n\nPaint my nails cherry red\nMatch the roses that you left\nNo remorse, no regret\nI forgive every word you said\n\nOoh, I didn't wanna leave you, baby\nI didn't wanna fight\nStarted to cry, but then remembered I\n\nI can buy myself flowers\nWrite my name in the sand\nTalk to myself for hours, yeah\nSay things you don't understand\nI can take myself dancing, yeah\nI can hold my own hand\nYeah, I can love me better than yo

In [17]:
import pymongo
import csv

# connecting to MongoDB
client = pymongo.MongoClient('mongodb://localhost:27017/')
db = client['Most_popular_songs']
songs_collection = db['songs']


def _load_rows_by_title(csv_path):
    """Read a CSV file into a dict that maps each row's 'title' to the row.

    Args:
        csv_path: Path of an ISO-8859-1 encoded CSV file with a 'title' column.

    Returns:
        dict: ``{title: row}`` where ``row`` is the dict produced by
        ``csv.DictReader``. When a title repeats, the last row wins.

    Raises:
        KeyError: If a row has no 'title' column.
    """
    # newline='' leaves newline handling to the csv module, which is needed
    # for quoted fields that contain line breaks.
    with open(csv_path, mode='r', encoding='ISO-8859-1', newline='') as file:
        return {row['title']: row for row in csv.DictReader(file)}


analysis_data = _load_rows_by_title('analysis_results.csv')
csv_data = _load_rows_by_title('spotify-2023.csv')

all_songs_data = []

# Songs that have lyrics: start from the parsed entry, then layer the CSV row
# and the analysis row on top (later sources win on key clashes).
for title, lyrics_entry in song_lyrics.items():
    # NOTE(review): lyrics_entry is the whole {'artist', 'lyrics'} dict, so the
    # record ends up with a nested 'lyrics' -> {'artist', 'lyrics'} value.
    # Kept as is because already-stored documents use this shape; confirm
    # whether a flat record was intended.
    song_data = {'title': title, 'lyrics': lyrics_entry}
    song_data.update(csv_data.get(title, {}))
    song_data.update(analysis_data.get(title, {}))
    all_songs_data.append(song_data)

# adding songs from CSV that are not in lyrics
for title, details in csv_data.items():
    if title not in song_lyrics:
        # Merge into a fresh dict so the rows held in csv_data stay untouched.
        all_songs_data.append({**details, **analysis_data.get(title, {})})

In [23]:
# Display the merged per-song records (one dict per title) for inspection.
all_songs_data

[{'title': 'Flowers',
  'lyrics': {'artist': 'Miley Cyrus',
   'lyrics': "We were good, we were gold\nKinda dream that can't be sold\nWe were right 'til we weren't\nBuilt a home and watched it burn\n\nMm, I didn't wanna leave you\nI didn't wanna lie\nStarted to cry, but then remembered I\n\nI can buy myself flowers\nWrite my name in the sand\nTalk to myself for hours\nSay things you don't understand\nI can take myself dancing\nAnd I can hold my own hand\nYeah, I can love me better than you can\n\nCan love me better\nI can love me better, baby\nCan love me better\nI can love me better, baby\n\nPaint my nails cherry red\nMatch the roses that you left\nNo remorse, no regret\nI forgive every word you said\n\nOoh, I didn't wanna leave you, baby\nI didn't wanna fight\nStarted to cry, but then remembered I\n\nI can buy myself flowers\nWrite my name in the sand\nTalk to myself for hours, yeah\nSay things you don't understand\nI can take myself dancing, yeah\nI can hold my own hand\nYeah, I can

In [24]:
# Write every merged record to MongoDB, keyed on its title: an existing
# document with the same title gets its fields overwritten, otherwise a new
# document is inserted.
for record in all_songs_data:
    title_filter = {'title': record['title']}
    field_changes = {'$set': record}
    songs_collection.update_one(title_filter, field_changes, upsert=True)

In [27]:
import pymongo

# connecting to MongoDB
client = pymongo.MongoClient('mongodb://localhost:27017/')
db = client['Most_popular_songs']
songs_collection = db['songs']

# converting 'danceability_%' from string to double
# Ask the server only for documents whose value is a string, and only for the
# fields used below, instead of pulling every full document (lyrics included).
string_valued_songs = songs_collection.find(
    {'danceability_%': {'$type': 'string'}},
    {'danceability_%': 1, 'title': 1},
)

for song in string_valued_songs:
    raw_value = song['danceability_%']
    # $type also matches arrays that contain a string element, so keep the
    # Python-side check to convert plain strings only.
    if not isinstance(raw_value, str):
        continue
    try:
        danceability = float(raw_value)
    except ValueError:
        # Fall back to the _id so a document without a title cannot raise here.
        print(f"Could not convert {song.get('title', song['_id'])}")
        continue
    songs_collection.update_one({'_id': song['_id']}, {'$set': {'danceability_%': danceability}})

In [28]:
# Average 'danceability_%' over all documents that have the field, collapsed
# into a single group (_id: None).
pipeline = [
    {
        '$match': {'danceability_%': {'$exists': True}}
    },
    {
        '$group': {
            '_id': None,
            'average_danceability': {'$avg': '$danceability_%'}
        }
    }
]

average_result = list(songs_collection.aggregate(pipeline))

# The result list is empty when no document matches. When documents match but
# none holds a numeric value, $avg yields null (None), which must not be
# reported as an average.
average_danceability = average_result[0]['average_danceability'] if average_result else None

if average_danceability is not None:
    print(f"The average danceability is: {average_danceability}")
else:
    print("No data available to calculate average danceability.")

The average danceability is: 67.02439024390245
