# **Collaborative Filtering**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns

In [2]:
df_ratings = pd.read_csv('dataset/ratings.csv', sep=';')

In [35]:
unique_users_count = df_ratings['user'].nunique()
unique_users_count

277

In [39]:
len(df_ratings)

11762

In [37]:
df_ratings['link_album'].nunique()

4877

In [11]:
df_ratings.columns

In [12]:
plt.figure(figsize=(20, 5))

sns.countplot(x=df_ratings["rating_album"])
plt.xlabel("Rating Album")
plt.ylabel("Frequency")
plt.title("Distribution of Rating Album")

plt.tight_layout()
plt.show()

In [13]:
plt.figure(figsize=(10, 5))

sns.countplot(df_ratings["link_user"])
plt.xlabel("Link User")
plt.ylabel("Frequency")
plt.title("Distribution of Link User")

plt.tight_layout()
plt.show()

In [3]:
interaction_matrix = df_ratings.pivot_table(index='user', columns='link_album', values='rating_album')

In [4]:
df_filled = interaction_matrix.fillna(0)

In [5]:
def standardize(row):
    new_row =(row - row.mean())/(row.max()-row.min())
    return new_row

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

ratings_std= df_filled.apply(standardize)
ratings_std = ratings_std.fillna(0)
item_similarity=cosine_similarity(ratings_std.T)

In [14]:
item_similarity_df = pd.DataFrame(item_similarity,
                                  index=ratings_std.columns,
                                  columns=ratings_std.columns)


In [15]:
def get_similar_album(album_name, user_rating):
    similar_score = item_similarity_df[album_name]*(user_rating-50)
    similar_score = similar_score.sort_values(ascending=False)
    return similar_score

In [17]:
get_similar_album('album/100249-the-brobecks-violent-things.php', 80)

link_album
album/2972-patrick-stump-soul-punk.php                               30.000000
album/78529-fall-out-boy-m-a-n-i-a.php                               30.000000
album/122242-mumford-sons-delta.php                                  30.000000
album/382565-cynthia-erivo-ch-1-vs-1.php                             30.000000
album/234065-various-artists-rent.php                                30.000000
                                                                       ...    
album/578681-caroline-polachek-desire-i-want-to-turn-into-you.php    -0.844267
album/692306-olivia-rodrigo-guts.php                                 -0.863550
album/152796-tyler-the-creator-igor.php                              -0.877362
album/618404-danny-brown-jpegmafia-scaring-the-hoes.php              -0.907351
album/29250-kendrick-lamar-to-pimp-a-butterfly.php                   -1.018934
Name: album/100249-the-brobecks-violent-things.php, Length: 4877, dtype: float64

In [18]:
def get_similar_more_albums(user_ratings):
    total_scores = pd.Series(dtype=float)
    for album, rating in user_ratings:
        similar_scores = item_similarity_df[album] * (rating - 50)
        total_scores = total_scores.add(similar_scores, fill_value=0)
    total_scores = total_scores.sort_values(ascending=False)
    return total_scores


In [19]:
user_ratings = [
    ("album/100249-the-brobecks-violent-things.php", 5),
    ("album/100035-young-fathers-cocoa-sugar.php", 100),
    ("album/101215-mom-jeans-best-buds.php", 10)
]

In [20]:
#nyoba all taylor swift
user_ratings = [('album/541510-taylor-swift-midnights.php',100),
       ('album/934464-taylor-swift-the-tortured-poets-department-the-anthology.php', 100),
       ('album/313572-taylor-swift-evermore.php',100),
       ('album/264058-taylor-swift-folklore.php',100)]

In [21]:
hasil = pd.DataFrame(get_similar_more_albums(user_ratings), columns=['score'])
hasil_data = get_similar_more_albums(user_ratings)
hasil = pd.DataFrame(hasil_data, columns=['score'])
hasil['link_album'] = hasil_data.index
hasil = hasil.reset_index(drop=True)

hasil

Unnamed: 0,score,link_album
0,117.049121,album/264058-taylor-swift-folklore.php
1,113.849890,album/313572-taylor-swift-evermore.php
2,110.467764,album/541510-taylor-swift-midnights.php
3,97.903175,album/381307-taylor-swift-red-taylors-version.php
4,95.316947,album/934464-taylor-swift-the-tortured-poets-department-the-anthology.php
...,...,...
4872,-19.884293,album/503-madvillain-madvillainy.php
4873,-21.651115,album/618404-danny-brown-jpegmafia-scaring-the-hoes.php
4874,-21.927360,album/108509-kids-see-ghosts-kids-see-ghosts.php
4875,-22.989201,album/289791-travis-scott-utopia.php


In [25]:
hasil

In [24]:
df_albums = pd.read_csv('dataset/albums.csv', sep=';')
df_albums

Unnamed: 0,artis,link_artis,album,thumbnail_album,tracklist_album,link_review,tanggal_rilis,label,genre,produser,penulis,link_album,thumbnail_artis
0,Yard Act,artist/86788-yard-act/,Where's My Utopia?,https://cdn2.albumoftheyear.org/375x/album/778829-wheres-my-utopia_055808.jpg,An Illusion;|We Make Hits;|Down by the Stream;|The Undertow;|Dream Job;|Fizzy Fish;|Petroleum;|When the Laughter Stops;|Grifter's Grief;|Blackpool Illuminations;|A Vineyard for the North,artist/86788-yard-act/,"March 1, 2024",Island Records;|Republic,Dance-Punk;|Art Punk;|Post-Punk Revival;|Spoken Word;|Alternative Dance,Yard Act;|Remi Kabaka,Jay Russell;|Ryan Needham;|Sam Shipstone;|James Smith;|Christopher Duffin,artist/86788-yard-act/,https://cdn.albumoftheyear.org/artists/sq/yard-act_1642605375.jpg
1,Mile End,artist/192679-mile-end/,Mile End,https://cdn2.albumoftheyear.org/375x/album/553601-mile-end.jpg,,artist/192679-mile-end/,"July 16, 2021",,Hardcore Punk,,,artist/192679-mile-end/,
2,J Dilla,artist/448-j-dilla/,Donuts,https://cdn2.albumoftheyear.org/375x/album/donuts-1.jpg,"Donuts (Outro);|Workinonit;|Waves;|Light It;|The New;|Stop;|People;|The Diff'rence;|Mash;|Time: The Donut of the Heart;|Glazed;|Airworks;|Lightworks;|Stepson of the Clapper;|The Twister (Huh, What?);|One Eleven;|Two Can Win;|Don't Cry;|Anti-American Graffiti;|Geek Down;|Thunder;|Gobstopper;|One for Ghost;|Dilla Says Go;|Walkinonit;|The Factory;|U-Love;|Hi.;|Bye.;|Last Donut of the Night;|Donuts (Intro)",artist/448-j-dilla/,"February 7, 2006",Stones Throw,Instrumental Hip Hop;|Plunderphonics;|Soul;|Experimental Hip Hop,J Dilla,J Dilla,artist/448-j-dilla/,https://cdn.albumoftheyear.org/artists/sq/j-dilla_1448375742.jpg
3,The Gathering,artist/3111-the-gathering/,How to Measure a Planet?,https://cdn2.albumoftheyear.org/375x/album/56434-how-to-measure-a-planet.jpg,,artist/3111-the-gathering/,"November 9, 1998",Century Media,Art Rock,,,artist/3111-the-gathering/,
4,Biking With Francis,artist/255468-biking-with-francis/,HONEYDEW,https://cdn2.albumoftheyear.org/375x/album/717849-honeydew_2023.jpg,,artist/255468-biking-with-francis/,"June 30, 2023",,,,,artist/255468-biking-with-francis/,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4872,Freddie Gibbs & Madlib,artist/936-freddie-gibbs/,Piñata (Deluxe Edition),https://cdn2.albumoftheyear.org/375x/album/339806-pinata-deluxe-edition-1.jpg,,artist/936-freddie-gibbs/,"February 26, 2021",,,Madlib;|Alex Goose,,artist/936-freddie-gibbs/,https://cdn.albumoftheyear.org/artists/sq/freddie-gibbs_1664546436.jpg
4873,Baco Exu Do Blues,artist/34832-baco-exu-do-blues/,Bluesman,https://cdn2.albumoftheyear.org/375x/album/129241-bluesman.jpg,Bluesman;|Queima minha pele;|Me desculpa Jay Z;|Minotauro de Borges;|Kanye West da Bahia;|Flamingos;|Girassóis de Van Gogh;|Preto e prata;|BB King,artist/34832-baco-exu-do-blues/,"November 23, 2018",999,Conscious Hip Hop;|Trap;|Pop Rap;|Alternative R&B,Baco Exu Do Blues;|CESRV;|JLZ;|Tim Bernardes,,artist/34832-baco-exu-do-blues/,https://cdn.albumoftheyear.org/artists/sq/baco-exu-do-blues_1543342048.jpg
4874,Big Sean,artist/1847-big-sean/,I Decided.,https://cdn2.albumoftheyear.org/375x/album/2017/67764-i-decided.jpg,"Intro;|Light;|Bounce Back;|No Favors;|Jump Out the Window;|Moves;|Same Time, Pt. 1;|Owe Me;|Halfway Off the Balcony;|Voices In My Head / Stick To the Plan;|Sunday Morning Jetpack;|Inspire Me;|Sacrifices;|Bigger Than Me",artist/1847-big-sean/,"February 3, 2017",G.O.O.D. MUSIC;|Def Jam,Pop Rap;|Trap;|Contemporary R&B,Allen Ritter;|Bekon;|Detail;|DJ Dahi;|DJ Khalil,,artist/1847-big-sean/,https://cdn.albumoftheyear.org/artists/sq/big-sean_1676408021.jpg
4875,Nick Lyons,artist/98351-nick-lyons/,Unauthorised Broadcast,https://cdn2.albumoftheyear.org/375x/album/275189-unauthorised-broadcast.jpg,,artist/98351-nick-lyons/,"October 8, 2014",Ascensionism Records,,,,artist/98351-nick-lyons/,


In [25]:
hasil.dtypes

score         float64
link_album     object
dtype: object

In [26]:
df_hasil = df_albums.join(hasil.set_index("link_album"), on='link_album')
df_hasil

Unnamed: 0,artis,link_artis,album,thumbnail_album,tracklist_album,link_review,tanggal_rilis,label,genre,produser,penulis,link_album,thumbnail_artis,score
0,Yard Act,artist/86788-yard-act/,Where's My Utopia?,https://cdn2.albumoftheyear.org/375x/album/778829-wheres-my-utopia_055808.jpg,An Illusion;|We Make Hits;|Down by the Stream;|The Undertow;|Dream Job;|Fizzy Fish;|Petroleum;|When the Laughter Stops;|Grifter's Grief;|Blackpool Illuminations;|A Vineyard for the North,artist/86788-yard-act/,"March 1, 2024",Island Records;|Republic,Dance-Punk;|Art Punk;|Post-Punk Revival;|Spoken Word;|Alternative Dance,Yard Act;|Remi Kabaka,Jay Russell;|Ryan Needham;|Sam Shipstone;|James Smith;|Christopher Duffin,artist/86788-yard-act/,https://cdn.albumoftheyear.org/artists/sq/yard-act_1642605375.jpg,
1,Mile End,artist/192679-mile-end/,Mile End,https://cdn2.albumoftheyear.org/375x/album/553601-mile-end.jpg,,artist/192679-mile-end/,"July 16, 2021",,Hardcore Punk,,,artist/192679-mile-end/,,
2,J Dilla,artist/448-j-dilla/,Donuts,https://cdn2.albumoftheyear.org/375x/album/donuts-1.jpg,"Donuts (Outro);|Workinonit;|Waves;|Light It;|The New;|Stop;|People;|The Diff'rence;|Mash;|Time: The Donut of the Heart;|Glazed;|Airworks;|Lightworks;|Stepson of the Clapper;|The Twister (Huh, What?);|One Eleven;|Two Can Win;|Don't Cry;|Anti-American Graffiti;|Geek Down;|Thunder;|Gobstopper;|One for Ghost;|Dilla Says Go;|Walkinonit;|The Factory;|U-Love;|Hi.;|Bye.;|Last Donut of the Night;|Donuts (Intro)",artist/448-j-dilla/,"February 7, 2006",Stones Throw,Instrumental Hip Hop;|Plunderphonics;|Soul;|Experimental Hip Hop,J Dilla,J Dilla,artist/448-j-dilla/,https://cdn.albumoftheyear.org/artists/sq/j-dilla_1448375742.jpg,
3,The Gathering,artist/3111-the-gathering/,How to Measure a Planet?,https://cdn2.albumoftheyear.org/375x/album/56434-how-to-measure-a-planet.jpg,,artist/3111-the-gathering/,"November 9, 1998",Century Media,Art Rock,,,artist/3111-the-gathering/,,
4,Biking With Francis,artist/255468-biking-with-francis/,HONEYDEW,https://cdn2.albumoftheyear.org/375x/album/717849-honeydew_2023.jpg,,artist/255468-biking-with-francis/,"June 30, 2023",,,,,artist/255468-biking-with-francis/,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4872,Freddie Gibbs & Madlib,artist/936-freddie-gibbs/,Piñata (Deluxe Edition),https://cdn2.albumoftheyear.org/375x/album/339806-pinata-deluxe-edition-1.jpg,,artist/936-freddie-gibbs/,"February 26, 2021",,,Madlib;|Alex Goose,,artist/936-freddie-gibbs/,https://cdn.albumoftheyear.org/artists/sq/freddie-gibbs_1664546436.jpg,
4873,Baco Exu Do Blues,artist/34832-baco-exu-do-blues/,Bluesman,https://cdn2.albumoftheyear.org/375x/album/129241-bluesman.jpg,Bluesman;|Queima minha pele;|Me desculpa Jay Z;|Minotauro de Borges;|Kanye West da Bahia;|Flamingos;|Girassóis de Van Gogh;|Preto e prata;|BB King,artist/34832-baco-exu-do-blues/,"November 23, 2018",999,Conscious Hip Hop;|Trap;|Pop Rap;|Alternative R&B,Baco Exu Do Blues;|CESRV;|JLZ;|Tim Bernardes,,artist/34832-baco-exu-do-blues/,https://cdn.albumoftheyear.org/artists/sq/baco-exu-do-blues_1543342048.jpg,
4874,Big Sean,artist/1847-big-sean/,I Decided.,https://cdn2.albumoftheyear.org/375x/album/2017/67764-i-decided.jpg,"Intro;|Light;|Bounce Back;|No Favors;|Jump Out the Window;|Moves;|Same Time, Pt. 1;|Owe Me;|Halfway Off the Balcony;|Voices In My Head / Stick To the Plan;|Sunday Morning Jetpack;|Inspire Me;|Sacrifices;|Bigger Than Me",artist/1847-big-sean/,"February 3, 2017",G.O.O.D. MUSIC;|Def Jam,Pop Rap;|Trap;|Contemporary R&B,Allen Ritter;|Bekon;|Detail;|DJ Dahi;|DJ Khalil,,artist/1847-big-sean/,https://cdn.albumoftheyear.org/artists/sq/big-sean_1676408021.jpg,
4875,Nick Lyons,artist/98351-nick-lyons/,Unauthorised Broadcast,https://cdn2.albumoftheyear.org/375x/album/275189-unauthorised-broadcast.jpg,,artist/98351-nick-lyons/,"October 8, 2014",Ascensionism Records,,,,artist/98351-nick-lyons/,,


In [27]:
taylor_swift_data = df_hasil[df_hasil['artis'] == 'Taylor Swift']
taylor_swift_data['link_album'].unique()

array(['artist/323-taylor-swift/'], dtype=object)

In [28]:
sorted = df_hasil.sort_values(by='score', ascending=False)
top_10 =sorted.head(10)
top_10

Unnamed: 0,artis,link_artis,album,thumbnail_album,tracklist_album,link_review,tanggal_rilis,label,genre,produser,penulis,link_album,thumbnail_artis,score
0,Yard Act,artist/86788-yard-act/,Where's My Utopia?,https://cdn2.albumoftheyear.org/375x/album/778829-wheres-my-utopia_055808.jpg,An Illusion;|We Make Hits;|Down by the Stream;|The Undertow;|Dream Job;|Fizzy Fish;|Petroleum;|When the Laughter Stops;|Grifter's Grief;|Blackpool Illuminations;|A Vineyard for the North,artist/86788-yard-act/,"March 1, 2024",Island Records;|Republic,Dance-Punk;|Art Punk;|Post-Punk Revival;|Spoken Word;|Alternative Dance,Yard Act;|Remi Kabaka,Jay Russell;|Ryan Needham;|Sam Shipstone;|James Smith;|Christopher Duffin,artist/86788-yard-act/,https://cdn.albumoftheyear.org/artists/sq/yard-act_1642605375.jpg,
1,Mile End,artist/192679-mile-end/,Mile End,https://cdn2.albumoftheyear.org/375x/album/553601-mile-end.jpg,,artist/192679-mile-end/,"July 16, 2021",,Hardcore Punk,,,artist/192679-mile-end/,,
2,J Dilla,artist/448-j-dilla/,Donuts,https://cdn2.albumoftheyear.org/375x/album/donuts-1.jpg,"Donuts (Outro);|Workinonit;|Waves;|Light It;|The New;|Stop;|People;|The Diff'rence;|Mash;|Time: The Donut of the Heart;|Glazed;|Airworks;|Lightworks;|Stepson of the Clapper;|The Twister (Huh, What?);|One Eleven;|Two Can Win;|Don't Cry;|Anti-American Graffiti;|Geek Down;|Thunder;|Gobstopper;|One for Ghost;|Dilla Says Go;|Walkinonit;|The Factory;|U-Love;|Hi.;|Bye.;|Last Donut of the Night;|Donuts (Intro)",artist/448-j-dilla/,"February 7, 2006",Stones Throw,Instrumental Hip Hop;|Plunderphonics;|Soul;|Experimental Hip Hop,J Dilla,J Dilla,artist/448-j-dilla/,https://cdn.albumoftheyear.org/artists/sq/j-dilla_1448375742.jpg,
3,The Gathering,artist/3111-the-gathering/,How to Measure a Planet?,https://cdn2.albumoftheyear.org/375x/album/56434-how-to-measure-a-planet.jpg,,artist/3111-the-gathering/,"November 9, 1998",Century Media,Art Rock,,,artist/3111-the-gathering/,,
4,Biking With Francis,artist/255468-biking-with-francis/,HONEYDEW,https://cdn2.albumoftheyear.org/375x/album/717849-honeydew_2023.jpg,,artist/255468-biking-with-francis/,"June 30, 2023",,,,,artist/255468-biking-with-francis/,,
5,Aimee Mann,artist/8-aimee-mann/,Mental Illness,https://cdn2.albumoftheyear.org/375x/album/70080-mental-illness.jpg,Goose Snow Cone;|Stuck in the Past;|You Never Loved Me;|Rollercoasters;|Lies of Summer;|Patient Zero;|Good for Me;|Knock It Off;|Philly Sinks;|Simple Fix;|Poor Judge,artist/8-aimee-mann/,"March 31, 2017",Superego,Singer-Songwriter;|Folk Pop,,,artist/8-aimee-mann/,https://cdn.albumoftheyear.org/artists/sq/aimee-mann_1490707109.jpg,
6,Ces Cru,artist/3689-ces-cru/,Catastrophic Event Specialists,https://cdn2.albumoftheyear.org/375x/album/68386-catastrophic-event-specialists.jpg,,artist/3689-ces-cru/,"February 10, 2017",Strange Music,Hip Hop,,,artist/3689-ces-cru/,,
7,Swans,artist/1339-swans/,Cop,https://cdn2.albumoftheyear.org/375x/album/15003-cop_190656.jpg,Half Life;|Job;|Why Hide;|Clay Man;|Your Property;|Cop;|Butcher;|Thug,artist/1339-swans/,1984,K.422,No Wave;|Noise Rock;|Sludge Metal;|Industrial;|Industrial Metal,Michael Gira;|Roli Mosimann,,artist/1339-swans/,https://cdn.albumoftheyear.org/artists/sq/swans_1680183959.jpg,
8,Siouxsie and the Banshees,artist/5221-siouxsie-and-the-banshees/,Juju,https://cdn2.albumoftheyear.org/375x/album/11981-juju-1.jpg,Spellbound;|Into the Light;|Arabian Knights;|Halloween;|Monitor;|Night Shift;|Sin In My Heart;|Head Cut;|Voodoo Dolly,artist/5221-siouxsie-and-the-banshees/,"June 6, 1981",Polydor;|PVC,Gothic Rock;|Post-Punk;|Neo-Psychedelia;|Deathrock;|Noise Rock;|New Wave;|Psychedelic Rock;|Art Rock,Nigel Gray;|Siouxsie and the Banshees,,artist/5221-siouxsie-and-the-banshees/,https://cdn.albumoftheyear.org/artists/sq/siouxsie-and-the-banshees_1520288549.jpg,
9,Bad Omens,artist/24462-bad-omens/,THE DEATH OF PEACE OF MIND,,CONCRETE JUNGLE;|Nowhere To Go;|Take Me First;|THE DEATH OF PEACE OF MIND;|What It Cost;|Like A Villain;|bad decisions;|Just Pretend;|The Grey;|Who are you?;|Somebody else.;|IDWT$;|What do you want from me?;|ARTIFICIAL SUICIDE;|Miracle,artist/24462-bad-omens/,"February 25, 2022",Sumerian,Alternative Metal;|Metalcore;|Industrial Rock;|Alternative Rock,Noah Sebastian;|Joakim Karlsson;|Erik Ron;|Michael Taylor;|Jesse Cash,,artist/24462-bad-omens/,,


In [29]:
df_hasil.score.nunique()

0

# **Content Based Filtering**

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [30]:
tabel = pd.DataFrame({
    'link_album': df_albums['link_album'],
    'genre': df_albums['genre'],
    'artis': df_albums['artis'],
    'label': df_albums['label'],
    'produser': df_albums['produser'],
    'penulis': df_albums['penulis']
})

tabel = tabel.apply(lambda x: x.str.replace(' ', '_'))
tabel = tabel.apply(lambda x: x.str.replace('!', ''))
tabel = tabel.apply(lambda x: x.str.replace('?', ''))
tabel = tabel.apply(lambda x: x.str.replace('/', ''))
tabel = tabel.apply(lambda x: x.str.replace('-', '_'))
tabel = tabel.apply(lambda x: x.str.replace("'", '_'))
tabel = tabel.apply(lambda x: x.str.replace(';|', ' '))

combined = pd.DataFrame({
    'link_album': tabel['link_album'],
    'corpus': tabel[['genre', 'artis', 'label', 'produser', 'penulis']].apply(lambda x: ' '.join(map(str, x)), axis=1)
})

combined = combined.set_index('link_album')
combined = combined.apply(lambda x: x.str.replace('nan', ''))
combined

Unnamed: 0_level_0,corpus
link_album,Unnamed: 1_level_1
artist86788_yard_act,Dance_Punk Art_Punk Post_Punk_Revival Spoken_Word Alternative_Dance Yard_Act Island_Records Republic Yard_Act Remi_Kabaka Jay_Russell Ryan_Needham Sam_Shipstone James_Smith Christopher_Duffin
artist192679_mile_end,Hardcore_Punk Mile_End
artist448_j_dilla,Instrumental_Hip_Hop Plunderphonics Soul Experimental_Hip_Hop J_Dilla Stones_Throw J_Dilla J_Dilla
artist3111_the_gathering,Art_Rock The_Gathering Century_Media
artist255468_biking_with_francis,Biking_With_Francis
...,...
artist936_freddie_gibbs,Freddie_Gibbs_&_Madlib Madlib Alex_Goose
artist34832_baco_exu_do_blues,Conscious_Hip_Hop Trap Pop_Rap Alternative_R&B Baco_Exu_Do_Blues 999 Baco_Exu_Do_Blues CESRV JLZ Tim_Bernardes
artist1847_big_sean,Pop_Rap Trap Contemporary_R&B Big_Sean G.O.O.D._MUSIC Def_Jam Allen_Ritter Bekon Detail DJ_Dahi DJ_Khalil
artist98351_nick_lyons,Nick_Lyons Ascensionism_Records


In [31]:
corpus=combined.corpus.tolist()
corpus

['Dance_Punk Art_Punk Post_Punk_Revival Spoken_Word Alternative_Dance Yard_Act Island_Records Republic Yard_Act Remi_Kabaka Jay_Russell Ryan_Needham Sam_Shipstone James_Smith Christopher_Duffin',
 'Hardcore_Punk Mile_End   ',
 'Instrumental_Hip_Hop Plunderphonics Soul Experimental_Hip_Hop J_Dilla Stones_Throw J_Dilla J_Dilla',
 'Art_Rock The_Gathering Century_Media  ',
 ' Biking_With_Francis   ',
 'Singer_Songwriter Folk_Pop Aimee_Mann Superego  ',
 'Hip_Hop Ces_Cru Strange_Music  ',
 'No_Wave Noise_Rock Sludge_Metal Industrial Industrial_Metal Swans K.422 Michael_Gira Roli_Mosimann ',
 'Gothic_Rock Post_Punk Neo_Psychedelia Deathrock Noise_Rock New_Wave Psychedelic_Rock Art_Rock Siouxsie_and_the_Banshees Polydor PVC Nigel_Gray Siouxsie_and_the_Banshees ',
 'Alternative_Metal Metalcore Industrial_Rock Alternative_Rock Bad_Omens Sumerian Noah_Sebastian Joakim_Karlsson Erik_Ron Michael_Taylor Jesse_Cash ',
 'Alternative_R&B Synthpop Tory_Lanez One_Umbrella Bizness_Boi boyband Roark_Baile

In [32]:
from sklearn.feature_extraction.text import TfidfVectorizer
def tfidf_similarity(query):
    tfidf_vectorizer = TfidfVectorizer()
    corpus = combined['corpus'].values
    tfidf_vectorizer.fit(corpus)
    query_tfidf = tfidf_vectorizer.transform([query])
    corpus_tfidf = tfidf_vectorizer.transform(corpus)

    similarity_scores = query_tfidf.dot(corpus_tfidf.T)

    similarity_scores_dense = similarity_scores.toarray()

    sorted_indices = np.argsort(similarity_scores_dense)[0][::-1]

    relevant_links = combined.index[sorted_indices].tolist()

    tfidf_scores = similarity_scores_dense[0][sorted_indices]

    result = pd.DataFrame({'link_album': relevant_links, 'tfidf_score': tfidf_scores}).set_index('link_album')
    result = result.apply(lambda x: x.str.replace('_', '-'))
    return result

In [33]:
links = [
    'album/541510-taylor-swift-midnights.php',
    'album/934464-taylor-swift-the-tortured-poets-department-the-anthology.php',
    'album/313572-taylor-swift-evermore.php',
    'album/264058-taylor-swift-folklore.php'
    ]

for i in range(len(links)):
    links[i] = links[i].replace("-", "_")

query = combined.loc[combined.index.isin(links)]
column_values = query['corpus'].astype(str)
combined_string = ' '.join(column_values)
words = combined_string.split()
unique_words = list(set(words))
query = ' '.join(unique_words)
result= tfidf_similarity(query)
result = result.drop(result.index[:len(links)])
result = result.apply(lambda x: x.str.replace('_', '-'))
result.head(10)

AttributeError: Can only use .str accessor with string values!