In [1]:
import pandas as pd
# Load the raw top-50 CSV, decoding the file as ISO-8859-1.
df = pd.read_csv(
    "../data/raw/top50.csv",
    encoding="iso-8859-1",
)

def clean_top50_first3_cols(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean the first three columns of the top 50 dataset.

    Works on a copy, so the frame passed in is never modified:
    - Delete 'Unnamed: 0' (when present)
    - Rename 'Track.Name' -> 'Track_Name' and 'Artist.Name' -> 'Artist_Name'
    - Clean 'Track_Name' and 'Artist_Name': strip the ends and collapse
      whitespace runs to one space; missing values stay missing instead of
      being turned into the text 'nan'
    - Replace purely numeric track names (e.g. '123', '4.5') with "Unknown"
    - Drop rows repeating an earlier (Track_Name, Artist_Name) pair
    - Add a Rank column (from 1 to n) as the first column, replacing any
      existing 'Rank' so the function can be re-applied to its own output

    Args:
        df: Input frame. The name columns are optional; steps that need an
            absent column are skipped rather than raising KeyError.

    Returns:
        The cleaned copy. Row index labels are carried over from the input,
        so they can have gaps where duplicate rows were removed.
    """
    df = df.copy()

    # delete Unnamed: 0
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')

    # rename columns
    df = df.rename(columns={
        'Track.Name': 'Track_Name',
        'Artist.Name': 'Artist_Name'
    })

    key_cols = ['Track_Name', 'Artist_Name']
    present_keys = [col for col in key_cols if col in df.columns]

    # clean string columns
    for col in present_keys:
        has_value = df[col].notna()
        cleaned = (
            df[col]
            .astype(str)
            .str.strip()
            .str.replace(r'\s+', ' ', regex=True)
        )
        # astype(str) can turn NaN/None into literal text ('nan'/'None');
        # put the missing marker back so it is not mistaken for a real name.
        df[col] = cleaned.where(has_value)

    # clean numeric data: a track name made only of digits is a placeholder
    if 'Track_Name' in df.columns:
        numeric_mask = df['Track_Name'].str.match(r'^\d+(\.\d+)?$', na=False)
        df.loc[numeric_mask, 'Track_Name'] = "Unknown"

    # drop duplicates based on renamed columns; only when both keys exist,
    # because deduplicating on a single key would merge distinct songs
    if len(present_keys) == len(key_cols):
        df = df.drop_duplicates(subset=key_cols)

    # rebuild Rank (ensure 1…n and place it in the first column);
    # a stale Rank is dropped first because insert() refuses existing names
    df = df.drop(columns=['Rank'], errors='ignore')
    df.insert(0, 'Rank', range(1, len(df) + 1))

    return df

In [2]:

# Run the cleaning step on the raw frame; the function works on a copy, so `df` is left as loaded.
df_clean = clean_top50_first3_cols(df)

# Display the column index to check the renamed name columns and the new Rank column.
df_clean.columns

Index(['Rank', 'Track_Name', 'Artist_Name', 'Genre', 'Beats.Per.Minute',
       'Energy', 'Danceability', 'Loudness..dB..', 'Liveness', 'Valence.',
       'Length.', 'Acousticness..', 'Speechiness.', 'Popularity'],
      dtype='object')

In [3]:
# Show up to the first 50 rows of the cleaned frame.
df_clean.head(50)

Unnamed: 0,Rank,Track_Name,Artist_Name,Genre,Beats.Per.Minute,Energy,Danceability,Loudness..dB..,Liveness,Valence.,Length.,Acousticness..,Speechiness.,Popularity
0,1,Señorita,Shawn Mendes,canadian pop,117,55,76,-6,8,75,191,4,3,79
1,2,China,Anuel AA,reggaeton flow,105,81,79,-4,8,61,302,8,9,92
2,3,boyfriend (with Social House),Ariana Grande,dance pop,190,80,40,-4,16,70,186,12,46,85
3,4,Beautiful People (feat. Khalid),Ed Sheeran,pop,93,65,64,-8,8,55,198,12,19,86
4,5,Goodbyes (Feat. Young Thug),Post Malone,dfw rap,150,65,58,-4,11,18,175,45,7,94
5,6,I Don't Care (with Justin Bieber),Ed Sheeran,pop,102,68,80,-5,9,84,220,9,4,84
6,7,Ransom,Lil Tecca,trap music,180,64,75,-6,7,23,131,2,29,92
7,8,How Do You Sleep?,Sam Smith,pop,111,68,48,-5,8,35,202,15,9,90
8,9,Old Town Road - Remix,Lil Nas X,country rap,136,62,88,-6,11,64,157,5,10,87
9,10,bad guy,Billie Eilish,electropop,135,43,70,-11,10,56,194,33,38,95
