In [2]:
import csv
import os
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import torch

In [50]:
import librosa

def extract_features(songFile, chunckSize = 30):
    '''
    Summarise one audio file as a fixed-length tuple of scalar features.

    songFile should be a wav file (it is handed straight to librosa.load).

    chunckSize is how many seconds of audio to analyse, counted from the
    start of the file. Pass None to analyse the whole file.

    Returns a tuple of features of songFile:
    tempo, beats, chroma_stft, rmse, spec_cent, spec_bw, rolloff, zcr, mfcc

    tempo is the estimated tempo as a plain float, beats is the number of
    detected beat frames, and each remaining entry is the mean over every
    element of the corresponding librosa feature matrix.
    '''

    # Honour chunckSize: the argument used to be accepted but ignored, so the
    # whole file was analysed no matter what length the caller asked for.
    y, sr = librosa.load(songFile, duration=chunckSize)

    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # beat_track may hand tempo back as a one-element array instead of a
    # scalar; flatten it so the CSV gets "120.0" rather than "[120.]".
    tempo = float(np.asarray(tempo).reshape(-1)[0])
    beats = beat_frames.shape[0]

    # Frame-wise features, listed in the order they appear in the result.
    frame_features = (
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
        librosa.feature.mfcc(y=y, sr=sr),
    )

    # Use the average value of each feature matrix
    averages = tuple(np.mean(matrix) for matrix in frame_features)

    return (tempo, beats) + averages

In [104]:
def _song_title(fileName):
    '''Return a song file's name without its extension or a leading "<digits>s_" trim prefix.'''
    stem = Path(fileName).stem
    prefix, separator, remainder = stem.partition('s_')
    # Only strip when the text before "s_" is purely digits (e.g. "30s_"),
    # so names that merely contain "s_" are left untouched.
    if separator and prefix.isdigit():
        return remainder
    return stem


def create_csv(src, output, songNumber = None):
    '''
    Write one CSV row of audio features per song found under src.

    src is a folder containing one sub-folder per genre; every file inside a
    genre folder (except '.DS_Store') is treated as a song and passed to
    extract_features.

    output should be the name of the csv file you want to create. It is
    overwritten if it already exists.

    songNumber is number of songs per genre. If left blank will do all it can
    find. Zero or a negative value selects no songs.

    Genres and songs are processed in sorted name order so that the row order,
    and the songs picked when songNumber is set, are the same on every run.
    A progress line is printed for every tenth song written.
    '''

    # Header kept exactly as before (including the 'RSME' spelling) because
    # anything reading the CSV looks columns up by these names.
    features = ['Song Name', 'Genre', 'Tempo', 'Beats', 'Chroma_stft', 'RSME', 'Spec_Cent', 'Spec_BW', 'Rolloff', 'ZCR', 'MFCC']

    # A slice bound of None means "no limit"; clamp negatives so they select
    # nothing instead of being read as "all but the last N".
    limit = None if songNumber is None else max(songNumber, 0)

    genreFolders = sorted(
        (entry for entry in Path(src).iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
    )

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open(output, mode='w', newline='') as featureCSV:
        featureWriter = csv.writer(featureCSV, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        featureWriter.writerow(features)

        songCount = 0

        for genreFolder in genreFolders:
            songFiles = sorted(
                (entry for entry in genreFolder.iterdir()
                 if entry.is_file() and entry.name != '.DS_Store'),
                key=lambda entry: entry.name,
            )

            for songFile in songFiles[:limit]:
                row = [_song_title(songFile.name), genreFolder.name]
                row.extend(extract_features(songFile))
                featureWriter.writerow(row)

                if songCount % 10 == 0:
                    print(f'On Song {songCount +1}')
                songCount += 1
            


In [73]:
# Destination of the per-song feature table: SongFeatures.csv in the
# notebook's current working directory.
filePath = Path.cwd() / 'SongFeatures.csv'
print(filePath)

/Users/ibrahimbutt/Hamza/aps360/Project/SongFeatures.csv


In [105]:
# Build the feature CSV from the trimmed clips (one sub-folder per genre).
# NOTE(review): 'Trimmed Audio' is created by splitAudioFiles, which is defined
# and called further down in this notebook; on a fresh kernel run that first.
src = Path.cwd() / 'Trimmed Audio'
create_csv(src, output=filePath)

On Song 1
On Song 11
On Song 21
On Song 31
On Song 41
On Song 51
On Song 61
On Song 71
On Song 81
On Song 91
On Song 101
On Song 111
On Song 121
On Song 131
On Song 141
On Song 151
On Song 161
On Song 171
On Song 181
On Song 191
On Song 201
On Song 211
On Song 221
On Song 231
On Song 241
On Song 251
On Song 261
On Song 271
On Song 281
On Song 291
On Song 301
On Song 311
On Song 321
On Song 331
On Song 341
On Song 351
On Song 361
On Song 371
On Song 381
On Song 391
On Song 401
On Song 411
On Song 421
On Song 431
On Song 441
On Song 451
On Song 461
On Song 471
On Song 481
On Song 491
On Song 501
On Song 511
On Song 521
On Song 531
On Song 541
On Song 551
On Song 561
On Song 571
On Song 581
On Song 591
On Song 601
On Song 611
On Song 621
On Song 631
On Song 641
On Song 651
On Song 661
On Song 671
On Song 681


In [51]:
# Smoke-test the extractor on a single song from the "alternative" genre folder.
# NOTE(review): splitAudioFiles writes clips as '<seconds>s_<name>.wav', so this
# un-prefixed file name may no longer exist under src; confirm before re-running.
features = extract_features(Path(src).joinpath("alternative", 'Adult Hits.wav'))

In [90]:
import pydub
from pydub.utils import make_chunks


def splitAudioFiles(src, seconds):
    '''
    Export the opening clip of every wav file into a parallel folder tree.

    src is the project folder. Songs are read from '<src>/wav files/<genre>/'
    and the clips are written to '<src>/Trimmed Audio/<genre>/' as
    '<seconds>s_<song name>.wav'.

    seconds is the clip length; a song shorter than that is exported whole,
    and a song with no audio at all is skipped.

    Only files with a .wav extension (any letter case) are processed. Existing
    output folders are reused and existing clips with the same name are
    overwritten. Raises OSError if the input folder cannot be listed or an
    output folder cannot be created.
    '''

    wavRoot = Path(src, 'wav files')
    trimmedDir = Path(src, 'Trimmed Audio')

    # List the genres first so a missing input folder fails before anything
    # is created on disk.
    genreNames = sorted(entry.name for entry in wavRoot.iterdir() if entry.is_dir())

    # exist_ok tolerates "already there" and nothing else; the old
    # try/except OSError also hid permission and path errors until export time.
    trimmedDir.mkdir(exist_ok=True)

    chunk_length_ms = seconds*1000 # pydub calculates in millisec

    for genreName in genreNames:

        trimmedGenre = trimmedDir / genreName
        trimmedGenre.mkdir(exist_ok=True)

        wavGenrePath = wavRoot / genreName

        # Filter on the real extension instead of blindly chopping four
        # characters, which turned any other file into a bogus '.wav' path.
        wavSongPaths = sorted(
            (entry for entry in wavGenrePath.iterdir()
             if entry.is_file() and entry.suffix.lower() == '.wav'),
            key=lambda entry: entry.name,
        )

        for wavSongPath in wavSongPaths:
            sound = pydub.AudioSegment.from_file(wavSongPath, 'wav')

            # No audio means there is no first chunk to export.
            if len(sound) == 0:
                continue

            # Only the first `seconds` seconds are kept, so slice them directly
            # rather than chunking the whole song and discarding the rest.
            firstChunk = sound[:chunk_length_ms]
            firstChunk.export(Path(trimmedGenre, f'{seconds}s_{wavSongPath.stem}.wav'), format='wav')

    

In [91]:
# Cut a 30-second clip from every song under the current working directory.
splitAudioFiles(src=os.getcwd(), seconds=30)

In [79]:
# Show the working directory that all of the notebook's paths are built from.
print(Path.cwd())

/Users/ibrahimbutt/Hamza/aps360/Project
