In [48]:
# Inspired by:
# https://stackoverflow.com/questions/39230595/how-to-get-the-fundamental-frequency-using-harmonic-product-spectrum

from pylab import *
from numpy import *
import math
from ipywidgets import *

HPS_ITERATIONS = 5

def HPS(samplerate, dataVoice):
    """Compute a Harmonic Product Spectrum (HPS) of a recording.

    The signal is clipped to its central T_MAX seconds, cut into
    non-overlapping windows of exactly ``samplerate`` samples (one second),
    and each window's magnitude spectrum is multiplied by copies of itself
    decimated by the factors 2 .. HPS_ITERATIONS - 1. The per-window
    products are summed.

    Because every FFT is taken over ``samplerate`` points, bin ``k`` of the
    result corresponds to ``k`` Hz.

    Parameters:
        samplerate: sampling frequency in Hz; converted with ``int()``.
        dataVoice: one-dimensional sequence of samples.

    Returns:
        A float array holding the summed harmonic product. Its length is
        that of the most strongly decimated spectrum.

    Raises:
        ValueError: if ``samplerate`` is not positive or ``dataVoice`` is
            empty.
    """
    T_MAX = 3

    samplerate = int(samplerate)
    if samplerate <= 0:
        raise ValueError('samplerate must be a positive number')

    totalSamples = len(dataVoice)
    if totalSamples == 0:
        raise ValueError('dataVoice must contain at least one sample')

    # Clip the sample to the central T_MAX seconds if necessary. The clip is
    # exactly T_MAX * samplerate samples long, so every window below is full
    # even when samplerate is odd.
    maxSamples = T_MAX * samplerate
    if totalSamples > maxSamples:
        startSample = (totalSamples - maxSamples) // 2
        dataVoice = dataVoice[startSample:startSample + maxSamples]

    # Analyze one-second-wide windows separately; a trailing partial second
    # is ignored.
    windowCount = len(dataVoice) // samplerate
    if windowCount > 0:
        windows = [dataVoice[i * samplerate:(i + 1) * samplerate]
                   for i in range(windowCount)]
    else:
        # Clip shorter than one second: analyze it as a single window. The
        # FFT below zero-pads it to samplerate points, which keeps the
        # 1 Hz-per-bin layout.
        windows = [dataVoice]

    result = None
    for window in windows:
        # n=samplerate is a no-op for full windows and zero-pads a short one
        spectrum = np.abs(np.fft.fft(window, n=samplerate)) / samplerate

        # According to HPS, multiply the spectrum by its decimated copies so
        # that harmonics line up with the fundamental
        product = spectrum.copy()
        for factor in range(2, HPS_ITERATIONS):
            decimated = spectrum[::factor]
            product = product[:len(decimated)] * decimated

        # Join the results of each window
        result = product if result is None else result + product
    return result

In [49]:
from playsound import playsound
from scipy.io import wavfile

def readSound(filename):
    """Read a WAV file and return ``(samplerate, samples)`` as a mono signal.

    When the file holds more than one channel, only the first channel
    (column 0) is returned; single-channel data is returned unchanged.
    """
    samplerate, data = wavfile.read(filename)
    isMultiChannel = data.ndim > 1
    return samplerate, (data[:, 0] if isMultiChannel else data)

In [51]:
maleRange = [85, 155]
femaleRange = [165, 255]
def getGenderByHPS(hpsResult):
    """Classify an HPS result by comparing two index bands.

    Sums ``hpsResult`` over the half-open index ranges given by
    ``maleRange`` and ``femaleRange``. Returns 'M' when the male-band sum
    is strictly greater, otherwise 'K' (ties included).
    """
    maleLow, maleHigh = maleRange
    femaleLow, femaleHigh = femaleRange

    maleEnergy = sum(hpsResult[maleLow:maleHigh])
    femaleEnergy = sum(hpsResult[femaleLow:femaleHigh])

    return 'M' if maleEnergy > femaleEnergy else 'K'

# Smoke test: run the full pipeline (read -> HPS -> classify) on two
# recordings and print the predicted label for each.
# NOTE(review): despite their names, isMale1 / isMale2 hold the label string
# returned by getGenderByHPS ('M' or 'K'), not a boolean.
# NOTE(review): the file names appear to carry the expected label after the
# underscore ('K' / 'M') - confirm against the dataset.
samplerate, data = readSound('sounds/001_K.wav')
hps1 = HPS(samplerate, data)
isMale1 = getGenderByHPS(hps1)
print(isMale1)

# Same pipeline on the second recording; samplerate and data are overwritten.
samplerate, data = readSound('sounds/002_M.wav')
hps2 = HPS(samplerate, data)
isMale2 = getGenderByHPS(hps2)
print(isMale2)

K
M


  samplerate, data = wavfile.read(filename)


In [53]:
import os

# Confusion matrix, indexed as decisions[expected][actual]
decisions = {
    'K': { 'K': 0, 'M': 0 },
    'M': { 'K': 0, 'M': 0 },
}
matches = 0

# os.listdir returns every entry in the directory, so keep only WAV files
# whose fifth character is a known label (e.g. '001_K.wav'). Anything else
# would fail in wavfile.read or raise KeyError on the label lookup. The
# slice name[4:5] is safe for names shorter than five characters.
sounds = sorted(
    name for name in os.listdir('sounds')
    if name.lower().endswith('.wav') and name[4:5] in decisions
)
for soundFile in sounds:
    # The expected label is encoded in the file name
    expected = soundFile[4]

    samplerate, samples = readSound(f'sounds/{soundFile}')
    hps = HPS(samplerate, samples)
    gender = getGenderByHPS(hps)

    decisions[expected][gender] += 1

    if expected == gender:
        matches += 1

# Guard against an empty directory so the summary never divides by zero
accuracy = int(matches / len(sounds) * 100) if sounds else 0
print(f'Matches: {matches}/{len(sounds)} = {accuracy}%')

# Columns are the expected label, rows are the predicted label
print('\tOczekiwane')
print('\tK\tM')
print(f'K\t{decisions["K"]["K"]}\t{decisions["M"]["K"]}')
print(f'M\t{decisions["K"]["M"]}\t{decisions["M"]["M"]}')

  samplerate, data = wavfile.read(filename)


Matches: 83/91 = 91%
	Oczekiwane
	K	M
K	44	6
M	2	39


  samplerate, data = wavfile.read(filename)
