Query:

```{sql}
select 
  sounds 
from `relax-melodies-android.test_cumulative_events_table.listening_events_staging`
where 
  sounds is not null 
```

### Reading the data

In [3]:
import pandas as pd

df = pd.read_csv("/Users/emulie/Downloads/bquxjob_294361a5_197b912ea51.csv")

In [4]:
df.head()

Unnamed: 0,sounds
0,"[""ambience.brownnoise"", ""music.dreamydelight"",..."
1,"[""music.emotionalrelease"", ""ambience.rain""]"
2,"[""music.nessiedream""]"
3,"[""ambience.vacuum""]"
4,"[""music.underwatercity_music"", ""ambience.etern..."


### Data Transformation

Sources:
- tracks: => [list]
- singles: all different content that can be listened to => list
    - soundscape_singles
    - music_singles
    - ambience_singles
    - solfeggio_singles
    - asmr_singles
    - binaural_singles
    - isochronics_singles
- prefixes:


In [309]:
import ast 

tracks = []
skipped = 0
for i, item in enumerate(df['sounds']):
    try: 
        track = ast.literal_eval(item)
        tracks.append(track)
    except:
        # print(f"{item}") --- removing rows where 
        skipped += 1

print(f"Skipped {skipped / len(df['sounds']) * 100.0}\%")

Skipped 0.021003082511227383\%


In [310]:
for track in tracks:
    track.sort()

In [311]:
tracks[:5]

[['ambience.brownnoise',
  'ambience.rainonroof',
  'isochronic.isochronic4hz',
  'music.dreamydelight'],
 ['ambience.rain', 'music.emotionalrelease'],
 ['music.nessiedream'],
 ['ambience.vacuum'],
 ['ambience.eternity', 'music.underwatercity_music']]

In [312]:
singles = list(set([item for track in tracks for item in track]))
singles.sort()

In [313]:
prefixes = list(set([item.split('.')[0] for item in singles if len(item.split('.')) > 1] ))

In [314]:
prefixes

['isochronics',
 'music',
 'asmr',
 'isochronic',
 'soundscape',
 'ambience',
 'solfeggio',
 'moves',
 'binaural']

In [315]:
soundscape_singles = set([item for item in singles if 'soundscape' in item])
music_singles = set([item for item in singles if 'music.' in item])
asmr_singles = set([item for item in singles if 'asmr' in item])
ambience_singles = set([item for item in singles if 'ambience' in item])
solfeggio_singles = set([item for item in singles if 'solfeggio' in item])
moves_singles = set([item for item in singles if 'moves' in item])
binaural_singles = set([item for item in singles if 'binaural' in item])
isochronics_singles = set([item for item in singles if 'isochronics' in item or 'isochronic' in item])

### Use case 1 - Sounds that pairs well together

Steps:
- In each rows, filter only for "soundscape" and "ambience"
- Dict Count for each mixes
- For each sound, rank mixes
- make co-occurence matrix

In [316]:
# sounds_pairings = [[item for item in track if item in soundscape_singles or item in ambience_singles] for track in tracks]
sounds_pairings = [[item for item in track if item in ambience_singles] for track in tracks]
sounds_pairings = [track for track in sounds_pairings if track]
for track in sounds_pairings:
    track.sort()

In [257]:
from collections import defaultdict

sound_pairings_count = defaultdict(int)
for track in sounds_pairings:
    sound_pairings_count[f'{track}'] += 1

In [258]:
sound_pairings_count = sorted(sound_pairings_count.items(), key=lambda kv: kv[1], reverse=True)
sound_pairings_count = [(eval(track), count) for track, count in sound_pairings_count]
sound_pairings_count = [(track, count) for track, count in sound_pairings_count if len(track) > 1]

In [259]:
sound_pairings_count[:5]

[(['ambience.birds', 'ambience.eternity', 'ambience.ocean'], 5452),
 (['ambience.ocean', 'ambience.rain'], 2544),
 (['ambience.eternity', 'ambience.windintrees'], 2279),
 (['ambience.ocean', 'ambience.rain', 'ambience.windintrees'], 1937),
 (['ambience.eternity', 'ambience.rain'], 1602)]

##### Making Co-occurence Matrix

In [260]:
# sounds_singles = soundscape_singles | ambience_singles
sounds_singles = ambience_singles
idx_to_sounds = {i: single for i, single in enumerate(sounds_singles)}
sounds_to_idx = {single: i for i, single in enumerate(sounds_singles)}
N = len(sounds_singles)

In [261]:
import numpy as np

sounds_occurences = np.zeros((N, N))
for track, count in sound_pairings_count:
    num_tracks = len(track)    
    for i in range(num_tracks):
        for j in range(i, num_tracks):
            idx_i = sounds_to_idx[track[i]]
            idx_j = sounds_to_idx[track[j]]
            if i != j and idx_i and idx_j:
                sounds_occurences[idx_i, idx_j] += count
                sounds_occurences[idx_j, idx_i] += count

In [262]:
sounds_perc = sounds_occurences / sounds_occurences.sum(axis=1, keepdims=True)
sounds_perc = np.nan_to_num(sounds_perc, 0.0)

  sounds_perc = sounds_occurences / sounds_occurences.sum(axis=1, keepdims=True)


##### Rank Sound Suggestions for each sound => For each sounds, suggest sound to add (complete)

In [263]:
sounds_ranking = defaultdict(list)
for i, percentages in enumerate(sounds_perc):
    sorted_indices = np.argsort(percentages).tolist()[::-1]
    rank = [(k, idx_to_sounds[k], percentages[k].item()) for k in sorted_indices if percentages[k]]
    sounds_ranking[i] = rank

In [264]:
# sounds_ranking

##### Rank Mixes for each sound => for each sound, suggest mixes (complete)

In [281]:
mixes_occurences = defaultdict(list)
for track, count in sound_pairings_count:
    # print(track, len(track))
    num_tracks = len(track)
    for i in range(num_tracks):
        # print(track[i])
        idx_single = sounds_to_idx[track[i]]
        mix_complement = track[:i] + track[i+1:]
        if mix_complement:
            mixes_occurences[idx_single].append((mix_complement, count))

In [293]:
for i in mixes_occurences:
    sorted(mixes_occurences[i], key=lambda kv: kv[1], reverse=True)

In [337]:
# mixes_occurences

##### Rank Mixes for each Mixes => for each mixes, suggest sounds to add (incomplete)

### Use case 2 - Mixes that pairs well with music

Steps:
- Get tracks with music in them + sort music such that it's the last element of track
- 

In [360]:
music_tracks = [track for track in tracks for item in track if item in music_singles]
music_tracks = [track for track in music_tracks if len(track) > 1]

In [361]:
idx_to_music = {i: single for i, single in enumerate(music_singles)}
music_to_idx = {single: i for i, single in enumerate(music_singles)}

In [362]:
music_pairings_count = defaultdict(int)
for track in music_tracks:
    music_pairings_count[f'{track}'] += 1
music_pairings_count = [(eval(track), count) for track, count in music_pairings_count.items()]

In [363]:
# music_pairings_count

##### Rank Mixes for each music => for each music, suggest mixes to add (complete)

In [364]:
music_mixes_occurences = defaultdict(list)
for track, count in music_pairings_count:
    num_tracks = len(track)
    for i in range(num_tracks):
        single = track[i]
        if single in music_singles:
            idx_music = music_to_idx[single]
            mix = track[:i] + track[i+1:]
            music_mixes_occurences[idx_music].append((mix, count))

In [367]:
for i in music_mixes_occurences:
    music_mixes_occurences[i] = sorted(music_mixes_occurences[i], key=lambda kv: kv[1], reverse=True)

In [369]:
# music_mixes_occurences