# Install Libraries (Colab Only)

In [1]:
import pandas as pd

In [3]:
!pip install openai

In [3]:
!pip install tiktoken

Defaulting to user installation because normal site-packages is not writeable


# Define Parameters

### GPT version used for generation

In [2]:
# Model name used for the API calls and as the key into `pricings`.
gpt_version = "gpt-4-0125-preview" # set to "gpt-3.5-turbo-0125" to use GPT-3.5 instead

### Pricing

In [3]:
# model name -> (input price, output price), in dollars per 1,000,000 tokens.
# Index 0 is applied to prompt tokens and index 1 to completion tokens.
pricings = {"gpt-4-0125-preview":(10,30),
            "gpt-3.5-turbo-0125": (0.50, 1.50)}

### Max Tokens

In [4]:
# Number of completion tokens assumed when estimating the worst-case price of a call.
# It is only used for that estimate; it is not passed to the API request.
max_tokens = 4096

### Moral Values and their Descriptors

In [6]:
# Descriptor of each moral foundation. Every descriptor is a relative clause with no
# surrounding whitespace and no final period, so that it reads correctly when inserted
# in the prompt template "defined as the foundation <descriptor>,".
moral_values = {"Care": "which underlies the virtues of kindness and nurturance, empathy and compassion towards other people",
                "Harm": "which represents the opposite of care, characterised by violent actions and intentions",
                "Fairness": "which underlies the virtues of honesty, fair treatment, justice, and dependability",
                "Cheating": "which represents the opposite of fairness, encapsulating instances of unfairness and injustice",
                "Loyalty": "which deals with group loyalty, self-sacrifice, and vigilance against betrayal",
                "Betrayal": "which underlies wrongdoing and betraying your group or your relationship",
                "Authority": "which underlies virtues of leadership and respect within hierarchical relationships",
                "Subversion": "which refers to acts of defiance against authority or hierarchy, and rebellion against control",
                "Purity": "which is concerned with the sanctity of the body and spirit, promoting virtues like chastity and self-restraint",
                "Degradation": "that denotes the violation of purity and sanctity, including both physical and emotional corruption"}

In [7]:
# Flat list of the moral value names, in the order they are declared in `moral_values`.
possible_moral_values = list(moral_values)
# Every generation target is a list of values; start with one target per single value.
all_moral_values = [[value] for value in possible_moral_values]

In [8]:
possible_moral_values

['Care',
 'Harm',
 'Fairness',
 'Cheating',
 'Loyalty',
 'Betrayal',
 'Authority',
 'Subversion',
 'Purity',
 'Degradation']

In [9]:
all_moral_values

[['Care'],
 ['Harm'],
 ['Fairness'],
 ['Cheating'],
 ['Loyalty'],
 ['Betrayal'],
 ['Authority'],
 ['Subversion'],
 ['Purity'],
 ['Degradation']]

Below you can also add combinations of moral values, in order to generate songs that express more than one moral value.

In [12]:
# The moral combinations encountered in human-annotated lyrics:
add_moral_combinations = [
# Negative combinations of 2 Moral Foundations:
("Harm", "Degradation"),
("Harm", "Subversion"),
("Harm", "Betrayal"),
("Betrayal", "Subversion"),
("Betrayal", "Degradation"),
("Subversion", "Degradation"),
("Subversion", "Cheating"),
("Cheating", "Harm"),
("Cheating", "Degradation"),
("Cheating", "Betrayal"),

# Negative combinations of 3 Moral Foundations:
("Harm", "Betrayal", "Degradation"),
("Harm", "Cheating", "Betrayal"),
("Harm", "Subversion", "Degradation"),
("Cheating", "Betrayal", "Degradation"),

# Mixed combinations of 2 Moral Foundations:
("Harm", "Authority"),
("Harm", "Loyalty"),
("Loyalty", "Subversion"),
("Care", "Cheating"),
("Care", "Subversion"),
("Fairness", "Subversion"),
("Fairness", "Harm"),
("Betrayal", "Purity"),
("Authority", "Degradation"),

# Mixed combinations of 3 Moral Foundations:
("Harm", "Fairness", "Subversion"),
("Harm", "Fairness", "Authority"),
("Harm", "Loyalty", "Authority"),
("Harm", "Betrayal", "Purity"),
("Harm", "Betrayal", "Authority"),
("Harm", "Loyalty", "Degradation"),

# Positive combinations of 2 Moral Foundations:
("Care", "Fairness"),
("Care", "Purity"),
("Care", "Loyalty"),
("Care", "Authority"),
("Fairness", "Authority"),
("Fairness", "Purity"),
("Fairness", "Loyalty"),
("Loyalty", "Purity"),
("Loyalty", "Authority"),
("Authority", "Purity"),

# Positive combinations of 3 Moral Foundations:
("Care", "Fairness", "Loyalty"),
("Care", "Loyalty", "Purity"),
("Care", "Fairness", "Purity"),
("Care", "Fairness", "Authority"),
("Loyalty", "Authority", "Purity"),
("Loyalty", "Fairness", "Authority"),
("Loyalty", "Fairness", "Purity")]

# Rebuild the target list from the single values instead of extending the existing
# list in place, so that re-running this cell does not append the combinations twice.
# Combinations are stored as lists, like the single-value targets.
all_moral_values = [[value] for value in possible_moral_values]
all_moral_values.extend(list(combination) for combination in add_moral_combinations)
# To generate ONLY songs with several moral values, use this instead:
# all_moral_values = [list(combination) for combination in add_moral_combinations]

In [10]:
len(all_moral_values)

10

In [11]:
all_moral_values

[['Care'],
 ['Harm'],
 ['Fairness'],
 ['Cheating'],
 ['Loyalty'],
 ['Betrayal'],
 ['Authority'],
 ['Subversion'],
 ['Purity'],
 ['Degradation']]

In [12]:
possible_moral_values

['Care',
 'Harm',
 'Fairness',
 'Cheating',
 'Loyalty',
 'Betrayal',
 'Authority',
 'Subversion',
 'Purity',
 'Degradation']

### OpenAI API Key
Here, insert the API key for OpenAI. You need to create an account and add credit to it in order to use the code below

In [13]:
import os

import openai

# Take the key from the OPENAI_API_KEY environment variable so that the secret is never
# saved inside the notebook. When the variable is not set the key stays empty, as before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

### Number of Examples to generate

In [16]:
# Total number of lyrics to generate; the main loop divides it among the moral value
# targets (and among the genres when `use_genre` is True).
num_samples = 2000 # change this to automatise and produce all songs at once

In [17]:
len(all_moral_values)*6

60

### Define How many songs by Genre to Include and whether to use the Genre variable

In [18]:
# When True the main loop distributes the songs of each moral value target over `genres`;
# when False it calls the generator without a genre.
use_genre = True

In [19]:
# Share of the generated songs assigned to each genre, expressed as a fraction
# (0.15 = 15%). The main loop multiplies `num_samples` by these values.
genres = {"Rock": 0.15,
          "Pop": 0.15,
          "Country": 0.15,
          "Hip Hop": 0.15,
          "R&B": 0.10,
          "Soul":0.10,
          "Folk": 0.10,
          "Blues": 0.05,
          "Jazz":0.05
           }

In [20]:
sum(genres.values())

1.0

### Download Artists
It was noted that the generated songs are quite generic and similar to each other. To diversify them, we give the model artists from a specific music genre as a template to follow, rather than the generic genre. To do so, we download the MusicOSet dataset, which includes over 11,000 bands and solo artists, and we filter the artists by the specified music genres.

In [21]:
!wget https://marianaossilva.github.io/DSW2019/assets/data/musicoset_metadata.zip

--2024-04-08 12:02:19--  https://marianaossilva.github.io/DSW2019/assets/data/musicoset_metadata.zip
Resolving marianaossilva.github.io (marianaossilva.github.io)... 2606:50c0:8002::153, 2606:50c0:8001::153, 2606:50c0:8000::153, ...
Connecting to marianaossilva.github.io (marianaossilva.github.io)|2606:50c0:8002::153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6017643 (5.7M) [application/zip]
Saving to: ‘musicoset_metadata.zip.1’


2024-04-08 12:02:19 (224 MB/s) - ‘musicoset_metadata.zip.1’ saved [6017643/6017643]



In [22]:
!unzip musicoset_metadata.zip

Archive:  musicoset_metadata.zip
replace musicoset_metadata/albums.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [32]:
# Artist metadata used to pick the artists to imitate in prompt option 3.
# NOTE(review): read_table splits on tabs by default, so this assumes artists.csv is
# tab-separated despite its extension — confirm against the dataset.
artists = pd.read_table("musicoset_metadata/artists.csv")

In [33]:
artists

Unnamed: 0,artist_id,name,followers,popularity,artist_type,main_genre,genres,image_url
0,66CXWjxzNUsdJxJ2JdwvnR,Ariana Grande,34554242.0,96,singer,dance pop,"['dance pop', 'pop', 'post-teen pop']",https://i.scdn.co/image/b1dfbe843b0b9f54ab2e58...
1,26VFTg2z8YR0cCuwLzESi2,Halsey,7368242.0,90,singer,dance pop,"['dance pop', 'electropop', 'etherpop', 'indie...",https://i.scdn.co/image/22a5f3d8c42bc7cb55215e...
2,0Y5tJX1MQlPlqiwlOH1tJY,Travis Scott,6313709.0,94,rapper,pop,"['pop', 'pop rap', 'rap']",https://i.scdn.co/image/dc5eba5e032c2e5bc4d42c...
3,246dkjvS1zLTtiykXe5h60,Post Malone,16737002.0,96,rapper,dfw rap,"['dfw rap', 'pop', 'rap']",https://i.scdn.co/image/f9d8b742b66609f12da023...
4,1zNqQNIdeOUZHb8zbZRFMX,Swae Lee,483032.0,89,singer,trap music,['trap music'],https://i.scdn.co/image/a177469870b41f7e17e3b5...
...,...,...,...,...,...,...,...,...
11513,7vyRisgvM6Wm0Pnp0qXx6m,Sweeney Todd,634.0,19,-,classic canadian rock,['classic canadian rock'],https://i.scdn.co/image/47166b7afefd590ce85086...
11514,2Uh4UmiQhrrElbrvJVH0dT,Brooklyn Dreams,318.0,32,band,-,[],https://i.scdn.co/image/bae9c82929c50d2dadb1ec...
11515,1VGFS4UGLOAxlMGqzcqHG1,PMD,1405.0,32,-,-,[],https://i.scdn.co/image/926172541a4dff5dca6bce...
11516,3Se8xpgCBmfXVnZqRSRRH9,The Tribute Co.,274.0,21,-,-,[],https://i.scdn.co/image/d4e9b2299fc5cbe4332251...


In [35]:
artists[artists.main_genre.str.contains('jazz')].sort_values(by = "popularity", ascending = False)

Unnamed: 0,artist_id,name,followers,popularity,artist_type,main_genre,genres,image_url
3694,7G1GBhoKtEPnP86X2PvEYO,Nina Simone,1425463.0,72,singer,jazz blues,"['jazz blues', 'soul', 'soul jazz', 'vocal jazz']",https://i.scdn.co/image/32639038b81bf78f356e1a...
4724,6J7biCazzYhU3gM9j1wfid,Jamiroquai,1333282.0,71,band,acid jazz,"['acid jazz', 'dance pop']",https://i.scdn.co/image/ff80e8160d1c0025dc12c2...
8657,11kBu957KTYoAltZHDm8gW,Buena Vista Social Club,653009.0,68,-,latin jazz,"['latin jazz', 'world']",https://i.scdn.co/image/5d20d6f8d14f216a770d1a...
1807,6I3M904Y9IwgDjrQ9pANiB,Kenny G,513445.0,67,-,smooth jazz,"['smooth jazz', 'smooth saxophone']",https://i.scdn.co/image/0b4efeba9b97bf02028f5d...
6779,25KNo5GDS6ZpLkjasaecA3,Thievery Corporation,463095.0,65,duo,acid jazz,"['acid jazz', 'downtempo', 'electronic', 'live...",https://i.scdn.co/image/7e1f7ad6e0cd4aad43e71a...
...,...,...,...,...,...,...,...,...
6099,1YENA5yMZMS2ta6wzLKWPg,Johnny Lytle,889.0,17,-,jazz vibraphone,"['jazz vibraphone', 'soul jazz']",https://i.scdn.co/image/35683c943ba495f5fb5ea5...
10227,6e9Ex9Qjrbzae5ivbHzaG9,Passport,2718.0,14,-,jazz fusion,['jazz fusion'],https://i.scdn.co/image/565d460f854b055fb75dd4...
10738,3y58dknRdBENZ8oVErWu9B,Oliver Lake,902.0,12,-,avant-garde jazz,"['avant-garde jazz', 'free improvisation', 'ja...",https://i.scdn.co/image/99cc519b66f2e9948e5b4e...
10921,2hTvGUmcaZ95Pjs1avHHIS,Diane Marino,388.0,8,-,deep vocal jazz,['deep vocal jazz'],https://i.scdn.co/image/6df73d951818187a270f69...


In [36]:
artists.main_genre.value_counts()[:30]

main_genre
-                       3148
album rock               305
dance pop                280
alternative metal        275
adult standards          207
contemporary country     206
classic soul             144
alternative rock         135
alternative hip hop      111
brill building pop       110
disco                     94
alternative dance         93
atl hip hop               92
bubblegum pop             89
alternative country       88
dance rock                81
art rock                  81
acoustic pop              79
funk                      78
anthem worship            73
karaoke                   70
classic uk pop            69
blues-rock                66
gospel                    64
country                   62
latin                     60
ccm                       55
east coast hip hop        53
australian pop            52
glam metal                51
Name: count, dtype: int64

# Utility Functions

#### Compute number of tokens and pricing (pricing is per 1,000,000 tokens)

In [41]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens of `string`.

    `encoding_name` is handed to `tiktoken.encoding_for_model`, so it is
    interpreted as a model name; the string is encoded with the encoding
    returned for that model and the number of resulting tokens is returned.
    """
    return len(tiktoken.encoding_for_model(encoding_name).encode(string))

print(num_tokens_from_string("Hello world, let's test tiktoken.", gpt_version))

9


In [42]:
def compute_max_price(prompt: str, gpt_version: str) -> float:
    """
    Compute the maximum price, in dollars, of an API call with the given prompt.

    The prompt tokens are charged at the input rate of `gpt_version` and the
    completion is assumed to use all of `max_tokens` at the output rate.
    The rates stored in `pricings` are per 1,000,000 tokens.

    Raises KeyError when `gpt_version` has no entry in `pricings`.
    """
    tokens_per_rate = 1000000
    input_rate, output_rate = pricings[gpt_version]

    input_tokens = num_tokens_from_string(prompt, gpt_version)
    price_in = input_tokens/tokens_per_rate*input_rate
    price_out = max_tokens/tokens_per_rate*output_rate

    return price_in+price_out

print(compute_max_price("Hello world, let's test tiktoken.", gpt_version))

0.12297


In [43]:
def compute_actual_price(completion, gpt_version: str) -> float:
    """
    Compute the price, in dollars, actually paid for a finished API call.

    The token counts are read from `completion.usage`: `prompt_tokens` is
    charged at the input rate of `gpt_version` and `completion_tokens` at its
    output rate. The rates stored in `pricings` are per 1,000,000 tokens.

    Raises KeyError when `gpt_version` has no entry in `pricings`.
    """
    tokens_per_rate = 1000000
    input_rate, output_rate = pricings[gpt_version]

    price_in = completion.usage.prompt_tokens/tokens_per_rate*input_rate
    price_out = completion.usage.completion_tokens/tokens_per_rate*output_rate

    return price_in+price_out

#### Accessing Moral Values Descriptors

In [44]:
def get_moral_value(moral_value):
  """Return the descriptor of the first key of `moral_values` that contains `moral_value`.

  The keys are scanned in dictionary order and matched with `in`; None is
  returned when no key matches.
  """
  matching_descriptions = (description
                           for key, description in moral_values.items()
                           if moral_value in key)
  return next(matching_descriptions, None)

### Vectorise Moral Values
Create vector representation of moral values to store results

In [45]:
def vectorise_moral_values(moral_values, possible_moral_values):
  """Return a dict mapping every possible moral value to 0, or to 1 when it is in `moral_values`.

  The keys follow the order of `possible_moral_values`; a value of
  `moral_values` that is not among them is added at the end with flag 1.
  """
  flags = dict.fromkeys(possible_moral_values, 0)
  flags.update(dict.fromkeys(moral_values, 1))
  return flags

In [46]:
vectorise_moral_values(["Care", "Fairness"], possible_moral_values)

{'Care': 1,
 'Harm': 0,
 'Fairness': 1,
 'Cheating': 0,
 'Loyalty': 0,
 'Betrayal': 0,
 'Authority': 0,
 'Subversion': 0,
 'Purity': 0,
 'Degradation': 0}

### Prompt Creation and Response Formatting

In [47]:
import re

# System message shared by every prompt option that assigns a role to the model.
_SONGWRITER_ROLE = {"role": "system", "content": "You are an assistant to a songwriter, and you need to assist in writing lyrics related to the Moral foundations described in the Moral Foundation Theory."}


def _moral_instruction(moral_values, opening, describe):
  """Build the clause of the prompt that lists every requested moral value.

  With describe=False the result is "<opening> A and the Moral dimension of B".
  With describe=True every value, the first one included, is followed by
  ", defined as the foundation <descriptor>,".
  """
  parts = []
  for value in moral_values:
    if not describe:
      parts.append(value)
      continue
    description = get_moral_value(value)
    if description is None:
      # no descriptor available: name the value without a definition
      parts.append(f"{value},")
    else:
      parts.append(f"{value}, defined as the foundation {description},")
  return f"{opening} " + " and the Moral dimension of ".join(parts)


def _pick_artist(genre, exclude_indeces):
  """Sample one artist whose main genre contains `genre`, preferring unused artists.

  Returns (index, name), or (None, None) when no genre is given or when no
  artist of `artists` matches it.
  """
  if genre is None:
    return None, None
  # plain substring match: the genre name is not meant to be a regular expression
  genre_artists = artists[artists.main_genre.str.contains(genre.lower(), regex=False)]
  if genre_artists.empty:
    return None, None
  unused = genre_artists[~genre_artists.index.isin(list(exclude_indeces))]
  # once every artist of the genre has been used, repetitions are allowed
  pool = genre_artists if unused.empty else unused
  artist = pool.sample(1)
  return artist.index.values[0], artist["name"].values[0]


def prompt_creation(moral_values,
                    gpt_version,
                    genre=None,
                    option=2,
                    exclude_indeces=None):
  """Create the chat messages asking for lyrics that express `moral_values`.

  Options:
    0: user message only, no role assignment and no moral value explanation
    1: role assignment (system message), no moral value explanation
    2: role assignment and explanation of every moral value
    3: like 2, but the song is written in the style of an artist sampled
       from `artists` among those whose main genre contains `genre`
  For options 0-2 the genre, when given, is requested explicitly in the
  prompt. Option 3 falls back to the same genre request when no artist can
  be sampled.

  `exclude_indeces` is an optional set of artist indices already used; for
  option 3 the sampled index is added to it.

  Returns (prompt, max_price, exclude_indeces, name) when `exclude_indeces`
  is given and (prompt, max_price, name) otherwise; `name` is the sampled
  artist, or None when no artist was used.

  Raises ValueError for an empty `moral_values` or an unknown `option`.
  """
  if option not in (0, 1, 2, 3):
    raise ValueError(f"Unknown prompt option: {option!r}")
  if len(moral_values)==0:
    raise ValueError("At least one moral value is required")

  direct_instruction = "write original lyrics of a song expressing these moral foundations. DO NOT directly mention these moral foundations. DO NOT explicitly talk about morality."

  name = None
  track_artists = exclude_indeces is not None

  if option==3:
    idx, name = _pick_artist(genre, exclude_indeces if track_artists else ())
    if name is not None:
      direct_instruction += f" Write it in the style of {name}."
      if track_artists:
        exclude_indeces.add(idx)
    elif genre is not None:
      # no artist available: ask for the genre instead of "the style of None"
      direct_instruction += f" Write it as a {genre} song."
  elif genre is not None:
    direct_instruction += f" Write it as a {genre} song."

  if option in (0, 1):
    instruction = _moral_instruction(moral_values, "Given the Moral dimension of", describe=False)
    user_content = ", ".join([instruction, direct_instruction])
  else:
    opening = "Given the Moral dimensions of" if option==3 else "Given the Moral dimension of"
    instruction = _moral_instruction(moral_values, opening, describe=True)
    user_content = " ".join([instruction, direct_instruction])

  prompt = [{"role": "user", "content": user_content}]
  if option:
    # every option except 0 assigns the songwriter-assistant role
    prompt.insert(0, dict(_SONGWRITER_ROLE))

  max_price = compute_max_price(" ".join([msg["content"] for msg in prompt]), gpt_version)

  if track_artists:
    return prompt, max_price, exclude_indeces, name
  return prompt, max_price, name

def clean_completion(completion):
   """Remove the lines that consist only of a parenthesised marker, e.g. "(Verse 1)".

   Only the marker and the newline before it are removed; the newline after it
   is kept (lookahead), so the surrounding lyric lines are not glued together
   and several markers in a row are all removed.
   """
   return re.sub(r"\n\(.*?\)(?=\n)", "", completion)

def get_completion(moral_values,
                   model,
                   genre=None,
                   temperature=1,
                   prompt_option=2,
                   exclude_indeces=None):
  """Generate one lyric for `moral_values` with the chat completion API.

  The prompt comes from `prompt_creation`. The set of already used artists is
  forwarded to it only for prompt option 3 and only when it is not None.

  Returns (lyric, actual_price, max_price, exclude_indeces, name): the cleaned
  text of the first choice, the price computed from the response, the price
  estimated for the prompt, the set of used artist indices and the `name`
  returned by `prompt_creation`.
  """
  creation_kwargs = {"gpt_version": model, "genre": genre, "option": prompt_option}
  track_artists = exclude_indeces is not None and prompt_option==3
  if track_artists:
      creation_kwargs["exclude_indeces"] = exclude_indeces

  created = prompt_creation(moral_values, **creation_kwargs)
  if track_artists:
      messages, max_price, exclude_indeces, name = created
  else:
      messages, max_price, name = created

  response = openai.chat.completions.create(model=model,
                                            messages=messages,
                                            temperature=temperature)

  actual_price = compute_actual_price(response, model)
  first_choice = response.choices[0]
  lyric = clean_completion(first_choice.message.content)

  return lyric, actual_price, max_price, exclude_indeces, name

# Main Process

In [48]:
prompt_option = 3 # role assignment and moral value description, with an artist to imitate instead of the generic genre
temperature = 1 # increase for more creative outputs, decrease for more deterministic ones

In [49]:
len(all_moral_values)*7*3

210

In [50]:
generated_samples = 0
running_price = 0
results = {"lyrics":[], "genre": [], "artist": []}
exclude_indeces = set([-1]) # indices of the artists already used; -1 never matches a real artist
for moral_value in possible_moral_values:
  results[moral_value] = []

output_file = f"automatic_lyrics_{gpt_version}_with_prompt_option_{prompt_option}_multiple_mft_per_song.csv"

# Plan every (moral values, genre) pair up front, so that the sample limit is applied
# in a single place instead of through a break that only leaves the innermost loop.
if use_genre:
  if num_samples<len(genres)*len(all_moral_values):
      print("Not enough sample to generate to include all music genres!")
  schedule = [(values, genre)
              for values in all_moral_values
              for genre, percentage in genres.items()
              for _ in range(round(num_samples*percentage/len(all_moral_values)))]
else:
  # at least one song per target and pass, otherwise a small num_samples never progresses
  per_value = max(1, round(num_samples/len(all_moral_values)))
  single_pass = [(values, None) for values in all_moral_values for _ in range(per_value)]
  n_passes = -(-num_samples//len(single_pass)) # ceiling division
  schedule = single_pass*n_passes

# never generate more than num_samples songs
for values, genre in schedule[:num_samples]:
  if not generated_samples%10:
      print(f"Generated {generated_samples} song lyrics. Running price: {running_price}")
  lyrics, price, _, exclude_indeces, name = get_completion(values,
                                                           gpt_version,
                                                           genre,
                                                           temperature=temperature,
                                                           prompt_option=prompt_option,
                                                           exclude_indeces=exclude_indeces)
  running_price += price
  moral_vector = vectorise_moral_values(values, possible_moral_values)
  for value, is_there in moral_vector.items():
      results[value].append(is_there)
  results["lyrics"].append(lyrics)
  results["genre"].append(genre)
  results["artist"].append(name)
  # the whole file is rewritten after every song, so an interrupted run keeps its results
  df_results = pd.DataFrame(results)
  df_results.to_csv(output_file, index=False)
  generated_samples += 1

Generated 0 song lyrics. Running price: 0
Generated 10 song lyrics. Running price: 0.12376000000000001
Generated 20 song lyrics. Running price: 0.25447000000000003
Generated 30 song lyrics. Running price: 0.39084000000000013
Generated 40 song lyrics. Running price: 0.5226800000000001
Generated 50 song lyrics. Running price: 0.6357300000000002
Generated 60 song lyrics. Running price: 0.7591300000000002
Generated 70 song lyrics. Running price: 0.8822300000000002
Generated 80 song lyrics. Running price: 1.0150800000000002
Generated 90 song lyrics. Running price: 1.1398600000000003
Generated 100 song lyrics. Running price: 1.27204
Generated 110 song lyrics. Running price: 1.3932600000000002
Generated 120 song lyrics. Running price: 1.5143000000000004
Generated 130 song lyrics. Running price: 1.6408800000000006
Generated 140 song lyrics. Running price: 1.7658800000000008
Generated 150 song lyrics. Running price: 1.9007200000000009
Generated 160 song lyrics. Running price: 2.0240200000000006