In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/housex-text-processing/housex-processing/selected_artists.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/melodic_house/1.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/melodic_house/2.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/future_house/3.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/future_house/1.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/future_house/2.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/future_house/4.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/progressive_house/3.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/progressive_house/1.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/progressive_house/2.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/progressive_house/4.txt
/kaggle/input/housex-text-processing/housex-processing/corpus/bass_house/3.txt
/kaggle/input

## Load descriptors for a file

In [2]:
def get_file_descriptors(file_path):
    try:
        with open(file_path, "r") as f:
            lines = f.readlines()
            lines = lines[1:] # Skip the URL
            lines = [x for x in lines if len(x) > 0] # Skip empty lines
            merged_string = ' '.join(lines)
            return merged_string
    except Exception as e:
        print(f"Error: {e}")
        return None

In [3]:
get_file_descriptors("/kaggle/input/housex-text-processing/housex-processing/corpus/melodic_house/1.txt")

'The recently released EP from Avicii titled “Avici” has not only got some amazing responses but some great remixes too. One of such fine remixes is from the Norwegian producer Jack Taylor. Giving an emotional progressive twist to the original song, the remix stands out and delivers some great vibes. The young artist followed the Nort European trend of making beautiful and emotional progressive, and once again I’m proud to announce a new talent to check out!\n The remix starts out with a energetic intro, including the lovely guitar snippet from the original track. Smoothly guiding into the breakdown, the original track comes up. Just as the break from the original track changes to the drop, an charming guitar melody comes up…. It really feels like a piece of the previous version and blends in perfectly. An example of good transition!\n The drop feels epic with the same melody, now backed with some more saw synths and the same guitar melody. I really have to admire Jack’s idea of extend

## Store the descriptors for each sub-genre

In [5]:
from glob import glob
all_files = glob('/kaggle/input/housex-text-processing/housex-processing/corpus' + '/**/*.txt', recursive=True)

GENRE_DICT = {}

for current_file in all_files:
    genre = current_file.split('/')[-2]
    if genre not in GENRE_DICT:
        GENRE_DICT[genre] = []
    desc = get_file_descriptors(current_file)
    GENRE_DICT[genre].append(desc)

In [6]:
GENRE_DICT.keys()

dict_keys(['melodic_house', 'future_house', 'progressive_house', 'bass_house'])

## Load Model from Huggingface
- stablelm-base-alpha-3b-v2

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-base-alpha-3b-v2")
model = AutoModelForCausalLM.from_pretrained(
  "stabilityai/stablelm-base-alpha-3b-v2",
  trust_remote_code=True,
  torch_dtype="auto",
)
model.cuda()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

StableLMAlphaForCausalLM(
  (transformer): StableLMAlphaModel(
    (embed): Embedding(50432, 2560)
    (layers): ModuleList(
      (0-31): 32 x DecoderLayer(
        (norm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (attention): Attention(
          (qkv_proj): Linear(in_features=2560, out_features=7680, bias=False)
          (out_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (rotary_emb): RotaryEmbedding()
        )
        (mlp): MLP(
          (gate_proj): Linear(in_features=2560, out_features=13824, bias=False)
          (out_proj): Linear(in_features=6912, out_features=2560, bias=False)
          (act): SiLU()
        )
      )
    )
    (final_norm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=2560, out_features=50432, bias=False)
)

## Sample StableLM run

In [8]:
inputs = tokenizer("The weather is always wonderful", return_tensors="pt").to("cuda")
tokens = model.generate(
  **inputs,
  max_new_tokens=100,
  temperature=0.95,
  top_p=0.95,
  do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


The weather is always wonderful here, and a short drive takes you to the mountains which is a great place to escape for a few days. The resort has a good location, a clean pool and comfortable rooms. The staff is always very friendly and helpful. We will definitely stay again. more


## Prompting strategy

In [9]:
from tqdm import tqdm
PROMPT_DICT_1 = {}

for _k in GENRE_DICT:
    if _k not in PROMPT_DICT_1:
        PROMPT_DICT_1[_k] = []
    # Get the output from FLAN-T5-base
    for inputs in tqdm(GENRE_DICT[_k]):
        prompt = f"Generate 5 verbs to summarise the house music piece described as follows: '{inputs}'. Output only 5 words separated by ','. Note that this is a {_k} song. The selected words should be consistent with the input text. Do not output full sentences."
        print('='*40)
        print(f"Prompt: {prompt}")
        print('='*40)
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs.to("cuda")
        outputs = model.generate(
          **inputs,
          max_new_tokens=100,
          temperature=0.95,
          top_p=0.95,
          do_sample=True,
        )
        selected_words = list(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))[0]
        PROMPT_DICT_1[_k].append(selected_words)

  0%|          | 0/2 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: 'The recently released EP from Avicii titled “Avici” has not only got some amazing responses but some great remixes too. One of such fine remixes is from the Norwegian producer Jack Taylor. Giving an emotional progressive twist to the original song, the remix stands out and delivers some great vibes. The young artist followed the Nort European trend of making beautiful and emotional progressive, and once again I’m proud to announce a new talent to check out!
 The remix starts out with a energetic intro, including the lovely guitar snippet from the original track. Smoothly guiding into the breakdown, the original track comes up. Just as the break from the original track changes to the drop, an charming guitar melody comes up…. It really feels like a piece of the previous version and blends in perfectly. An example of good transition!
 The drop feels epic with the same melody, now backed with some more saw 

 50%|█████     | 1/2 [00:03<00:03,  3.32s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: 'I wanted to end this beautiful September with a masterpiece. Steerner, one of the most underrated producers in the history, is back with a monstrous “More Than You Know” bootleg.
 “More Than You Know” (Steerner Bootleg) impressed me, using a euphemism. This is how a bootleg should be made, with the exact balance between original vibes, experimentation and signature sounds. 33% each one, and the remaining 1% goes in goosebumps.
 Talking about the original version, I’ve never reviewed it. I wasn’t a big fan of it, I liked the vocal but not the slow rhythm, During the past months some unofficial versions came out (my favorite was the Firebeatz one), but I had not found the “right remix”. There was potential, obviously, we are talking about A&I, the song is good, but not 100% good.
 I know it’s quite difficult to understand, but in few words, I was looking for the right “More Than You Know”, and it arrived t

100%|██████████| 2/2 [00:07<00:00,  3.53s/it]
  0%|          | 0/4 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: 'The eminent dance music label Enhanced continues to prove its quality in the scene without any falter, as they further introduce new talents. Unveiling an enticing production from the debutants Maxim Schunk and Raven & Kreyn, this release has been voiced by the talented singer Bishøp. Titled “My Name”, the track showcases charm and groove simultaneously.
 Drawing inspiration from 90s classic R&B hit song “Say My Name”, this release moulds and flips the vintage track into a modern contemporary future house production, and the result is delightful! The breakdown features the enchanting vocal from Bishøp, further candy-coated with vibrant dance pianos in the backdrop.
 Swirling around, the breakdown brims with sumptuous house vibes. A brief riser introduces the rhythmic drop into the scene. Consisting of usual future house synths coupled with an alluring and energetic melody, the drop takes over with peppin

 25%|██▌       | 1/4 [00:03<00:09,  3.23s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Excellent vocal performance by Eddie Jonsson
 • The guitar in the breakdown is noteworthy
 • Predictable future bounce drop
 Dropgun is one of my favorite duos, and it’s not just their music. They have an excellent marketing strategy, focused on their purple mascot, Little Drop. You can find it on Dropgun’s recent covers and videos, and many of their releases tell his story. It’s like a Dropgun Cinematic Universe, orbiting around Little Drop.
 Speaking about their future bounce/electro house approach, which is similar to Brooks one, in the latest releases, I find it rather catchy. It doesn’t particularly stand out from the mass compared to the other structures used by Dropgun, but it keeps the right energy while leaving some space for commercialism. “A Little More About You“, follows this exact strategy, with distinct attention to the vocal.
 But that’s just about it: a fantastic vocal, with a chorus, 

 50%|█████     | 2/4 [00:04<00:04,  2.19s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Surreal experience in the drop
 • Unexpected combination of two synergistic styles
 • Cliché vocal packed with surprises
 I have always kept a true respect towards Laidback Luke, one of the few veterans that, in my opinion, has managed to stay “real” in the scene, even after all this years. His Future House-oriented style keeps receiving twists (as he often brands it as “Twisted House”) and different variations based on the artist he’s working along with, and I consider it a great example of “constructive” team-up. A collaboration made because there is true synergy between the people involved.
 Domastic is a versatile talent in the scene, his sound is characterized by very aggressively constructed basslines, which are often surprising. I discovered him thanks to “Elephant” back in the days, and I’m glad to see that he has achieved various nice milestones. Amassing millions of streams because of his fi

 75%|███████▌  | 3/4 [00:08<00:02,  2.78s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Excellent mood in the buildup section
 • Predictable features in the drop
 • Robust track with clean sound design
 Jay Eskar’s debut has been awaited by me for quite some time now, owing to his unwavering talented efforts on the scene. It had to be either Hexagon or the No Copyright Sounds label, as he found breakthrough with Mixmash records (recommendation: “Ignite”). The Mexican creator has been devoted to Future Bounce for years now, reinventing the style to his ingenuity, once brought to fame by Brooks and Mike Williams.
 Forging a sonical alliance with Doxed, a young creator who is into Bass and Future House spectrum, and Rico 56 for the vocal department, the group debuted last Friday with “Let’s Run Away” and needless to say, the overall idea is fascinating!
 This might not be the best of the said team’s outcome, to be brutally honest, as the drop holds onto a few run-of-the-mill ingredients. Ins

100%|██████████| 4/4 [00:11<00:00,  2.91s/it]
  0%|          | 0/4 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Quality vocal from Michel Zitron
 • Predictable drop, we are all aware of the formula
 • High technical quality and sound design, but also expectations…
 Dutch sensation Martin Garrix has been widely known for his canorous Progressive sound, besides creating radio-compatible hits simultaneously. In particular, we are talking about his compositions together with Matisse & Sadko. These guys have created many dance-floor bops together, among which “Break Through The Silence“, “Dragon“, “Together“, “Forever” has made it absolutely worthwhile. Their next venture is yet another festival ground-breaker, which goes by the name “Hold On” featuring the Swedish singer Michel Zitron on the vocals.
 Over the last few years, it has become a commonplace fact that their sound blend goes very well together, as demonstrated in their previous records. This one has been constructed similarly in a very flourishing way none

 25%|██▌       | 1/4 [00:03<00:10,  3.47s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Catchy rework
 • Attractive vocals
 • Dance-pop makeover of the classic
 Time and time again, Alice Deejay’s evergreen “Better Off Alone” has been far from a solitary, one-time success— rather, it brought upon several other hits in pop and dance music alike. Needless to say, repetition leads to annoyance and when the success of a project is borrowed too many times, its novelty eventually wears off. Could the same be said about the newest project from Alan Walker, Dash Berlin, and Vikkstar? Probably not.
 “Better Off (Alone, Pt. III)” is surprisingly praiseworthy, rather than an unneeded release wanting to gather streams.
 An unexpected team-up, Alan Walker is synonymous mostly with the commercial dance-pop arena; known for his midtempo, radio-ready productions, the Nordic alias has retained a massive influence over the years (although getting mixed reactions due to his involvement with music creation).

 50%|█████     | 2/4 [00:04<00:03,  1.79s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Special care in the breakdown’s atmosphere
 • Subtle editing in the vocal
 • Another milestone for the duo Wildvibes-Patrick Key
 A couple of years ago I wrote a piece about Wildvibes, a young guy that impressed me with his quality Progressive House. I kept watching him growing, reaching new and important milestones. I saw him earning his first support on Hardwell on Air in 2017, his first million plays on SC, then an official main release a few months ago… And now, with his loyal partner Patrick Key, there is another important release on Maxximize, “Meant For You“. Blasterjaxx’s label doesn’t have many releases, but it’s finding promising names, such as Declain, ANG and Mountblaq.
 The duo Wildvibes-Patrick Key is accompanied by Lasso the Sun, providing an excellent performance with some high-pitched segments that add a refreshing twist to a classic element of the genre. A sweet guitar and a piano sup

 75%|███████▌  | 3/4 [00:04<00:01,  1.24s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Slower pace minus the cheesiness
 • Heart-warming vocal
 • Different theme, yet for the holidays
 Before this year goes into the oblivion, I couldn’t resist on mentioning one of the more interesting names of 2021, KAAZE. His efforts are admirably ingenious, building a particular sound design while experimenting with the BLK RSE project in the spare time. His interactive-ness with the fans is an added bonus, as we can see in the “KAAZEMAS” idea where he gives away bonus contents in the year-end.
 On a personal note, I haven’t found myself as excited to BLK RSE’s latest creations, as both “Chrome” and “Systemize” didn’t stuck with me for long; with “Young Ones”, however, the Swedish act has changed my recent colder stance towards him. It’s the jolly Christmas after all, magic happens!
 It’s a slower take on KAAZE’s usual progressive demeanour, tagging along the familiar influences in the lead and a gentl

100%|██████████| 4/4 [00:05<00:00,  1.43s/it]
  0%|          | 0/4 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• First team-up between two giants of the current Bass House scene
 • Very similar to “Hyper”…
 • Excellent job on the breakdown’s loop
 Expecting something more from the talented names involved here, and not something similar to “Hyper”, unfortunately, the EP contains similar naming and hence coincidences remain here. Released a couple of weeks back by STMPD RCRDS, today’s victim is the result obtained by the renowned House music aliases Siks and Julian Jordan: “Juice”.
 Speaking more precisely, the potential in the premise here was pretty convincing. Siks is a talented up-and-coming mastermind in the Future House and Bass House scene, often capable to derive some twists to the formulas overused by many others, and is captured through a funky or more experimental sense. Similarly, Julian Joran has developed his glitchy Bass Hosue style and has evolved a lot, with hits such as “Diamonds” and “Funk” with 

 25%|██▌       | 1/4 [00:00<00:02,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Perfect amalgamation of signatures
 • Playful but catchy breakdown
 • Intricate drop hook with Dyro’s influence
 What do we (or at least, I) expect from any collaboration?‌ We want to see how the styles merge together, if they have the chemistry to intrigue, and how they embedded their joint efforts in it. In the other example, I only hear 22Bullets:‌ no trace of Timmy and no trace of these random guys with a very hironic name. In “Warp‌ Speed”, there is a perfect combination between two colleagues that put their amazing efforts into a quality product.
 Since their Revealed times, fans were dreaming about a team-up between the dirty electro house of Dyro (which has taken a more melodic approach lately, as we can hear) and the futuristic sounds of Julian Calor. “Warp‌ Speed”‌ is finally here, with a pack of interesting ideas attached!
 Let’s start with the breakdown from Julian Calor: the Dutch talent b

 50%|█████     | 2/4 [00:04<00:04,  2.44s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Creative sound arrangements
 • Blend of Bass and Tech House
 • Minimal but aggressive
 With the renowned Dutch label of STMPD pushing out their own flavour of House music with the help of their multi-talented roster. Among this coveted list of musicians, the Dutch personage Loopers has made a steady and unhindered reputation in Martin Garrix’s label since the last couple of years. This Friday witnessed his latest release, “Groove Police”, a type of production which strikes a chord within a few seconds of its playback, emitting an energetic and groovy vibe meant for accelerating the after-hours dance-floor activity.
 Loopers has reclaimed his place in STMPD with flair, not to forget his collaboration with native heavyweights Martin Garrix & Dyro. The preceding year, the esteemed solo act made explosive dance numbers such as “Out Of Control” (with Seth Hills), extrapolating and experimenting with the upr

 75%|███████▌  | 3/4 [00:04<00:01,  1.53s/it]Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt: Generate 5 verbs to summarise the house music piece described as follows: '• Clear superhero soundtrack theme
 • Surprising vocal performance
 • Right balance of orchestral and EDM
 After the promising feedback on “Sentio“, I was craving for a final Garrix track before the dusk of the year, and lucky me: “Hero” revealed itself! I am impressed by the versatility shown by the Dutch creator, as this time he takes a trip into the pop and commercial realms. Surprise surprise, he’s no stranger to that and we won’t be bashing as there’s a great deal to unravel here!
 First of all, “Hero” is released in collaboration with Marvel’s “Snap” mobile game. While I am particularly devoted to the game’s premise itself (usually card games are riddled with micro-transactions), it clearly explains the title, design, and visuals (by the way, the music video is sick!). For Marvel’s PR team, this is a solid plan! The vocalist is JVKE, whom I didn’t originally prefer as he is principally a TikTok inf

100%|██████████| 4/4 [00:05<00:00,  1.49s/it]


In [10]:
PROMPT_DICT_1

{'melodic_house': ['Generate', 'Generate'],
 'future_house': ['Generate', 'Generate', 'Generate', 'Generate'],
 'progressive_house': ['Generate', 'Generate', 'Generate', 'Generate'],
 'bass_house': ['Generate', 'Generate', 'Generate', 'Generate']}

## Generate `.md` file for the outputs

In [11]:
for _k in PROMPT_DICT_1:
    print(_k)
    for desc in PROMPT_DICT_1[_k]:
        print(f" - {desc}")
    print()

melodic_house
 - Generate
 - Generate

future_house
 - Generate
 - Generate
 - Generate
 - Generate

progressive_house
 - Generate
 - Generate
 - Generate
 - Generate

bass_house
 - Generate
 - Generate
 - Generate
 - Generate

