In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [11]:
pip install requests beautifulsoup4 pypinyin deep-translator

Note: you may need to restart the kernel to use updated packages.


In [12]:
import requests
from bs4 import BeautifulSoup
from pypinyin import lazy_pinyin
from deep_translator import GoogleTranslator

In [13]:
# Step 1: Define the URL (Modify for your song)
URL = "https://www.top10pinyinlyrics.com/2025/02/first-frost-bai-jing-ting-yin-xing-ren.html"

# Step 2: Fetch the webpage content
response = requests.get(URL)
if response.status_code == 200:
    soup = BeautifulSoup(response.text, "html.parser")

    # Step 3: Locate the lyrics container (Modify selector if needed)
    lyrics_div = soup.find("div", class_="post-body")  # Modify based on actual webpage structure

    if lyrics_div:
        chinese_lyrics = lyrics_div.get_text(separator="\n", strip=True)

        # Step 4: Convert Chinese lyrics to Pinyin using pypinyin
        chinese_lines = chinese_lyrics.split("\n")
        pinyin_lyrics = [" ".join(lazy_pinyin(line)) for line in chinese_lines]

        # Step 5: Translate lyrics to English using Deep Translator
        translator = GoogleTranslator(source="zh-CN", target="en")  # Use 'zh-TW' for Traditional Chinese
        english_lyrics = [translator.translate(line) for line in chinese_lines]


In [9]:
# Step 6: Save the structured lyrics (Chinese + Pinyin + English) without duplicates
formatted_lyrics = []
unique_lines = set()  # To store and check for duplicate lines

for i in range(len(chinese_lines)):
    # Ensure no NoneType values exist
    chinese = chinese_lines[i] if chinese_lines[i] else ""
    pinyin = pinyin_lyrics[i] if pinyin_lyrics[i] else ""
    english = english_lyrics[i] if english_lyrics[i] else ""

    # Convert to a set to remove duplicates
    line_set = {chinese, pinyin, english}

    # Ensure all values are strings and join without duplicates
    clean_lines = "\n".join(filter(None, line_set))  # `filter(None, line_set)` removes empty strings

    if clean_lines not in unique_lines:  # Prevent duplicate block writing
        formatted_lyrics.append(clean_lines)
        unique_lines.add(clean_lines)

# Convert list to string
final_lyrics = "\n\n".join(formatted_lyrics)

# Print the cleaned lyrics
print("✅ Extracted Lyrics (No Duplicates):\n")
print(final_lyrics)

# Step 7: Save lyrics to a file
with open("formatted_lyrics_cleaned.txt", "w", encoding="utf-8") as file:
    file.write(final_lyrics)

print("\n🎵 Cleaned Lyrics saved to 'formatted_lyrics_cleaned.txt'!")


✅ Extracted Lyrics (No Duplicates):

English Translation, pinyin lyrics: Bai Jing Ting  Bai Jingting sings for the OST of First Frost It’s hard to coax.
English Translation, pinyin lyrics: Bai Jing Ting   bai jing ting  sings for the OST of First Frost  nan hong .
English Translation, pinyin lyrics: Bai Jing Ting  白敬亭 sings for the OST of First Frost 难哄.

Other First Frost OST songs

HERE

wu lun ni ken huo bu ken   wo dou xuan ze deng
Whether you want to or not, I will choose to wait
无论你肯或不肯 我都选择等

Wúlùn nǐ kěn huò bù kěn  wǒ doū xuǎnzé děng
Wúlùn nǐ kěn huò bù kěn wǒ doū xuǎnzé děng

(Whether you are willing or not, I will choose to wait)

deng dao ni jie shu hao jiu    tan xian de lv cheng
等到你结束好久  探险的旅程
Wait until you end your long adventure journey

děngdào nǐ jiéshù hǎojiǔ tànxiǎn de lǚchéng
děngdào nǐ jiéshù hǎojiǔ  tànxiǎn de lǚchéng

( Wait till you finish your long adventurous journey)
( Wait till you finish your long adventure journey)

If there is no loneliness
要是没有寂寞陪衬
yao

In [14]:
# Step 6: Save the structured lyrics (Chinese + Pinyin + Single English Translation) without duplicates
formatted_lyrics = []
unique_lines = set()  # To store and check for duplicate lines

for i in range(len(chinese_lines)):
    # Ensure no NoneType values exist
    chinese = chinese_lines[i] if chinese_lines[i] else ""
    pinyin = pinyin_lyrics[i] if pinyin_lyrics[i] else ""
    english = english_lyrics[i] if english_lyrics[i] else ""

    # Avoid duplicate Pinyin variations & Keep only one English translation
    line_set = {chinese, pinyin, english}
    clean_lines = [chinese, pinyin, english]  # Maintain original order

    # Ensure unique structure and remove empty values
    clean_lines = "\n".join(filter(None, clean_lines))  # `filter(None, clean_lines)` removes empty strings

    if clean_lines not in unique_lines:  # Prevent duplicate block writing
        formatted_lyrics.append(clean_lines)
        unique_lines.add(clean_lines)

# Convert list to string
final_lyrics = "\n\n".join(formatted_lyrics)

# Print the cleaned lyrics
print("✅ Extracted Lyrics (No Duplicates, One Translation):\n")
print(final_lyrics)

# Step 7: Save lyrics to a file
with open("formatted_lyrics_cleaned.txt", "w", encoding="utf-8") as file:
    file.write(final_lyrics)

print("\n🎵 Cleaned Lyrics saved to 'formatted_lyrics_cleaned.txt'!")


✅ Extracted Lyrics (No Duplicates, One Translation):

English Translation, pinyin lyrics: Bai Jing Ting  白敬亭 sings for the OST of First Frost 难哄.
English Translation, pinyin lyrics: Bai Jing Ting   bai jing ting  sings for the OST of First Frost  nan hong .
English Translation, pinyin lyrics: Bai Jing Ting  Bai Jingting sings for the OST of First Frost It’s hard to coax.

Other First Frost OST songs
Other First Frost OST songs
Other First Frost OST songs

HERE
HERE
HERE

无论你肯或不肯 我都选择等
wu lun ni ken huo bu ken   wo dou xuan ze deng
Whether you want to or not, I will choose to wait

Wúlùn nǐ kěn huò bù kěn  wǒ doū xuǎnzé děng
Wúlùn nǐ kěn huò bù kěn  wǒ doū xuǎnzé děng
Wúlùn nǐ kěn huò bù kěn wǒ doū xuǎnzé děng

(Whether you are willing or not, I will choose to wait)
(Whether you are willing or not, I will choose to wait)
(Whether you are willing or not, I will choose to wait)

等到你结束好久  探险的旅程
deng dao ni jie shu hao jiu    tan xian de lv cheng
Wait until you end your long adventure journ