In [64]:
import pandas as pd

from collections import Counter
from datetime import datetime

In [65]:
filename = "melon_chart.csv"
raw_data = pd.read_csv(filename)

In [66]:
raw_data.head()


Unnamed: 0,rank,title,singer,album,date,like
0,1,Perfect Night,LE SSERAFIM (르세라핌),Perfect Night,2024.01.01.01,114191
1,2,Drama,aespa,Drama - The 4th Mini Album,2024.01.01.01,85818
2,3,To. X,태연 (TAEYEON),To. X - The 5th Mini Album,2024.01.01.01,143931
3,4,비의 랩소디,임재현,비의 랩소디,2024.01.01.01,68931
4,5,첫 눈,EXO,겨울 스페셜 앨범 '12월의 기적 (Miracles In December)',2024.01.01.01,287040


In [67]:
# ----------------------------------------------------------#
# # 해당 부분은 현재 해결하여 신규 데이터부터 적용할 필요없음.
# raw_data['tmp'] = raw_data['album']
# raw_data['album'] = raw_data['singer']
# raw_data['singer'] = raw_data['tmp']
# raw_data.drop(['tmp'],axis=1,inplace = True)
# raw_data.head()
# ----------------------------------------------------------#

# 1. 봄 / 겨울 구분 컬럼 추가
# 봄 : 0 겨울 : 1 
data = raw_data
data[['year','month','day','week']] = data['date'].str.split('.',expand=True)
data['year'] = data['year'].astype('int')
data['month'] = data['month'].astype('int')
data['day'] = data['day'].astype('int')
data['week'] = data['week'].astype('int')
data['season'] = data['month'].apply(lambda x : 1 if x in [1,2,12] else 0)

data.head()

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
0,1,Perfect Night,LE SSERAFIM (르세라핌),Perfect Night,2024.01.01.01,114191,2024,1,1,1,1
1,2,Drama,aespa,Drama - The 4th Mini Album,2024.01.01.01,85818,2024,1,1,1,1
2,3,To. X,태연 (TAEYEON),To. X - The 5th Mini Album,2024.01.01.01,143931,2024,1,1,1,1
3,4,비의 랩소디,임재현,비의 랩소디,2024.01.01.01,68931,2024,1,1,1,1
4,5,첫 눈,EXO,겨울 스페셜 앨범 '12월의 기적 (Miracles In December)',2024.01.01.01,287040,2024,1,1,1,1


In [71]:
data.shape

(37854, 11)

# 0. 동일년도 봄에도 등장하고, 겨울에도 등장하는 노래 조회.

In [72]:
spring_songs = data[data['month'].isin([3,4,5])]
winter_songs = data[data['month'].isin([1,2,12])]

In [92]:
# 동일한 년도에 등장한 노래
both_table = pd.merge(spring_songs,winter_songs,on=['title','year'])

# 원본데이터에서 동일한 년도에 등장한 노래 제거
joined_data = pd.merge(data, both_table, left_on=['title', 'singer'], right_on=['title', 'singer_x'], how='left')
result_data = joined_data[pd.isna(joined_data['singer_x'])] # 동일한 년도에 등장하지 않았다면 singer_x 가 nan 일 것 이기 떄문에. 

In [97]:
result_data.head()

Unnamed: 0,rank,title,singer,album,date,like,year_x,month,day,week,...,season_x,rank_y,singer_y,album_y,date_y,like_y,month_y,day_y,week_y,season_y
4502,43,잠시라도 우리,"성시경, 나얼",잠시라도 우리,2024.01.01.01,39656,2024,1,1,1,...,,,,,,,,,,
5241,50,Chill Kill,Red Velvet (레드벨벳),Chill Kill - The 3rd Album,2024.01.01.01,42790,2024,1,1,1,...,,,,,,,,,,
6088,56,화이트 (White),폴킴,화이트 (White),2024.01.01.01,18814,2024,1,1,1,...,,,,,,,,,,
6653,60,GODS,"NewJeans, League of Legends",2023 리그 오브 레전드 월드 챔피언십 주제곡,2024.01.01.01,46175,2024,1,1,1,...,,,,,,,,,,
9019,72,Snowman,Sia,Everyday Is Christmas (Snowman Deluxe Edition),2024.01.01.01,265321,2024,1,1,1,...,,,,,,,,,,


In [107]:
# 필요한 행만 추출 
data.columns
result_data = result_data[['rank', 'title', 'singer', 'album', 'date', 'like', 'year_x', 'month',
       'day', 'week', 'season']].rename(columns={'year_x':'year'})

In [108]:
result_data

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
4502,43,잠시라도 우리,"성시경, 나얼",잠시라도 우리,2024.01.01.01,39656,2024,1,1,1,1
5241,50,Chill Kill,Red Velvet (레드벨벳),Chill Kill - The 3rd Album,2024.01.01.01,42790,2024,1,1,1,1
6088,56,화이트 (White),폴킴,화이트 (White),2024.01.01.01,18814,2024,1,1,1,1
6653,60,GODS,"NewJeans, League of Legends",2023 리그 오브 레전드 월드 챔피언십 주제곡,2024.01.01.01,46175,2024,1,1,1,1
9019,72,Snowman,Sia,Everyday Is Christmas (Snowman Deluxe Edition),2024.01.01.01,265321,2024,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
2755102,96,그날 이후로,나윤권,김형석 With 나윤권,2010.12.26.05,2379,2010,12,26,5,1
2755103,97,I'll Be Back,2PM,Still 2:00pm,2010.12.26.05,9493,2010,12,26,5,1
2755104,98,널 지우는 일,김지수,널 지우는 일,2010.12.26.05,1635,2010,12,26,5,1
2755105,99,Thanks To,"용준형, 양요섭",My Story,2010.12.26.05,18397,2010,12,26,5,1


In [109]:
result_data.to_csv("./except_both_title.csv",index=False)

In [106]:
data.loc[(data.title == '벚꽃 엔딩')&(data.season == 1)]

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
17986,87,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2017.02.27.01,345228,2017,2,27,1,1
20471,75,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2016.02.29.01,345228,2016,2,29,1,1
23091,95,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2015.02.23.04,345230,2015,2,23,4,1
25789,93,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2014.02.24.04,345231,2014,2,24,4,1
28382,87,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2013.02.25.04,345231,2013,2,25,4,1


In [104]:
result_data.loc[result_data.title=='벚꽃 엔딩']

Unnamed: 0,rank,title,singer,album,date,like,year_x,month,day,week,season


In [91]:
data.loc[data.title=='Thanks To']

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
37852,99,Thanks To,"용준형, 양요섭",My Story,2010.12.26.05,18397,2010,12,26,5,1


In [79]:
tmp_df = pd.merge(spring_songs,winter_songs,on=['title','year']).drop_duplicates(subset=['title','singer_x'],kepp=False)
both_title_list = tmp_df.title.unique()

In [80]:
tmp_df

Unnamed: 0,rank_x,title,singer_x,album_x,date_x,like_x,year,month_x,day_x,week_x,season_x,rank_y,singer_y,album_y,date_y,like_y,month_y,day_y,week_y,season_y
0,1,밤양갱,비비 (BIBI),밤양갱,2024.03.04.01,133504,2024,3,4,1,0,21,비비 (BIBI),밤양갱,2024.02.12.03,133503,2,12,3,1
3,2,첫 만남은 계획대로 되지 않아,TWS (투어스),TWS 1st Mini Album ‘Sparkling Blue’,2024.03.04.01,114368,2024,3,4,1,0,61,TWS (투어스),TWS 1st Mini Album ‘Sparkling Blue’,2024.01.29.01,114367,1,29,1,1
8,3,Love wins all,아이유,The Winning,2024.03.04.01,160796,2024,3,4,1,0,3,아이유,The Winning,2024.01.22.04,160797,1,22,4,1
14,4,EASY,LE SSERAFIM (르세라핌),EASY,2024.03.04.01,68378,2024,3,4,1,0,17,LE SSERAFIM (르세라핌),EASY,2024.02.19.04,68378,2,19,4,1
16,5,나는 아픈 건 딱 질색이니까,(여자)아이들,2,2024.03.04.01,115129,2024,3,4,1,0,39,(여자)아이들,2,2024.02.19.04,115129,2,19,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75872,95,내 여자친구를 부탁해 (Say No),비스트,Shock Of The New Era,2010.03.07.02,14854,2010,3,7,2,0,59,비스트,Shock Of The New Era,2010.02.28.01,14854,2,28,1,1
75873,96,JoJo,SHINee (샤이니),"2009, Year Of Us",2010.03.07.02,20943,2010,3,7,2,0,34,SHINee (샤이니),"2009, Year Of Us",2010.01.03.01,20943,1,3,1,1
75882,99,Everything Is Nothing,Sweetbox,Everything Is Nothing,2010.03.07.02,862,2010,3,7,2,0,88,Sweetbox,Everything Is Nothing,2010.02.21.04,862,2,21,4,1
75884,100,Sign,브라운아이드걸스,Sound-G Sign,2010.03.07.02,25604,2010,3,7,2,0,17,브라운아이드걸스,Sound-G Sign,2010.01.03.01,25604,1,3,1,1


In [78]:
tmp_df

Unnamed: 0,rank_x,title,singer_x,album_x,date_x,like_x,year,month_x,day_x,week_x,season_x,rank_y,singer_y,album_y,date_y,like_y,month_y,day_y,week_y,season_y
0,1,밤양갱,비비 (BIBI),밤양갱,2024.03.04.01,133504,2024,3,4,1,0,21,비비 (BIBI),밤양갱,2024.02.12.03,133503,2,12,3,1
1,1,밤양갱,비비 (BIBI),밤양갱,2024.03.04.01,133504,2024,3,4,1,0,2,비비 (BIBI),밤양갱,2024.02.19.04,133503,2,19,4,1
2,1,밤양갱,비비 (BIBI),밤양갱,2024.03.04.01,133504,2024,3,4,1,0,1,비비 (BIBI),밤양갱,2024.02.26.05,133503,2,26,5,1
3,2,첫 만남은 계획대로 되지 않아,TWS (투어스),TWS 1st Mini Album ‘Sparkling Blue’,2024.03.04.01,114368,2024,3,4,1,0,61,TWS (투어스),TWS 1st Mini Album ‘Sparkling Blue’,2024.01.29.01,114367,1,29,1,1
4,2,첫 만남은 계획대로 되지 않아,TWS (투어스),TWS 1st Mini Album ‘Sparkling Blue’,2024.03.04.01,114368,2024,3,4,1,0,11,TWS (투어스),TWS 1st Mini Album ‘Sparkling Blue’,2024.02.05.02,114367,2,5,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77870,94,죽어도 못 보내,2am,죽어도 못 보내,2010.05.23.04,27194,2010,5,23,4,0,2,2am,죽어도 못 보내,2010.01.31.01,27194,1,31,1,1
77871,94,죽어도 못 보내,2am,죽어도 못 보내,2010.05.23.04,27194,2010,5,23,4,0,1,2am,죽어도 못 보내,2010.02.07.02,27194,2,7,2,1
77872,94,죽어도 못 보내,2am,죽어도 못 보내,2010.05.23.04,27194,2010,5,23,4,0,1,2am,죽어도 못 보내,2010.02.14.03,27194,2,14,3,1
77873,94,죽어도 못 보내,2am,죽어도 못 보내,2010.05.23.04,27194,2010,5,23,4,0,2,2am,죽어도 못 보내,2010.02.21.04,27194,2,21,4,1


In [54]:
title_list[:10]

array(['밤양갱', '첫 만남은 계획대로 되지 않아', 'Love wins all', 'EASY',
       '나는 아픈 건 딱 질색이니까', 'To. X', '비의 랩소디', '홀씨', 'Love 119',
       'Perfect Night'], dtype=object)

In [75]:
data.loc[data.title == '벚꽃 엔딩']

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
1351,52,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2024.04.01.01,345227,2024,4,1,1,0
1448,49,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2024.04.08.02,345227,2024,4,8,2,0
2752,53,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2023.03.27.05,345227,2023,3,27,5,0
2866,67,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2023.04.03.01,345227,2023,4,3,1,0
2995,96,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2023.04.10.02,345227,2023,4,10,2,0
...,...,...,...,...,...,...,...,...,...,...,...
31784,3,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2012.04.29.01,345231,2012,4,29,1,0
31886,5,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2012.05.06.02,345231,2012,5,6,2,0
31987,7,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2012.05.13.03,345231,2012,5,13,3,0
32090,11,벚꽃 엔딩,버스커 버스커,버스커 버스커 1집,2012.05.20.04,345231,2012,5,20,4,0


# 1. 연도-계절 별 차트 등장회수 TOP5 조회

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
0,1,Perfect Night,LE SSERAFIM (르세라핌),Perfect Night,2024.01.01.01,114191,2024,1,1,1,1
1,2,Drama,aespa,Drama - The 4th Mini Album,2024.01.01.01,85818,2024,1,1,1,1
2,3,To. X,태연 (TAEYEON),To. X - The 5th Mini Album,2024.01.01.01,143931,2024,1,1,1,1
3,4,비의 랩소디,임재현,비의 랩소디,2024.01.01.01,68931,2024,1,1,1,1
4,5,첫 눈,EXO,겨울 스페셜 앨범 '12월의 기적 (Miracles In December)',2024.01.01.01,287040,2024,1,1,1,1


In [28]:
# 연도별 계절에 따른 가장 많이 등장한 title,singer,date 
# date ???

# 결과를 저장할 빈 데이터 프레임을 생성
columns=['year', 'season','title', 'singer', 'count']
result_df = pd.DataFrame(columns=columns)

temp = []

# grouped_data = data.groupby(['year','season','title','singer']).size().reset_index(name='count')
grouped_data = data.groupby(['year','season'])

for name,group in grouped_data:
    year,season = name

    # 노래 제목과 가수로 튜플로 만들어 등장회수 세기.
    song_counter = Counter(zip(group['title'],group['singer'])) # {(노래1,가수1):2,(노래2,가수1):1, ....}
    most_song = song_counter.most_common(5) # 상위 5개 [((노래1,가수1),2).... ]

    for (title,singer),count in most_song:
        temp_df = pd.DataFrame(
            {
                'year' : [year],
                'season' : [season],
                'title' : [title],
                'singer' : [singer],
                'count' : [count]
            }
        )
        temp.append(temp_df)
result_df = pd.concat(temp,ignore_index=True)
        



In [29]:
result_df

Unnamed: 0,year,season,title,singer,count
0,2010,0,너 때문에 미쳐,티아라,12
1,2010,0,루팡 (Lupin),카라,12
2,2010,0,Bubble Love,"MC몽, 서인영",12
3,2010,0,주변인,아웃사이더,12
4,2010,0,Oh!,소녀시대 (GIRLS' GENERATION),12
...,...,...,...,...,...
145,2024,1,Perfect Night,LE SSERAFIM (르세라핌),9
146,2024,1,Drama,aespa,9
147,2024,1,To. X,태연 (TAEYEON),9
148,2024,1,비의 랩소디,임재현,9


# 2. 3년 이상 연속으로 등장한 노래 추출

In [30]:
raw_data.head()

Unnamed: 0,rank,title,singer,album,date,like,year,month,day,week,season
0,1,Perfect Night,LE SSERAFIM (르세라핌),Perfect Night,2024.01.01.01,114191,2024,1,1,1,1
1,2,Drama,aespa,Drama - The 4th Mini Album,2024.01.01.01,85818,2024,1,1,1,1
2,3,To. X,태연 (TAEYEON),To. X - The 5th Mini Album,2024.01.01.01,143931,2024,1,1,1,1
3,4,비의 랩소디,임재현,비의 랩소디,2024.01.01.01,68931,2024,1,1,1,1
4,5,첫 눈,EXO,겨울 스페셜 앨범 '12월의 기적 (Miracles In December)',2024.01.01.01,287040,2024,1,1,1,1


In [31]:
def is_consecutive(years_set):
    sorted_years = sorted(years_set) # 연도 오름차순 정렬
    consecutive_count = 1 # 최소 1년은 등장하므로 1로 초기화.
    max_consecutive = 1 # 연속연도 count 변수

    for i in range(len(sorted_years)-1):
        # 현재연도와 다음연도가 연속적인지 확인. 2023+1 == 2024 
        if sorted_years[i] + 1 == sorted_years[i + 1] :
            consecutive_count += 1
            max_consecutive = max(max_consecutive,consecutive_count)
        else : 
            consecutive_count = 1
    return max_consecutive >= 3 # 3년연속 등장여부

In [37]:
# 연도를 기준으로 등장 연도 조회 ( 중복제거를 위해 set 사용 )
# data = data
grouped_data = data.groupby(['title','singer'])['year'].apply(set).reset_index()

In [40]:
# 3년이상 연속출현 여부 , 1 : 출현 0 : 미출현
grouped_data['more_than_3'] = grouped_data['year'].apply(lambda x : 1 if is_consecutive(x) else 0 )

In [41]:
# top100 차트인 했던 연도수 리스트컬럼 
grouped_data['year'] = grouped_data['year'].apply(lambda x :list(x))

In [42]:
result_df_2 = grouped_data.rename(columns = {'year':'years'})

In [43]:
result_df_2.loc[result_df_2.more_than_3 == 1]

Unnamed: 0,title,singer,years,more_than_3
38,2002,Anne-Marie,"[2019, 2020, 2021]",1
71,A bientot,임영웅,"[2024, 2022, 2023]",1
88,After LIKE,IVE (아이브),"[2024, 2022, 2023]",1
101,All I Want for Christmas Is You,Mariah Carey,"[2016, 2017, 2018, 2019, 2020, 2021, 2022, 202...",1
122,Attention,NewJeans,"[2024, 2022, 2023]",1
...,...,...,...,...
4713,크리스마스니까,"성시경, 박효신, 이석훈, 서인국, VIXX (빅스)","[2016, 2017, 2018, 2019, 2020, 2021, 2022, 202...",1
4770,피 땀 눈물,방탄소년단,"[2016, 2017, 2018]",1
4837,한숨,이하이,"[2016, 2017, 2018]",1
4856,해요 (2022),#안녕,"[2024, 2022, 2023]",1


In [16]:
raw_data.loc[(raw_data['title']=='한숨')&(raw_data['singer']=='이하이')]

Unnamed: 0,rank,title,singer,album,date,like,month,year,season
14532,33,한숨,이하이,SEOULITE,2018.01.01 ~ 2018.01.07,287328,1,2018,1
14633,34,한숨,이하이,SEOULITE,2018.01.08 ~ 2018.01.14,287329,1,2018,1
14733,34,한숨,이하이,SEOULITE,2018.01.15 ~ 2018.01.21,287329,1,2018,1
14844,45,한숨,이하이,SEOULITE,2018.01.22 ~ 2018.01.28,287329,1,2018,1
14951,52,한숨,이하이,SEOULITE,2018.01.29 ~ 2018.02.04,287329,1,2018,1
15057,58,한숨,이하이,SEOULITE,2018.02.05 ~ 2018.02.11,287329,2,2018,1
15165,66,한숨,이하이,SEOULITE,2018.02.12 ~ 2018.02.18,287329,2,2018,1
15269,70,한숨,이하이,SEOULITE,2018.02.19 ~ 2018.02.25,287329,2,2018,1
15372,73,한숨,이하이,SEOULITE,2018.02.26 ~ 2018.03.04,287329,2,2018,1
15486,87,한숨,이하이,SEOULITE,2018.03.05 ~ 2018.03.11,287329,3,2018,0


# 3. 각 노래별 최초 등장일 조회

In [17]:
data.head()

Unnamed: 0,rank,title,singer,album,date,like,month,year,season
0,1,Perfect Night,LE SSERAFIM (르세라핌),Perfect Night,2024.01.01 ~ 2024.01.07,114126,1,2024,1
1,2,Drama,aespa,Drama - The 4th Mini Album,2024.01.01 ~ 2024.01.07,85715,1,2024,1
2,3,To. X,태연 (TAEYEON),To. X - The 5th Mini Album,2024.01.01 ~ 2024.01.07,143705,1,2024,1
3,4,비의 랩소디,임재현,비의 랩소디,2024.01.01 ~ 2024.01.07,68730,1,2024,1
4,5,첫 눈,EXO,겨울 스페셜 앨범 '12월의 기적 (Miracles In December)',2024.01.01 ~ 2024.01.07,287039,1,2024,1


In [18]:
data = raw_data
data['start_date'] = data['date'].apply(lambda x : x.split('~')[0].strip())
data['start_date'] = pd.to_datetime(data['start_date'])

result_df_3 = data.groupby(['title','singer'])['start_date'].min().reset_index()

In [19]:
result_df_3

Unnamed: 0,title,singer,start_date
0,#결별,"길구봉구, 박보람",2018-04-16
1,#첫사랑,볼빨간사춘기,2018-01-08
2,%% (응응),Apink (에이핑크),2019-01-07
3,...하고 싶다,V.One,2010-03-07
4,..IS YOU,XIA (준수),2016-05-16
...,...,...,...
4961,히히하헤호,"마마무 (Mamamoo), 긱스 (Geeks)",2014-05-26
4962,힐링이 필요해,로이킴,2012-12-03
4963,힘든 건 사랑이 아니다,임창정,2020-11-30
4964,"힘을 내요, 그대",더필름,2011-01-02


# 4. 3년 이상 등장한 음원 목록 조회.

In [20]:
data = raw_data
data.head()

Unnamed: 0,rank,title,singer,album,date,like,month,year,season,start_date
0,1,Perfect Night,LE SSERAFIM (르세라핌),Perfect Night,2024.01.01 ~ 2024.01.07,114126,1,2024,1,2024-01-01
1,2,Drama,aespa,Drama - The 4th Mini Album,2024.01.01 ~ 2024.01.07,85715,1,2024,1,2024-01-01
2,3,To. X,태연 (TAEYEON),To. X - The 5th Mini Album,2024.01.01 ~ 2024.01.07,143705,1,2024,1,2024-01-01
3,4,비의 랩소디,임재현,비의 랩소디,2024.01.01 ~ 2024.01.07,68730,1,2024,1,2024-01-01
4,5,첫 눈,EXO,겨울 스페셜 앨범 '12월의 기적 (Miracles In December)',2024.01.01 ~ 2024.01.07,287039,1,2024,1,2024-01-01


In [21]:
# 중복 연도수 제거
grouped_data = data.groupby(['title','singer'])['year'].apply(set).reset_index()
grouped_data['year'] = grouped_data['year'].apply(lambda x : list(x))
grouped_data.head()

Unnamed: 0,title,singer,year
0,#결별,"길구봉구, 박보람",[2018]
1,#첫사랑,볼빨간사춘기,[2018]
2,%% (응응),Apink (에이핑크),[2019]
3,...하고 싶다,V.One,[2010]
4,..IS YOU,XIA (준수),[2016]


In [28]:
# 등장년도 회수 칼럼
grouped_data['chartin_counts'] = grouped_data['year'].apply(lambda x : len(x))

# 등장년도 3이상 음원 조회
result_df_4 = grouped_data[grouped_data['chartin_counts'] >= 3]



Unnamed: 0,title,singer,years,chartin_counts
38,2002,Anne-Marie,"[2019, 2020, 2021]",3
69,A,Ariana Grande,"[2018, 2014, 2015]",3
79,A,Maroon 5,"[2016, 2017, 2018, 2011, 2012, 2014, 2015]",7
108,A,이해리 (다비치),"[2012, 2013, 2014]",3
115,A bientot,임영웅,"[2024, 2022, 2023]",3
...,...,...,...,...
4696,크리스마스니까,"성시경, 박효신, 이석훈, 서인국, VIXX (빅스)","[2016, 2017, 2018, 2019, 2020, 2021, 2022, 202...",13
4753,피 땀 눈물,방탄소년단,"[2016, 2017, 2018]",3
4820,한숨,이하이,"[2016, 2017, 2018]",3
4838,해요 (2022),#안녕,"[2024, 2022, 2023]",3


# 5. 3년 이상 등장한 데이터 + 3년 연속등장한 데이터 JOIN

In [29]:
result_df_2.head()

Unnamed: 0,title,singer,years,more_than_3
0,#결별,"길구봉구, 박보람",[2018],0
1,#첫사랑,볼빨간사춘기,[2018],0
2,%% (응응),Apink (에이핑크),[2019],0
3,...하고 싶다,V.One,[2010],0
4,..IS YOU,XIA (준수),[2016],0


In [32]:
print(result_df_2.shape,result_df_4.shape)

(4966, 4) (105, 4)


In [33]:
tmp_result_df_4 = grouped_data

tmp_result_df_4 = tmp_result_df_4.rename(columns={'year':'years'})

In [34]:
print(result_df_2.shape,tmp_result_df_4.shape)

(4966, 4) (4966, 4)


In [37]:
result_df_5 = pd.merge(left=result_df_2,right=tmp_result_df_4,on=['title','singer'])

In [39]:
result_df_5.columns

Index(['title', 'singer', 'years_x', 'more_than_3', 'years_y',
       'chartin_counts'],
      dtype='object')

In [40]:
result_df_5.drop(['years_x'],axis=1,inplace=True)

In [42]:
result_df_5 = result_df_5.rename(columns={'years_y':'years'})

In [43]:
result_df_5

Unnamed: 0,title,singer,more_than_3,years,chartin_counts
0,#결별,"길구봉구, 박보람",0,[2018],1
1,#첫사랑,볼빨간사춘기,0,[2018],1
2,%% (응응),Apink (에이핑크),0,[2019],1
3,...하고 싶다,V.One,0,[2010],1
4,..IS YOU,XIA (준수),0,[2016],1
...,...,...,...,...,...
4961,히히하헤호,"마마무 (Mamamoo), 긱스 (Geeks)",0,[2014],1
4962,힐링이 필요해,로이킴,0,"[2012, 2013]",2
4963,힘든 건 사랑이 아니다,임창정,0,"[2020, 2021]",2
4964,"힘을 내요, 그대",더필름,0,[2011],1


In [44]:
# 3년 이상 등장 데이터 조회
result_df_5.loc[result_df_5.chartin_counts >= 3]

Unnamed: 0,title,singer,more_than_3,years,chartin_counts
38,2002,Anne-Marie,1,"[2019, 2020, 2021]",3
69,A,Ariana Grande,0,"[2018, 2014, 2015]",3
79,A,Maroon 5,1,"[2016, 2017, 2018, 2011, 2012, 2014, 2015]",7
108,A,이해리 (다비치),1,"[2012, 2013, 2014]",3
115,A bientot,임영웅,1,"[2024, 2022, 2023]",3
...,...,...,...,...,...
4696,크리스마스니까,"성시경, 박효신, 이석훈, 서인국, VIXX (빅스)",1,"[2016, 2017, 2018, 2019, 2020, 2021, 2022, 202...",13
4753,피 땀 눈물,방탄소년단,1,"[2016, 2017, 2018]",3
4820,한숨,이하이,1,"[2016, 2017, 2018]",3
4838,해요 (2022),#안녕,1,"[2024, 2022, 2023]",3


In [45]:
# 3년 연속 등장 데이터 조회
result_df_5.loc[result_df_5.more_than_3 == 1]

Unnamed: 0,title,singer,more_than_3,years,chartin_counts
38,2002,Anne-Marie,1,"[2019, 2020, 2021]",3
79,A,Maroon 5,1,"[2016, 2017, 2018, 2011, 2012, 2014, 2015]",7
108,A,이해리 (다비치),1,"[2012, 2013, 2014]",3
115,A bientot,임영웅,1,"[2024, 2022, 2023]",3
131,After LIKE,IVE (아이브),1,"[2024, 2022, 2023]",3
...,...,...,...,...,...
4696,크리스마스니까,"성시경, 박효신, 이석훈, 서인국, VIXX (빅스)",1,"[2016, 2017, 2018, 2019, 2020, 2021, 2022, 202...",13
4753,피 땀 눈물,방탄소년단,1,"[2016, 2017, 2018]",3
4820,한숨,이하이,1,"[2016, 2017, 2018]",3
4838,해요 (2022),#안녕,1,"[2024, 2022, 2023]",3
