In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import datetime
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.set_option('display.max_columns', None) 

In [22]:
df = pd.read_csv('data.csv')

In [23]:
df = df.drop(columns=['Ended', 'Explore link'])
df['Started'] = pd.to_datetime(df['Started'])

In [24]:
# Function to convert string to numeric
def convert_to_number(s):
    s = s.replace('+', '')  # Remove the '+' sign
    if 'M' in s:
        return int(float(s.replace('M', '')) * 1_000_000)
    elif 'K' in s:
        return int(float(s.replace('K', '')) * 1_000)
    else:
        return int(s)  # For plain numbers

# Apply the function to the array
numeric_data = np.array([convert_to_number(value) for value in df['Search volume']])

# Output the result
df['Search volume'] = numeric_data

In [25]:
df.head()

Unnamed: 0,Trends,Search volume,Started,Trend breakdown
0,wicked book,2000000,2024-11-20 22:00:00+05:00,"wicked book,elphaba,wicked musical,wicked trai..."
1,moana 2,1000000,2024-11-22 07:20:00+05:00,"moana 2,moana 2 release date,when does moana 2..."
2,who won dancing with the stars 2024,1000000,2024-11-25 22:00:00+05:00,"who won dancing with the stars 2024,dancing wi..."
3,pam bondi,1000000,2024-11-21 13:40:00+05:00,pam bondi
4,lake effect snow warning,1000000,2024-11-27 12:20:00+05:00,"lake effect snow warning,thanksgiving,thanksgi..."


In [26]:
from transformers import pipeline

# Use a zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [27]:
categories = [
    "Technology",
    "Health",
    "Travel",
    "Food",
    "Fashion",
    "Home",
    "Education",
    "Sports",
    "Entertainment",
    "Finance",
    "Automotive",
    "Shopping",
    "Parenting",
    "Career",
    "Relationships",
    "Pets",
    "Science",
    "News",
    "Art",
    "Religion",
    "Hobbies",
    "Events",
    "Real Estate",
    "Environment",
    "Politics",
    "Gaming",
    "Social Media",
    "Self-Improvement",
    "Legal",
    "Nonprofit"
]


In [28]:
#setting empty values for the columns
index = -1
for label in categories:
    df[label + '_m'] = -1

for j in range(len(df)):
    #counter for progress/debugging
    index+=1
    if(index%10 == 0): 
        print(index)
        
    #running the classifier on the column    
    res = classifier(
        df.iloc[j]['Trends'],
        candidate_labels = categories,
        multi_label = True
    )
    #setting the column values according to the output from the classifier ("_m" = multiclass)
    for i in range(len(res['labels'])):
        df[res['labels'][i]+ '_m'].iloc[j] = res['scores'][i]

0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[res['labels'][i]+ '_m'].iloc[j] = res['scores'][i]


10
20
30
40
50
60
70
80
90
100
110
120
130


In [29]:
# df_demo['max'] = df_demo[['cost_m', 'efficient_m', 'effective_m', 'ease of use_m']].max(axis=1)

In [30]:
df['max'] = df.iloc[:,4:].max(axis=1)

In [33]:
len(df)
len(df[df['max'] > 0.8])

138

60

In [34]:
df.head()

Unnamed: 0,Trends,Search volume,Started,Trend breakdown,Technology_m,Health_m,Travel_m,Food_m,Fashion_m,Home_m,Education_m,Sports_m,Entertainment_m,Finance_m,Automotive_m,Shopping_m,Parenting_m,Career_m,Relationships_m,Pets_m,Science_m,News_m,Art_m,Religion_m,Hobbies_m,Events_m,Real Estate_m,Environment_m,Politics_m,Gaming_m,Social Media_m,Self-Improvement_m,Legal_m,Nonprofit_m,max
0,wicked book,2000000,2024-11-20 22:00:00+05:00,"wicked book,elphaba,wicked musical,wicked trai...",0.004464,0.130361,0.015257,0.000243,0.00067,0.044068,0.003254,0.000524,0.377782,0.027361,0.000223,0.000212,0.254407,0.031447,0.37224,0.007593,0.040934,0.000826,0.006676,0.00449,0.003745,0.00427,0.000927,0.017872,0.002616,0.000436,0.000403,0.262591,0.00933,0.010718,0.377782
1,moana 2,1000000,2024-11-22 07:20:00+05:00,"moana 2,moana 2 release date,when does moana 2...",0.080616,0.033912,0.116222,0.077787,0.007832,0.081237,0.031292,0.014186,0.81542,0.036195,0.008236,0.019201,0.029675,0.052537,0.170155,0.002242,0.008386,0.006615,0.05375,0.019532,0.02238,0.035427,0.040113,0.086581,0.009448,0.009407,0.084731,0.360741,0.152338,0.070228,0.81542
2,who won dancing with the stars 2024,1000000,2024-11-25 22:00:00+05:00,"who won dancing with the stars 2024,dancing wi...",0.203445,0.169386,0.026686,0.001071,0.008314,0.138849,0.007007,0.031543,0.882638,0.046771,0.013208,0.000169,0.003898,0.105946,0.092128,0.007526,0.00507,0.037264,0.058221,0.005675,0.019706,0.339277,0.017035,0.144254,0.009806,0.001517,0.054112,0.092895,0.131959,0.016144,0.882638
3,pam bondi,1000000,2024-11-21 13:40:00+05:00,pam bondi,0.215622,0.191876,0.181266,0.020587,0.068756,0.61362,0.117742,0.029908,0.593471,0.754589,0.027454,0.043891,0.276577,0.321844,0.812681,0.035099,0.02972,0.032185,0.184779,0.05695,0.099532,0.069706,0.134555,0.08921,0.055392,0.081734,0.173835,0.491311,0.410397,0.137016,0.812681
4,lake effect snow warning,1000000,2024-11-27 12:20:00+05:00,"lake effect snow warning,thanksgiving,thanksgi...",0.011787,0.251646,0.877081,0.002682,0.000443,0.144784,0.004874,0.003223,0.001545,0.028698,0.034216,0.001645,0.034591,0.101806,0.029764,0.076818,0.004937,0.588595,0.000813,0.000792,0.028213,0.516546,0.001424,0.904508,0.001216,0.00354,0.006912,0.105119,0.001953,0.009298,0.904508
