# Import the necessary libraries

In [1]:
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import re

# Configure logging
logging.basicConfig(filename='../logs/data_processing.log',
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')
  

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../scripts')))
from load_data import Load_Data


In [2]:
df = pd.read_excel('../data/telegram_data.xlsx')



In [28]:
print("Checking for NaN values in the 'Message' column:")
nan_count = df['Message'].isnull().sum()
print(f"Number of NaN values in 'Message' column: {nan_count}")

Checking for NaN values in the 'Message' column:
Number of NaN values in 'Message' column: 1849


In [29]:
df = df.dropna(subset=['Message'])

# Print the shape of the dataset after dropping NaN values in the "Message" column
print(f"Dataset shape after dropping NaN values in 'Message' column: {df.shape}")

Dataset shape after dropping NaN values in 'Message' column: (3166, 6)


In [30]:
df.head()

Unnamed: 0,Channel Title,Channel Username,ID,Message,Date,Media Path
5,Sheger online-store,@Shageronlinestore,5328.0,💥3pcs silicon brush spatulas\n\n\n \n ...,2024-09-20 11:50:02+00:00,photos/@Shageronlinestore_5328.jpg
6,Sheger online-store,@Shageronlinestore,5327.0,💥Mandoline Slicer\n\n👉 ጊዜ ቆጣቢ ስላይስ ማድረጊያ \n👉 ...,2024-09-20 08:11:40+00:00,
7,Sheger online-store,@Shageronlinestore,5326.0,💥Table Desk Edge Guard Strip\n 💯 High Qu...,2024-09-20 05:23:18+00:00,
8,Sheger online-store,@Shageronlinestore,5325.0,💥Table Desk Edge Guard Strip\n 💯 High Qu...,2024-09-20 05:21:14+00:00,photos/@Shageronlinestore_5325.jpg
10,Sheger online-store,@Shageronlinestore,5323.0,"💥Only baby 3in1 double bottle milk warmer,ster...",2024-09-19 13:54:46+00:00,photos/@Shageronlinestore_5323.jpg


In [32]:
message_df=df['Message']
message_df

5       💥3pcs silicon brush spatulas\n\n\n      \n    ...
6       💥Mandoline Slicer\n\n👉 ጊዜ ቆጣቢ ስላይስ ማድረጊያ \n👉  ...
7       💥Table Desk Edge Guard Strip\n       💯 High Qu...
8       💥Table Desk Edge Guard Strip\n       💯 High Qu...
10      💥Only baby 3in1 double bottle milk warmer,ster...
                              ...                        
5009    🎯 Kitchen Sticker\n\nለኪችንዎ ውበት እጅግ ተመራጭ \n🔰ውሀ ...
5010    🎯 3in1 One Step Hair Dryer & Styler \n\n👉 ከርል ...
5011    ✅ Home GYM - X5 slimming vibrator \n\n📢📢📢 ታላቅ ...
5012    ለጤናችን-Health & Personal Care\n\n📍FingerTip Pul...
5013    #Finger_tip_pulse_oximeter\n       #በተመጣጣኝ_ዋጋ\...
Name: Message, Length: 3166, dtype: object

In [33]:

# Example of your DataFrame
# df = pd.DataFrame({'Message': ['💥3pcs silicon brush spatulas...', '💥Mandoline Slicer...', ...]})

# Define a function to remove emojis
def remove_emojis(text):
    emoji_pattern = re.compile(
        "[" 
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F700-\U0001F77F"  # alchemical symbols
        "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
        "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
        "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
        "\U0001FA00-\U0001FA6F"  # Chess Symbols
        "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
        "\U00002702-\U000027B0"  # Dingbats
        "\U000024C2-\U0001F251" 
        "]+", 
        flags=re.UNICODE
    )
    return emoji_pattern.sub(r'', text)

# Apply the function to the 'Message' column
df['Message'] = df['Message'].apply(remove_emojis)

# Display the updated DataFrame
print(df.head())


          Channel Title    Channel Username      ID  \
5   Sheger online-store  @Shageronlinestore  5328.0   
6   Sheger online-store  @Shageronlinestore  5327.0   
7   Sheger online-store  @Shageronlinestore  5326.0   
8   Sheger online-store  @Shageronlinestore  5325.0   
10  Sheger online-store  @Shageronlinestore  5323.0   

                                              Message  \
5   3pcs silicon brush spatulas\n\n\n      \n     ...   
6   Mandoline Slicer\n\n ጊዜ ቆጣቢ ስላይስ ማድረጊያ \n  ለእጅ...   
7   Table Desk Edge Guard Strip\n        High Qual...   
8   Table Desk Edge Guard Strip\n        High Qual...   
10  Only baby 3in1 double bottle milk warmer,steri...   

                         Date                          Media Path  
5   2024-09-20 11:50:02+00:00  photos/@Shageronlinestore_5328.jpg  
6   2024-09-20 08:11:40+00:00                                 NaN  
7   2024-09-20 05:23:18+00:00                                 NaN  
8   2024-09-20 05:21:14+00:00  photos/@Shageronlinesto

In [34]:
df = df.to_csv('../data/clean_data.csv')

# Token Labeling for Product, Price, and Location Recognition in UTF-8 Messages

In [41]:
import re

def label_message_utf8_with_birr(message):
    # Check if the message is None or empty
    if not isinstance(message, str) or message.strip() == "":
        return ""
    
    # Split the message at the first occurrence of '\n'
    if '\n' in message:
        first_line, remaining_message = message.split('\n', 1)
    else:
        first_line, remaining_message = message, ""
    
    labeled_tokens = []
    
    # Tokenize the first line
    first_line_tokens = re.findall(r'\S+', first_line)
    
    # Label the first token as B-PRODUCT and the rest as I-PRODUCT
    if first_line_tokens:
        labeled_tokens.append(f"{first_line_tokens[0]} B-PRODUCT")  # First token as B-PRODUCT
        for token in first_line_tokens[1:]:
            labeled_tokens.append(f"{token} I-PRODUCT")  # Remaining tokens as I-PRODUCT
    
    # Process the remaining message normally
    if remaining_message:
        lines = remaining_message.split('\n')
        for line in lines:
            tokens = re.findall(r'\S+', line)  # Tokenize each line
            
            for token in tokens:
                # Check if token is a price (e.g., 500 ETB, $100, or ብር)
                if re.match(r'^\d{10,}$', token):
                    labeled_tokens.append(f"{token} O")  # Label as O for "other" or outside of any entity
                elif re.match(r'^\d+(\.\d{1,2})?$', token) or 'ETB' in token or 'ዋጋ' in token or '$' in token or 'ብር' in token:
                    labeled_tokens.append(f"{token} I-PRICE")
                # Check if token could be a location (e.g., cities or general location names)
                elif any(loc in token for loc in ['Addis Ababa', 'ለቡ', 'ለቡ መዳህኒዓለም', 'መገናኛ', 'ቦሌ', 'ሜክሲኮ']):
                    labeled_tokens.append(f"{token} I-LOC")
                # Assume other tokens are part of a product name or general text
                else:
                    labeled_tokens.append(f"{token} O")
    
    return "\n".join(labeled_tokens)

# Apply the updated function to the non-null messages
df['Labeled_Message'] = df['Message'].apply(lambda x: label_message_utf8_with_birr(x) if x is not None else "")

# Display the updated DataFrame
df.head()


TypeError: 'NoneType' object is not subscriptable

In [35]:
# Save the updated labeled dataset to a file in CoNLL format
labeled_data_birr_path = 'labeled_telegram_product_price_location.txt-'
with open(labeled_data_birr_path, 'w', encoding='utf-8') as f:
    for index, row in df.iterrows():
        f.write(f"{row['Labeled_Message']}\n\n")

In [92]:
# categories = {
#     'kids': [
#         'toy', 'children', 'kids', 'መጫወቻ', 'play', 'games', 'fun', 'educational', 
#         'puzzle', 'doll', 'action figure', 'stuffed animal', 'arts and crafts', 
#         'books', 'outdoor toys', 'building blocks', 'baby', 'toddler', 'Baby',
#         'መጫወቻዎች'
#     ],
#     'men': [
#         'men', 'grooming', 'shaving', 'beard', 'razor', 'aftershave', 
#         'scent', 'deodorant', 'grooming kit', 'haircut', 'fashion', 'suits', 
#         'wallet', 'watch', 'accessories', 'fitness', 'shoes', 
#         'አስተካክል', 'የብርሃን ዕቃዎች'
#     ],
#     'women': [
#         'women', 'makeup', 'hair dryer', 'lipstick', 'foundation', 'mascara', 
#         'skincare', 'nails', 'jewelry', 'dresses', 'handbags', 'accessories', 
#         'fashion', 'shoes', 'perfume', 'hairstyle', 'wellness', 'beauty', 'style','Hair Drye',
#         'እንቅስቃሴ', 'የፀጉር እቃዎች', 'የውበት እቃዎች'
#     ],
#     'sport': [
#         'gym', 'GYM','fitness', 'exercise', 'እንቅስቃሴ', 'workout', 'training', 'yoga', 
#         'running', 'cycling', 'sportswear', 'equipment', 'weights', 'cardio', 
#         'aerobics', 'team sports', 'outdoor activities', 'athletics', 'health',  'workout', 'sports',
#         'ስፖርት', 'የእንቅስቃሴ መሳሪያዎች'
#     ],
#     'groceries': [
#         'food', 'snacks', 'grocery', 'ምግብ', 'produce', 'fruits', 'vegetables', 
#         'meat', 'dairy', 'bread', 'cereal', 'beverages', 'frozen', 'canned', 
#         'organic', 'bulk', 'condiments', 'spices', 'snack bars', 'breakfast', 
#         'እንቁላል', 'ወተር', 'የምግብ እቃዎች'
#     ],
#     'accessories': [
#         'jewelry', 'bags', 'accessory', 'ቀለበት', 'belts', 'hats', 'scarves', 
#         'sunglasses', 'watches', 'hair accessories', 'wallets', 'phone cases', 
#         'keychains', 'pins', 'brooches', 'fashion', 'style', 'gifts', 'decor', 'የልብስ መቶከሻ\n\n',
#         'የመልክዕ እቃዎች', 'የምታወቅ እቃዎች','Anti-theft ',' Earbuds','PowerBank','Grip Tape','humidifier'
#     ],
#     'health': [
#         'health', 'ጤና', 'wellness', 'nutrition', 'vitamins', 'supplements', 
#         'exercise', 'fitness', 'mental health', 'meditation', 'stress relief', 
#         'doctor', 'check-up', 'first aid', 'hygiene', 'immune system', 'balance', 
#         'self-care', 'አንደኛ ጤና', 'የጤና እቃዎች','pulse'
#     ],
#     'household': [
#         'cleaning', 'furniture', 'decor', 'appliances', 'utensils', 'kitchen', 
#         'bathroom', 'laundry', 'storage', 'organization', 'home improvement', 'pan', 
#         'gardening', 'tools', 'supplies', 'safety', 'maintenance', 'pets', 'spatulas','Kitchen','Mop',
#         'spatulas\n\n','nለኪችንዎ','home', 'comfort', 'ቤት', 'የቤት እቃዎች', 'እንቅስቃሴ','bottle','ፔርሙስ','knife',
#         'Glass','የላዛኛ','stove','Ironing Board','Slicer','BLENDER','MULTIFUNCTIONAL BLENDER','Toilet Brush',
#         'የቢላ ስብስብ','ቢላ','Oven','fridge', 'መጥበሻ','Toilet','Mob','cookware','Blender','KITCHENWARE','ምንጣፍ','Tablemats'
#     ]
# }


In [111]:
def label_message_utf8_with_birr(message):
    tokens = re.findall(r'\S+', message)  # Tokenize while considering non-ASCII characters
    labeled_tokens = []
    
    for token in tokens:
        # Check if token is a price (e.g., 500 ETB, $100, or ብር)
        
        if re.match(r'^\d{10,}$', token):
            labeled_tokens.append(f"{token} O")  # Label as O for "other" or outside of any entity
        elif re.match(r'^\d+(\.\d{1,2})?$', token) or 'ETB' in token or '$' in token or 'ብር' in token:
            labeled_tokens.append(f"{token} I-PRICE")
        
        # Check if token could be a location (e.g., cities or general location names)
        elif any(loc in token for loc in ['Addis Ababa', 'ለቡ', 'ለቡ  መዳህኒዓለም', 'መገናኛ','ቦሌ','ሜክሲኮ']):
            labeled_tokens.append(f"{token} I-LOC")
        
        elif any(loc in token for loc in ['💥']):
            labeled_tokens.append(f"{token} B-Product")
        
        # Assume other tokens are part of a product name (this can be refined)
        else:
            labeled_tokens.append(f"{token} O")
    
    return "\n".join(labeled_tokens)

# Apply the updated function to the non-null messages
df['Labeled_Message'] = df['Message'].apply(label_message_utf8_with_birr)



In [112]:
# Save the updated labeled dataset to a file in CoNLL format
labeled_data_birr_path = 'labeled_telegram_data_price_product_location_birr.txt'
with open(labeled_data_birr_path, 'w', encoding='utf-8') as f:
    for index, row in df.iterrows():
        f.write(f"{row['Labeled_Message']}\n\n")

In [13]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
tokenizer = AutoTokenizer.from_pretrained("masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0")
model = AutoModelForTokenClassification.from_pretrained("masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0")
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
example = df['Message'][10]
ner_results = nlp(example)
print(ner_results)

[{'entity': 'B-LOC', 'score': 0.77822363, 'index': 43, 'word': '▁ስ', 'start': 117, 'end': 118}, {'entity': 'I-LOC', 'score': 0.58282, 'index': 44, 'word': 'ሪ', 'start': 118, 'end': 119}, {'entity': 'I-LOC', 'score': 0.8872894, 'index': 45, 'word': '▁', 'start': 120, 'end': 121}, {'entity': 'I-LOC', 'score': 0.7864612, 'index': 46, 'word': 'ኤም', 'start': 120, 'end': 122}, {'entity': 'I-LOC', 'score': 0.8502413, 'index': 47, 'word': '▁ሲ', 'start': 123, 'end': 124}, {'entity': 'I-LOC', 'score': 0.5737138, 'index': 48, 'word': 'ቲ', 'start': 124, 'end': 125}, {'entity': 'I-LOC', 'score': 0.5543835, 'index': 49, 'word': '▁ሞ', 'start': 126, 'end': 127}, {'entity': 'I-LOC', 'score': 0.8828338, 'index': 50, 'word': 'ል', 'start': 127, 'end': 128}, {'entity': 'I-LOC', 'score': 0.76899004, 'index': 51, 'word': '▁', 'start': 130, 'end': 131}, {'entity': 'I-LOC', 'score': 0.5774763, 'index': 52, 'word': 'ሁለተኛ', 'start': 130, 'end': 134}, {'entity': 'B-LOC', 'score': 0.9574409, 'index': 64, 'word': '

In [14]:
df['Message'][10]


'💥Only baby 3in1 double bottle milk warmer,sterilizer,food steamer\n\n\xa0\xa0\xa0\xa0\xa0\xa0\xa0 ዋጋ:-3000ብር✅\n\n❌ውስን ፍሬ ነው ያለው\n\n🏢 አድራሻ\xa0 ቁ.1👉 ስሪ ኤም ሲቲ ሞል\xa0 ሁለተኛ ፎቅ ቢሮ ቁ. SL-05A(ከ ሊፍቱ ፊት ለ ፊት)\n\n📍ቁ.2 👉ለቡ\xa0 መዳህኒዓለም ቤተ/ክርስቲያን ፊት ለፊት\xa0 #ዛም_ሞል 2ኛ ፎቅ ቢሮ ቁጥር.214\n\n👍ለቡ\xa0ቅርንጫፍ📲0973611819\n\n\n\n\xa0\xa0\xa0\xa0 💧💧💧💧\n\n\n\xa0\xa0\xa0 📲 0909522840\n\xa0\xa0\xa0 📲 0923350054\n\n🔖\n💬\xa0 በTelegram ለማዘዝ ⤵️ ይጠቀሙ\n@shager_onlinestore\n\xa0 \nለተጨማሪ ማብራሪያ የቴሌግራም ገፃችን⤵️\nhttps://t.me/Shageronlinestore'

In [23]:
df

Unnamed: 0,Channel Title,Channel Username,ID,Message,Date,Media Path
5,Sheger online-store,@Shageronlinestore,5328,💥3pcs silicon brush spatulas\n\n⚡እስከ 260°c ሙቀት...,2024-09-20 11:50:02+00:00,photos/@Shageronlinestore_5328.jpg
6,Sheger online-store,@Shageronlinestore,5327,💥Mandoline Slicer\n\n👉 ጊዜ ቆጣቢ ስላይስ ማድረጊያ \n👉 ...,2024-09-20 08:11:40+00:00,
7,Sheger online-store,@Shageronlinestore,5326,💥Table Desk Edge Guard Strip\n 💯 High Qu...,2024-09-20 05:23:18+00:00,
8,Sheger online-store,@Shageronlinestore,5325,💥Table Desk Edge Guard Strip\n 💯 High Qu...,2024-09-20 05:21:14+00:00,photos/@Shageronlinestore_5325.jpg
10,Sheger online-store,@Shageronlinestore,5323,"💥Only baby 3in1 double bottle milk warmer,ster...",2024-09-19 13:54:46+00:00,photos/@Shageronlinestore_5323.jpg
...,...,...,...,...,...,...
5009,Sheger online-store,@Shageronlinestore,12,🎯 Kitchen Sticker\n\nለኪችንዎ ውበት እጅግ ተመራጭ \n🔰ውሀ ...,2021-04-27 05:58:59+00:00,photos/@Shageronlinestore_12.jpg
5010,Sheger online-store,@Shageronlinestore,10,🎯 3in1 One Step Hair Dryer & Styler \n\n👉 ከርል ...,2021-04-27 05:57:12+00:00,photos/@Shageronlinestore_10.jpg
5011,Sheger online-store,@Shageronlinestore,9,✅ Home GYM - X5 slimming vibrator \n\n📢📢📢 ታላቅ ...,2021-04-27 05:45:57+00:00,photos/@Shageronlinestore_9.jpg
5012,Sheger online-store,@Shageronlinestore,4,ለጤናችን-Health & Personal Care\n\n📍FingerTip Pul...,2021-04-12 08:36:40+00:00,photos/@Shageronlinestore_4.jpg


In [27]:
df.to_csv('clean_data.csv')

In [93]:
# Function to check if a string contains Amharic characters
def is_amharic(message):
    return bool(re.search(r'[\u1200-\u137F]', message))



In [94]:
# Function to classify messages
def classify_message(message):
    if pd.isna(message):  # Check for NaN or None
        return 'uncategorized'
    
    if is_amharic(message):
        for category, keywords in categories.items():
            if any(keyword in message for keyword in keywords):
                return category
    else:
        for category, keywords in categories.items():
            if any(keyword in message.lower() for keyword in keywords):
                return category
    return 'uncategorized'



In [95]:
# Apply classification to the Message column
df['Category'] = df['Message'].apply(classify_message)

# Display the updated DataFrame with categories
print(df[['Message', 'Category']])

                                                Message   Category
5     💥3pcs silicon brush spatulas\n\n⚡እስከ 260°c ሙቀት...  household
6     💥Mandoline Slicer\n\n👉 ጊዜ ቆጣቢ ስላይስ ማድረጊያ \n👉  ...  household
7     💥Table Desk Edge Guard Strip\n       💯 High Qu...        men
8     💥Table Desk Edge Guard Strip\n       💯 High Qu...        men
10    💥Only baby 3in1 double bottle milk warmer,ster...       kids
...                                                 ...        ...
5009  🎯 Kitchen Sticker\n\nለኪችንዎ ውበት እጅግ ተመራጭ \n🔰ውሀ ...  household
5010  🎯 3in1 One Step Hair Dryer & Styler \n\n👉 ከርል ...      women
5011  ✅ Home GYM - X5 slimming vibrator \n\n📢📢📢 ታላቅ ...      sport
5012  ለጤናችን-Health & Personal Care\n\n📍FingerTip Pul...       kids
5013  #Finger_tip_pulse_oximeter\n       #በተመጣጣኝ_ዋጋ\...     health

[3166 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Category'] = df['Message'].apply(classify_message)


In [96]:
# Display counts of unique values in the Category column
category_counts = df['Category'].value_counts()
print(category_counts)


Category
uncategorized    1337
household         821
kids              473
men               174
groceries         152
health             70
women              50
sport              45
accessories        44
Name: count, dtype: int64


In [97]:
df.head(100)

Unnamed: 0,Channel Title,Channel Username,ID,Message,Date,Media Path,Category
5,Sheger online-store,@Shageronlinestore,5328,💥3pcs silicon brush spatulas\n\n⚡እስከ 260°c ሙቀት...,2024-09-20 11:50:02+00:00,photos/@Shageronlinestore_5328.jpg,household
6,Sheger online-store,@Shageronlinestore,5327,💥Mandoline Slicer\n\n👉 ጊዜ ቆጣቢ ስላይስ ማድረጊያ \n👉 ...,2024-09-20 08:11:40+00:00,,household
7,Sheger online-store,@Shageronlinestore,5326,💥Table Desk Edge Guard Strip\n 💯 High Qu...,2024-09-20 05:23:18+00:00,,men
8,Sheger online-store,@Shageronlinestore,5325,💥Table Desk Edge Guard Strip\n 💯 High Qu...,2024-09-20 05:21:14+00:00,photos/@Shageronlinestore_5325.jpg,men
10,Sheger online-store,@Shageronlinestore,5323,"💥Only baby 3in1 double bottle milk warmer,ster...",2024-09-19 13:54:46+00:00,photos/@Shageronlinestore_5323.jpg,kids
...,...,...,...,...,...,...,...
181,Sheger online-store,@Shageronlinestore,5142,🌟WINNING STAR® 2in1 MULTIFUNCTIONAL BLENDER\n\...,2024-08-29 09:12:06+00:00,photos/@Shageronlinestore_5142.jpg,kids
182,Sheger online-store,@Shageronlinestore,5141,🥂3.6L Glass dispenser jar with Bamboo stand\n\...,2024-08-29 06:02:13+00:00,,household
183,Sheger online-store,@Shageronlinestore,5140,🥂3.6L Glass dispenser jar with Bamboo stand\n\...,2024-08-29 06:01:02+00:00,photos/@Shageronlinestore_5140.jpg,household
184,Sheger online-store,@Shageronlinestore,5139,💥44CM HAOCHU® CERAMIC PIZZA PAN\n\n⚡️ለቤትና ለሬስቶ...,2024-08-28 18:38:44+00:00,photos/@Shageronlinestore_5139.jpg,household


In [98]:
# from wordcloud import WordCloud
# import matplotlib.pyplot as plt
# # Filter for uncategorized items
# uncategorized_items = df[df['Category'] == 'uncategorized']

# # Combine all messages into a single string
# text = ' '.join(uncategorized_items['Message'])

# # Generate the word cloud
# wordcloud = WordCloud(width=800, height=400, background_color='white', 
#                       colormap='viridis', max_words=200).generate(text)

# # Display the word cloud
# plt.figure(figsize=(10, 5))
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')  # Turn off the axis
# plt.title('Word Cloud of Messages for Uncategorized Items')
# plt.show()

In [99]:
uncategorized_items = df[df['Category'] == 'uncategorized']
uncategorized_items.head(100)

Unnamed: 0,Channel Title,Channel Username,ID,Message,Date,Media Path,Category
47,Sheger online-store,@Shageronlinestore,5284,⭐️💫እንኳን ለመውሊድ በዓል በሰላም አደረሰዎ\n\nመልካም በዓል,2024-09-15 08:53:29+00:00,photos/@Shageronlinestore_5284.jpg,uncategorized
48,Sheger online-store,@Shageronlinestore,5283,💥 ውድ ደንበኞቻችን ሱቃችን ዛሬ እሁድ ከ5:00-9:00 ስዓት ክፍት መ...,2024-09-15 07:06:34+00:00,photos/@Shageronlinestore_5283.jpg,uncategorized
72,Sheger online-store,@Shageronlinestore,5256,💥ውስን ፍሬ የቀሩን ዕቃዎች\n🌼🌼.................🌼🌼\n\n✨ፈ...,2024-09-10 05:31:03+00:00,photos/@Shageronlinestore_5256.jpg,uncategorized
91,Sheger online-store,@Shageronlinestore,5237,🌼🌼........................🌼🌼\n💥በረፍት ቀንዎ ሱቅ ላይ ...,2024-09-07 16:58:25+00:00,photos/@Shageronlinestore_5237.jpg,uncategorized
120,Sheger online-store,@Shageronlinestore,5206,🌼🌼........................🌼🌼\n💥Ethereum® Washi...,2024-09-05 08:07:01+00:00,photos/@Shageronlinestore_5206.jpg,uncategorized
...,...,...,...,...,...,...,...
628,Sheger online-store,@Shageronlinestore,4654,💥Painless hair eraser\n\n ዋጋ-500ብር\n\n🏢 ...,2024-07-04 07:53:00+00:00,,uncategorized
633,Sheger online-store,@Shageronlinestore,4649,👶Kids knitted winter warm hat with scarf🧣\n\n⚡...,2024-07-03 14:43:54+00:00,photos/@Shageronlinestore_4649.jpg,uncategorized
642,Sheger online-store,@Shageronlinestore,4640,❇️ Wall Mounted Phone Holder Charging Stand La...,2024-07-03 07:43:48+00:00,,uncategorized
643,Sheger online-store,@Shageronlinestore,4639,❇️ Wall Mounted Phone Holder Charging Stand La...,2024-07-03 07:38:21+00:00,photos/@Shageronlinestore_4639.jpg,uncategorized


In [100]:
df.to_csv('labeled_data.csv')

In [101]:
uncategorized_items.to_csv('uncategorized_data.csv')