In [1]:
import re
import pandas as pd
import fasttext

from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("data/ecommerceDataset.csv", header=None)

In [3]:
data.head(5)

Unnamed: 0,0,1
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...


In [4]:
ecommerce_data = data.copy()

In [5]:
ecommerce_data.columns = ["Label", "Text"]

In [6]:
ecommerce_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50425 entries, 0 to 50424
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Label   50425 non-null  object
 1   Text    50424 non-null  object
dtypes: object(2)
memory usage: 788.0+ KB


In [7]:
ecommerce_data.dropna(inplace=True)

In [8]:
ecommerce_data.duplicated().sum()

22622

In [9]:
ecommerce_data.drop_duplicates(inplace=True)

In [11]:
ecommerce_data["Label"].value_counts()

Label
Household                 10564
Books                      6256
Clothing & Accessories     5674
Electronics                5308
Name: count, dtype: int64

In [12]:
ecommerce_data["Label"].replace("Clothing & Accessories", "Clothing_Accessories" , inplace=True)

In [13]:
ecommerce_data["Label"].unique()

array(['Household', 'Books', 'Clothing_Accessories', 'Electronics'],
      dtype=object)

In [14]:
ecommerce_data["Label"] = "__label__" + ecommerce_data["Label"].astype("str")
ecommerce_data.head(3)

Unnamed: 0,Label,Text
0,__label__Household,Paper Plane Design Framed Wall Hanging Motivat...
1,__label__Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,__label__Household,SAF 'UV Textured Modern Art Print Framed' Pain...


In [15]:
ecommerce_data["label_text"] = ecommerce_data["Label"] + " " + ecommerce_data["Text"]

In [16]:
ecommerce_data.head()

Unnamed: 0,Label,Text,label_text
0,__label__Household,Paper Plane Design Framed Wall Hanging Motivat...,__label__Household Paper Plane Design Framed W...
1,__label__Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ...",__label__Household SAF 'Floral' Framed Paintin...
2,__label__Household,SAF 'UV Textured Modern Art Print Framed' Pain...,__label__Household SAF 'UV Textured Modern Art...
3,__label__Household,"SAF Flower Print Framed Painting (Synthetic, 1...",__label__Household SAF Flower Print Framed Pai...
4,__label__Household,Incredible Gifts India Wooden Happy Birthday U...,__label__Household Incredible Gifts India Wood...


In [18]:
text = ecommerce_data["Text"][0]

In [21]:
def preprocess(text):
    text = re.sub(r"[^\w\s]", " ", text)
    text = re.sub(r" +", " ", text)
    return text.strip().lower()

In [22]:
preprocess(text)

'paper plane design framed wall hanging motivational office decor art prints 8 7 x 8 7 inch set of 4 painting made up in synthetic frame with uv textured print which gives multi effects and attracts towards it this is an special series of paintings which makes your wall very beautiful and gives a royal touch this painting is ready to hang you would be proud to possess this unique painting that is a niche apart we use only the most modern and efficient printing technology on our prints with only the and inks and precision epson roland and hp printers this innovative hd printing technique results in durable and spectacular looking prints of the highest that last a lifetime we print solely with top notch 100 inks to achieve brilliant and true colours due to their high level of uv resistance our prints retain their beautiful colours for many years add colour and style to your living space with this digitally printed painting some are for pleasure and some for eternal bliss so bring home th

In [23]:
ecommerce_data["label_text"] = ecommerce_data["label_text"].map(preprocess)

In [26]:
ecommerce_data["label_text"][1]

'__label__household saf floral framed painting wood 30 inch x 10 inch special effect uv print textured sao297 painting made up in synthetic frame with uv textured print which gives multi effects and attracts towards it this is an special series of paintings which makes your wall very beautiful and gives a royal touch a perfect gift for your special ones'

In [27]:
train, test = train_test_split(ecommerce_data, test_size=0.2, random_state=42, stratify=ecommerce_data.Label)

In [28]:
train.shape, test.shape

((22241, 3), (5561, 3))

In [30]:
train.head()

Unnamed: 0,Label,Text,label_text
44940,__label__Electronics,Zaptin 2nd Gen MP4 Player with Voice Recorder/...,__label__electronics zaptin 2nd gen mp4 player...
6752,__label__Household,"Hokipo Pvc Dining Table Kitchen Placemats, 45 ...",__label__household hokipo pvc dining table kit...
26448,__label__Books,IIMA - Strategies for Future: Understanding In...,__label__books iima strategies for future unde...
31538,__label__Clothing_Accessories,"LUXCY Girls' Panties (Multi-Coloured, Pack of ...",__label__clothing_accessories luxcy girls pant...
15855,__label__Household,iRobot 300 Series Braava 390t Floor Mopping Ro...,__label__household irobot 300 series braava 39...


In [None]:
train.to_csv("data/ecommerce.train", columns=["label_text"], index=False, header=False)
test.to_csv("data/ecommerce.test", columns=["label_text"], index=False, header=False)

In [33]:
model = fasttext.train_supervised(input="data/ecommerce.train")

In [34]:
model.test("data/ecommerce.test")

(5561, 0.9458730444164719, 0.9458730444164719)

In [36]:
train.Text[2]

"SAF 'UV Textured Modern Art Print Framed' Painting (Synthetic, 35 cm x 50 cm x 3 cm, Set of 3) Color:Multicolor                                                                                \xa0|\xa0                           Size:35 cm x 50 cm x 3 cm   Overview a beautiful painting involves the action or skill of using paint in the right manner; hence, the end product will be a picture that can speak a thousand words they say. Arts have been in trend for quite some time now. It can give different viewer different meanings style and design the SAF wood matte abstract painting with frame is quite abstract and mysteriously beautiful. The painting has a nice frame to it. You can gift this to a family or a friend. The painting has various forms of certain figures on it as seen in the image. You can add a good set of lights to the place where the painting is and the decor will give a different feel and look to the place. Quality and durability the painting has a matte finish and includes 

In [37]:
model.predict("SAF 'UV Textured Modern Art Print Framed' Painting (Synthetic, 35 cm x 50 cm x 3 cm, Set of 3) Color:Multicolor                                                                                \xa0|\xa0                           Size:35 cm x 50 cm x 3 cm   Overview a beautiful painting involves the action or skill of using paint in the right manner; hence, the end product will be a picture that can speak a thousand words they say. Arts have been in trend for quite some time now. It can give different viewer different meanings style and design the SAF wood matte abstract painting with frame is quite abstract and mysteriously beautiful. The painting has a nice frame to it. You can gift this to a family or a friend. The painting has various forms of certain figures on it as seen in the image. You can add a good set of lights to the place where the painting is and the decor will give a different feel and look to the place. Quality and durability the painting has a matte finish and includes a good quality frame and will last for a long period. However, it does not include glass along with the frame. Specifications you can purchase SAF wood matte abstract painting with frame on amazon.in. It is the most customer-friendly platform with a wide range of products to choose from, and shopping is just a click away!")

(('__label__household',), array([0.94893509]))

In [38]:
model.predict("wintech assemble desktop pc cpu 500 gb sata hdd 4 gb ram intel c2d processor 3")

(('__label__electronics',), array([0.99833411]))

In [41]:
model.get_nearest_neighbors("apple")

[(0.9858565330505371, 'top'),
 (0.9831443428993225, 'lenses'),
 (0.9827314019203186, 'buttons'),
 (0.9803778529167175, 'contrast'),
 (0.9789760708808899, '22'),
 (0.9780031442642212, 'n'),
 (0.9768291115760803, 'cross'),
 (0.9744910001754761, 'spf'),
 (0.974184513092041, 'w2_d3'),
 (0.9710919260978699, 'soundboss')]