# Importing Libraries and Data

In [1]:
%matplotlib inline
import numba

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
from IPython.display import display

from nltk.tokenize import word_tokenize, RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk import FreqDist
from nltk.corpus import wordnet as wn

import string



In [2]:
attributes_raw = pd.read_csv('attributes.csv')
product_desc_raw = pd.read_csv('product_descriptions.csv')
test_raw = pd.read_csv('test.csv')
train_raw = pd.read_csv('train.csv')

In [3]:
attributes_raw.head(20)

Unnamed: 0,product_uid,name,value
0,100001.0,Bullet01,Versatile connector for various 90° connection...
1,100001.0,Bullet02,Stronger than angled nailing or screw fastenin...
2,100001.0,Bullet03,Help ensure joints are consistently straight a...
3,100001.0,Bullet04,Dimensions: 3 in. x 3 in. x 1-1/2 in.
4,100001.0,Bullet05,Made from 12-Gauge steel
5,100001.0,Bullet06,Galvanized for extra corrosion resistance
6,100001.0,Bullet07,Install with 10d common nails or #9 x 1-1/2 in...
7,100001.0,Gauge,12
8,100001.0,Material,Galvanized Steel
9,100001.0,MFG Brand Name,Simpson Strong-Tie


In [4]:
attributes_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2044803 entries, 0 to 2044802
Data columns (total 3 columns):
product_uid    float64
name           object
value          object
dtypes: float64(1), object(2)
memory usage: 46.8+ MB


In [5]:
product_desc_raw.head(20)

Unnamed: 0,product_uid,product_description
0,100001,"Not only do angles make joints stronger, they ..."
1,100002,BEHR Premium Textured DECKOVER is an innovativ...
2,100003,Classic architecture meets contemporary design...
3,100004,The Grape Solar 265-Watt Polycrystalline PV So...
4,100005,Update your bathroom with the Delta Vero Singl...
5,100006,Achieving delicious results is almost effortle...
6,100007,The Quantum Adjustable 2-Light LED Black Emerg...
7,100008,The Teks #10 x 1-1/2 in. Zinc-Plated Steel Was...
8,100009,Get the House of Fara 3/4 in. x 3 in. x 8 ft. ...
9,100010,Valley View Industries Metal Stakes (4-Pack) a...


In [6]:
product_desc_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 124428 entries, 0 to 124427
Data columns (total 2 columns):
product_uid            124428 non-null int64
product_description    124428 non-null object
dtypes: int64(1), object(1)
memory usage: 1.9+ MB


In [7]:
train_raw.head(20)

Unnamed: 0,id,product_uid,product_title,search_term,relevance
0,2,100001,Simpson Strong-Tie 12-Gauge Angle,angle bracket,3.0
1,3,100001,Simpson Strong-Tie 12-Gauge Angle,l bracket,2.5
2,9,100002,BEHR Premium Textured DeckOver 1-gal. #SC-141 ...,deck over,3.0
3,16,100005,Delta Vero 1-Handle Shower Only Faucet Trim Ki...,rain shower head,2.33
4,17,100005,Delta Vero 1-Handle Shower Only Faucet Trim Ki...,shower only faucet,2.67
5,18,100006,Whirlpool 1.9 cu. ft. Over the Range Convectio...,convection otr,3.0
6,20,100006,Whirlpool 1.9 cu. ft. Over the Range Convectio...,microwave over stove,2.67
7,21,100006,Whirlpool 1.9 cu. ft. Over the Range Convectio...,microwaves,3.0
8,23,100007,Lithonia Lighting Quantum 2-Light Black LED Em...,emergency light,2.67
9,27,100009,House of Fara 3/4 in. x 3 in. x 8 ft. MDF Flut...,mdf 3/4,3.0


In [8]:
train_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74067 entries, 0 to 74066
Data columns (total 5 columns):
id               74067 non-null int64
product_uid      74067 non-null int64
product_title    74067 non-null object
search_term      74067 non-null object
relevance        74067 non-null float64
dtypes: float64(1), int64(2), object(2)
memory usage: 2.8+ MB


In [9]:
test_raw.head(20)

Unnamed: 0,id,product_uid,product_title,search_term
0,1,100001,Simpson Strong-Tie 12-Gauge Angle,90 degree bracket
1,4,100001,Simpson Strong-Tie 12-Gauge Angle,metal l brackets
2,5,100001,Simpson Strong-Tie 12-Gauge Angle,simpson sku able
3,6,100001,Simpson Strong-Tie 12-Gauge Angle,simpson strong ties
4,7,100001,Simpson Strong-Tie 12-Gauge Angle,simpson strong tie hcc668
5,8,100001,Simpson Strong-Tie 12-Gauge Angle,wood connectors
6,10,100003,STERLING Ensemble 33-1/4 in. x 60 in. x 75-1/4...,bath and shower kit
7,11,100003,STERLING Ensemble 33-1/4 in. x 60 in. x 75-1/4...,bath drain kit
8,12,100003,STERLING Ensemble 33-1/4 in. x 60 in. x 75-1/4...,one piece tub shower
9,13,100004,Grape Solar 265-Watt Polycrystalline Solar Pan...,solar panel


# Data Wrangling

In [10]:
train_merged = train_raw.merge(product_desc_raw, on='product_uid', how='left')
train_merged.tail(20)

Unnamed: 0,id,product_uid,product_title,search_term,relevance,product_description
74047,221413,206601,3M Tekk Protection White Vented Hard Hat with ...,hard hat with mining,1.67,3M TEKK Protection Vented Pro Ratchet Hard Hat...
74048,221415,206602,Hy-Lite Glass Block Fixed Vinyl Windows Driftw...,replace a broken glass in a vinyl window,2.0,Excellent privacy and decorative solution for ...
74049,221416,206603,Westinghouse 3 ft. Oil Rubbed Bronze Beaded Ch...,ceiling fan with chain cord,1.0,This Westinghouse Lighting Oil Rubbed Bronze B...
74050,221419,206606,BEHR Premium Plus #ICC-101 Florentine Clay Zer...,florentine clay,2.33,"BEHR Premium Plus Zero VOC, Self-Priming Inter..."
74051,221420,206607,Home Legend Palace Oak Light 3/4 in. Thick x 3...,trim a home lights,2.0,Quarter round is typically placed in front of ...
74052,221422,206609,Whitehaus Collection Isabella Wall-Mounted Bat...,whitehaus bathroom sinl,2.33,Beautify your bathroom or powder room with a W...
74053,221423,206610,Champion Power Equipment 6.5 HP Gas-Powered 3 ...,6.5 hp gas generator,2.33,The Champion Power Equipment 3 in. semi-trash ...
74054,221426,206613,Everbilt 4 in. White Wall Guard,splash guard for wall,2.0,The Everbilt 4 in. Wall Guard helps prevent da...
74055,221427,206614,Daltile Villa Valleta Calais Springs 18 in. x ...,spicewood springs floor tile,1.67,This dal-tile villa valleta calais springs por...
74056,221432,206619,Home Styles Stone Harbor 51 in. Round 7-Piece ...,laguna porcelin tile,2.0,Stone Harbor 7-Piece Dining Set includes Large...


In [11]:
def remove_non_ascii(s):
    printable = set(string.printable)
    return filter(lambda x: x in printable, s)

In [12]:
train_merged['product_title'] = train_merged['product_title'].apply(remove_non_ascii)
train_merged['product_description'] = train_merged['product_description'].apply(remove_non_ascii)
train_merged['search_term'] = train_merged['search_term'].apply(remove_non_ascii)

In [13]:
df = train_merged[train_merged['id']==1060]
print df

       id  product_uid                                      product_title  \
323  1060       100179  Paslode 3 in. x 0.120-Gauge 30 Galvanized Ring...   

                  search_term  relevance  \
323  galvanized framing nails       2.67   

                                   product_description  
323  For exterior applications like framing of sill...  


## try to find the similarity among the search term and the description and product title

### experiment on the single row:

In [50]:
texts = [train_merged.iloc[1,2].lower(), train_merged.iloc[1,3].lower(), train_merged.iloc[1,5].lower()]
texts

['simpson strong-tie 12-gauge angle',
 'l bracket',
 'not only do angles make joints stronger, they also provide more consistent, straight corners. simpson strong-tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. some can be bent (skewed) to match the project. for outdoor projects or those where moisture is present, use our zmax zinc-coated connectors, which provide extra resistance against corrosion (look for a "z" at the end of the model number).versatile connector for various 90 connections and home repair projectsstronger than angled nailing or screw fastening alonehelp ensure joints are consistently straight and strongdimensions: 3 in. x 3 in. x 1-1/2 in.made from 12-gauge steelgalvanized for extra corrosion resistanceinstall with 10d common nails or #9 x 1-1/2 in. strong-drive sd screws']

#### extract the text, tokenize the sentences and clean up the text

In [51]:
from nltk.tokenize import word_tokenize, RegexpTokenizer

In [52]:
#tokenize and remove punctuation
tokenizer = RegexpTokenizer(r'\w+')
texts_t = [tokenizer.tokenize(t) for t in texts]

In [53]:
#remove stopwords
from nltk.corpus import stopwords

def remove_stopwords(text):
    return [word for word in text if word not in stopwords.words('english')]
    
texts_t = map(remove_stopwords, texts_t)

In [54]:
#remove suffix of the words
from nltk.stem.wordnet import WordNetLemmatizer
def get_words_stem(tokenized_text):
    lemmatizer = WordNetLemmatizer()
    return map(lemmatizer.lemmatize, tokenized_text)

texts_t = map(get_words_stem, texts_t)

#### use freqdist() to check the frequencies of each word and compare it with the search term

In [55]:
from nltk import FreqDist

In [56]:
def get_freq_in_text(text, word):
#     print text
    freq = FreqDist(text)
    return freq[word]

In [57]:
texts_t

[['simpson', 'strong', 'tie', '12', 'gauge', 'angle'],
 ['l', 'bracket'],
 [u'angle',
  'make',
  u'joint',
  'stronger',
  'also',
  'provide',
  'consistent',
  'straight',
  u'corner',
  'simpson',
  'strong',
  'tie',
  u'offer',
  'wide',
  'variety',
  u'angle',
  'various',
  u'size',
  u'thickness',
  'handle',
  'light',
  'duty',
  u'job',
  u'project',
  'structural',
  'connection',
  'needed',
  'bent',
  'skewed',
  'match',
  'project',
  'outdoor',
  u'project',
  'moisture',
  'present',
  'use',
  'zmax',
  'zinc',
  'coated',
  u'connector',
  'provide',
  'extra',
  'resistance',
  'corrosion',
  'look',
  'z',
  'end',
  'model',
  'number',
  'versatile',
  'connector',
  'various',
  '90',
  u'connection',
  'home',
  'repair',
  'projectsstronger',
  'angled',
  'nailing',
  'screw',
  'fastening',
  'alonehelp',
  'ensure',
  u'joint',
  'consistently',
  'straight',
  'strongdimensions',
  '3',
  'x',
  '3',
  'x',
  '1',
  '1',
  '2',
  'made',
  '12',
  'gau

In [58]:
for word in texts_t[1]:
    print word
    print 'freq in title: ', get_freq_in_text(texts_t[0], word)
    print 'freq in desc: ', get_freq_in_text(texts_t[2], word)

l
freq in title:  0
freq in desc:  0
bracket
freq in title:  0
freq in desc:  0


#### use synsets module to check simlilarity

 ##### method:
* text_words
    * word
    * word
    * word
    * word
        * synset1 <-loop through each synset
        * synset2
        * synset3
        
 compared to:
    * ref_word
        * synset1
        * synset2
 
 find the max similarity between eg word:synset1 and ref_word:synset2
 
 append this simliarity into word's syn_sims
 
 find the max similarity between word:synset2 and the ref_word's synsets
 ...
 until each synset in the word has found the max similarity to the ref_word
 
 then return the max value of the word's syn_sims list to represent the similarity of the word to the ref_word
 

In [129]:
kw = texts_t[1][1]
kw

'bracket'

In [130]:
kw_syn = wn.synsets(kw)
kw_syn

[Synset('bracket.n.01'),
 Synset('bracket.n.02'),
 Synset('bracket.n.03'),
 Synset('bracket.n.04'),
 Synset('bracket.v.01'),
 Synset('bracket.v.02'),
 Synset('bracket.v.03')]

In [131]:
def compare_synsets(synsets1, synsets2):
    comparisons = [syn1.path_similarity(syn2) for syn2 in synsets2 for syn1 in synsets1]
    comparisons = [v for v in comparisons if v is not None]
    return sum(comparisons)

In [132]:
def get_synsets(word):
    return wn.synsets(word)

In [133]:
def check_similarity_word_words(ref_word_synsets, words):
    synsets_of_all_words = [wn.synsets(word) for word in words]
#     print synsets_of_all_words
    sim_word_to_word  = [compare_synsets(ref_word_synsets, synsets) for synsets in synsets_of_all_words]
    return sum(sim_word_to_word)

In [134]:
print kw, kw_syn, texts_t[0]
sim_kw_title = check_similarity_word_words(kw_syn, texts_t[0])
kw, sim_kw_title

bracket [Synset('bracket.n.01'), Synset('bracket.n.02'), Synset('bracket.n.03'), Synset('bracket.n.04'), Synset('bracket.v.01'), Synset('bracket.v.02'), Synset('bracket.v.03')] ['simpson', 'strong', 'tie', '12', 'gauge', 'angle']


('bracket', 25.43110843404961)

* check similarity between search key word and the product description

In [156]:
sim_kw_desc = check_similarity_word_list(kw_syn, texts_t[2])
kw, sim_kw_desc

sim: 0.0833333333333
sim: 0.0769230769231
sim: 0.0714285714286
sim: None
sim: None
sim: None
sim: None
sim: None
sim: 0.0666666666667
sim: 0.0555555555556
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: 0.0769230769231
sim: 0.0588235294118
sim: 0.0909090909091
sim: 0.0666666666667
sim: 0.0625
sim: 0.0588235294118
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: None
sim: Non

('l',
 {'1': 0.2,
  '12': 0.3333333333333333,
  '2': 0.2,
  '3': 0.2,
  '9': 0.2,
  '90': 0.3333333333333333,
  'also': None,
  u'angle': 0.08333333333333333,
  'angled': None,
  'bent': 0.08333333333333333,
  'coated': None,
  'common': 0.06666666666666667,
  'connection': 0.1111111111111111,
  u'connector': 0.07142857142857142,
  'consistent': None,
  'consistently': None,
  u'corner': 0.08333333333333333,
  'corrosion': 0.07692307692307693,
  'drive': 0.1,
  'duty': 0.08333333333333333,
  'end': 0.125,
  'ensure': None,
  'extra': 0.07692307692307693,
  'fastening': 0.06666666666666667,
  'gauge': 0.1111111111111111,
  'handle': 0.07692307692307693,
  'home': 0.08333333333333333,
  u'job': 0.08333333333333333,
  u'joint': 0.09090909090909091,
  'light': 0.125,
  'look': 0.09090909090909091,
  'made': None,
  'make': 0.06666666666666667,
  'match': 0.16666666666666666,
  'model': 0.08333333333333333,
  'moisture': 0.08333333333333333,
  u'nail': 0.16666666666666666,
  'nailing': None

* mean values of the similarities

In [157]:
def cal_similarities_mean(similarities_list):
    sims = [v for k,v in similarities_list.iteritems()]
    #drop na
    sims = np.array([e for e in sims if e != None])
    return sims.mean()

In [158]:
sim_kw_title_mean =cal_similarities_mean(sim_kw_title)
print sim_kw_title_mean

sim_kw_desc_mean = cal_similarities_mean(sim_kw_desc)
print sim_kw_desc_mean

0.137070707071
0.123636759482


### run on all rows

In [155]:
def remove_stopwords(text):
    return np.array([word for word in text if word not in stopwords.words('english')])

def get_words_stem(tokenized_text):
    lemmatizer = WordNetLemmatizer()
    return map(lemmatizer.lemmatize, tokenized_text)

def compare_synsets(synsets1, synsets2):
    comparisons = [syn1.path_similarity(syn2) for syn2 in synsets2 for syn1 in synsets1]
    comparisons = [v for v in comparisons if v is not None]
    if len(comparisons) > 0:
        return max(comparisons)
    else:
        return 0

def get_synsets(word):
    return wn.synsets(word) 

def check_similarity_word_words(ref_word_synsets, words):
    synsets_of_all_words = [wn.synsets(word) for word in words]
#     print synsets_of_all_words
    sim_word_to_word  = [compare_synsets(ref_word_synsets, synsets) for synsets in synsets_of_all_words]
    return max(sim_word_to_word)



In [156]:
def find_search_similarity_title_desc(row,title_col_name,search_col_name, desc_col_name, mode):
    print 'row id: ', row.id
    texts = np.array([row[title_col_name],row[search_col_name],row[desc_col_name] ])
#     print texts

    #tokenize and remove punctuation
    tokenizer = RegexpTokenizer(r'\w+')
    texts_t = np.array([tokenizer.tokenize(t) for t in texts])
    
    #remove stopwords
    texts_t = map(remove_stopwords, texts_t)
    
    #remove suffix of the words
    texts_t = map(get_words_stem, texts_t)
    
    sim_kw_title_mean_all = {}
    sim_kw_desc_mean_all = {}
    
    for kw in texts_t[1]:
        print 'keyword: ', kw
        #get the synsets of the keyword
        kw_syn = wn.synsets(kw)
        #get the similarity matrix of kw:product_title
        sim_kw_title_mean = check_similarity_word_words(kw_syn, texts_t[0])
        #get the similarity matrix of kw:product_description
        sim_kw_desc_mean = check_similarity_word_words(kw_syn, texts_t[2])
    
        sim_kw_title_mean_all[kw] = sim_kw_title_mean
        sim_kw_desc_mean_all[kw] = sim_kw_desc_mean
    
    
    sim_title_mean_np = np.array(sim_kw_title_mean_all.values())
    sim_desc_mean_np = np.array(sim_kw_desc_mean_all.values())
    
    sim_title_mean_val = np.mean(sim_title_mean_np)
    sim_desc_mean_val = np.mean(sim_desc_mean_np)
    
    print 'sim means: ', sim_title_mean_val, sim_desc_mean_val
    if mode =='title':
        print 'return: ', sim_title_mean_val
        return sim_title_mean_val
    elif mode == 'desc':
        print 'return: ', sim_desc_mean_val
        return sim_desc_mean_val
    elif mode == 'avg':
        print 'return: ', (sim_title_mean_val + sim_desc_mean_val)/2
        return (sim_title_mean_val + sim_desc_mean_val)/2

In [157]:
train_merged_sub = train_merged.iloc[0:10, :]
train_merged_sub.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 9
Data columns (total 6 columns):
id                     10 non-null int64
product_uid            10 non-null int64
product_title          10 non-null object
search_term            10 non-null object
relevance              10 non-null float64
product_description    10 non-null object
dtypes: float64(1), int64(2), object(3)
memory usage: 560.0+ bytes


In [158]:
%timeit train_merged_sub['sim_title'] = train_merged_sub[['id','product_title', 'product_description', 'search_term']].apply(lambda row: find_search_similarity_title_desc(row, 'product_title','search_term', 'product_description', 'title'), axis=1)

row id:  2
keyword:  angle
keyword:  bracket
sime means:  0.625 0.625
return:  0.625
row id:  3
keyword:  l
keyword:  bracket
sime means:  0.291666666667 0.291666666667
return:  0.291666666667
row id:  9
keyword:  deck
sime means:  0.25 1.0
return:  0.25
row id:  16
keyword:  rain
keyword:  shower
keyword:  head
sime means:  0.611111111111 0.666666666667
return:  0.611111111111
row id:  17
keyword:  shower
keyword:  faucet
sime means:  1.0 1.0
return:  1.0
row id:  18
keyword:  convection
keyword:  otr
sime means:  0.5 0.5
return:  0.5
row id:  20
keyword:  microwave
keyword:  stove
sime means:  1.0 1.0
return:  1.0
row id:  21
keyword:  microwave
sime means:  1.0 1.0
return:  1.0
row id:  23
keyword:  emergency
keyword:  light
sime means:  1.0 1.0
return:  1.0
row id:  27
keyword:  mdf
keyword:  3
keyword:  4
sime means:  0.666666666667 0.666666666667
return:  0.666666666667
row id:  2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


keyword:  angle
keyword:  bracket
sime means:  0.625 0.625
return:  0.625
row id:  3
keyword:  l
keyword:  bracket
sime means:  0.291666666667 0.291666666667
return:  0.291666666667
row id:  9
keyword:  deck
sime means:  0.25 1.0
return:  0.25
row id:  16
keyword:  rain
keyword:  shower
keyword:  head
sime means:  0.611111111111 0.666666666667
return:  0.611111111111
row id:  17
keyword:  shower
keyword:  faucet
sime means:  1.0 1.0
return:  1.0
row id:  18
keyword:  convection
keyword:  otr
sime means:  0.5 0.5
return:  0.5
row id:  20
keyword:  microwave
keyword:  stove
sime means:  1.0 1.0
return:  1.0
row id:  21
keyword:  microwave
sime means:  1.0 1.0
return:  1.0
row id:  23
keyword:  emergency
keyword:  light
sime means:  1.0 1.0
return:  1.0
row id:  27
keyword:  mdf
keyword:  3
keyword:  4
sime means:  0.666666666667 0.666666666667
return:  0.666666666667
row id:  2
keyword:  angle
keyword:  bracket
sime means:  0.625 0.625
return:  0.625
row id:  3
keyword:  l
keyword:  brac

In [19]:
train_merged_sub['sim_desc'] = train_merged_sub.apply(lambda row: find_search_similarity_title_desc(row, 'product_title','search_term', 'product_description', 'desc'), axis=1)

63857
fiberglass
shingle
0.235416666667 0.175254629004
return:  0.175254629004
142524
floor
dust
cloth
0.222577628133 0.210276898653
return:  0.210276898653
189368
2amps
250v
fuse
0.0777777777778 0.0649816133687
return:  0.0649816133687
121495
size
louvered
closet
door
nan nan
return:  nan
126941
55
gallon
trash
bag
0.42837383576 0.402804516542
return:  0.402804516542
58203
range
top
microwave
ge
advantium
0.204286616162 0.171142669258
return:  0.171142669258
165452
square
bannister
0.280439814815 0.197354415861
return:  0.197354415861
151541
100
watt
incandescent
0.46962832692 0.43173016621
return:  0.43173016621
47271
naples
white
0.252380952381 0.156112179972
return:  0.156112179972
118181
72
inch
vanity
top
0.429654095904 nan
return:  nan
147033
backsplash
black
brown
white
0.184262265512 0.146260534337
return:  0.146260534337
75386
electrical
adapter
nan nan
return:  nan
140033
egg
rock
0.238732563733 0.222871878592
return:  0.222871878592
43569
wire
type
thw
0.185634920635 0.1407

In [22]:
train_merged_sub.tail(20)

Unnamed: 0,id,product_uid,product_title,search_term,relevance,product_description,sim_title,sim_desc
69779,209201,196313,Ryobi 0.080 in. Pre-Cut Twisted Trimmer Line,pre cut riser,1.33,For cleaner cuts in thicker grass choose Ryobi...,0.214815,0.160866
55111,166961,166069,Amerimax Home Products 3 in. x 4 in. White Alu...,3x4 aluminum downspout straps,3.0,Amerimax home products downspout clip is one o...,0.107883,0.081214
17860,55100,114426,Hampton Bay 1-Light Oil Rubbed Bronze Outdoor ...,To,1.67,Use the Hampton Bay Wall-Mount 1-Light Oil Rub...,0.0,0.0
62237,187656,179892,LIFAN 2500 psi 2.5 GPM AR Axial Cam Pump Heavy...,lifan pump,2.33,Lifan Power USA is a full line of professional...,0.127199,0.109644
21685,67016,118532,BESSEY H-Style Pipe Clamp Fixture Set for 3/4 ...,pipe saver clamp,1.67,They are exceptionally affordable you buy the ...,0.221064,0.177908
44953,136647,148812,Wyndham Collection Avara 60 in. Vanity in Espr...,aqua glass,2.0,"With 2 doors, 4 drawers, 2 open compartments a...",0.223062,0.168959
73952,221152,206381,Easy Gardener 6 ft. x 50 ft. Saddle Tan Sun Sc...,roller sun screening,2.33,Keep your cool when temperatures soar with the...,0.223223,0.211576
6837,21426,104019,Melamine White Shelf Board (Common: 3/4 in. x ...,shelving closet,2.33,"Shelving is a quick, easy, and affordable way ...",0.151045,0.179274
6294,19766,103571,Philips 4 ft. T12 40-Watt Daylight Deluxe Line...,fluorescent light ballet,2.0,The Philips Daylight Deluxe 40-Watt Linear Flu...,0.191699,0.15804
21684,67015,118532,BESSEY H-Style Pipe Clamp Fixture Set for 3/4 ...,pipe over pipe clamps,3.0,They are exceptionally affordable you buy the ...,0.26345,0.204517


In [None]:
multiple linear regression
SVM
random forest

vis: scatter matrix
    confusion matrix
    roc curve
    word cloud: https://github.com/shubhabrataroy/Thinkful/blob/master/Curriculum/SetNoteBook.ipynb
