# Building a song recommender

**About Book Crossing Dataset**<br>

This dataset has been compiled by Cai-Nicolas Ziegler in 2004, and it comprises of three tables for users, books and ratings. Explicit ratings are expressed on a scale from 1-10 (higher values denoting higher appreciation) and implicit rating is expressed by 0.

In [43]:
%matplotlib inline
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import time
from sklearn.externals import joblib
import Recommenders as Recommenders
import Evaluation as Evaluation
#Loading data
books = pd.read_csv("books.csv", sep=";", error_bad_lines=False, encoding="latin-1")
books.columns = ['ISBN', 'bookTitle', 'bookAuthor', 'yearOfPublication', 'publisher', 'imageUrlS', 'imageUrlM', 'imageUrlL']

users = pd.read_csv('users.csv', sep=';', error_bad_lines=False, encoding="latin-1")
users.columns = ['userID', 'Location', 'Age']

ratings = pd.read_csv('ratings1.csv', sep=';', error_bad_lines=False, encoding="latin-1")
ratings.columns = ['userID', 'ISBN', 'bookRating']

b'Skipping line 6452: expected 8 fields, saw 9\nSkipping line 43667: expected 8 fields, saw 10\nSkipping line 51751: expected 8 fields, saw 9\n'
b'Skipping line 92038: expected 8 fields, saw 9\nSkipping line 104319: expected 8 fields, saw 9\nSkipping line 121768: expected 8 fields, saw 9\n'
b'Skipping line 144058: expected 8 fields, saw 9\nSkipping line 150789: expected 8 fields, saw 9\nSkipping line 157128: expected 8 fields, saw 9\nSkipping line 180189: expected 8 fields, saw 9\nSkipping line 185738: expected 8 fields, saw 9\n'
b'Skipping line 209388: expected 8 fields, saw 9\nSkipping line 220626: expected 8 fields, saw 9\nSkipping line 227933: expected 8 fields, saw 11\nSkipping line 228957: expected 8 fields, saw 10\nSkipping line 245933: expected 8 fields, saw 9\nSkipping line 251296: expected 8 fields, saw 9\nSkipping line 259941: expected 8 fields, saw 9\nSkipping line 261529: expected 8 fields, saw 9\n'
  interactivity=interactivity, compiler=compiler, result=result)


### Check no.of records and features given in each dataset

In [2]:
print ("No. of records in Books directory:", books.shape)
print ("No. of records in Users directory:", users.shape)
print ("No. of records in Ratings directory:", ratings.shape)

No. of records in Books directory: (271360, 8)
No. of records in Users directory: (278858, 3)
No. of records in Ratings directory: (1149780, 3)


In [3]:
books.head(5)

Unnamed: 0,ISBN,bookTitle,bookAuthor,yearOfPublication,publisher,imageUrlS,imageUrlM,imageUrlL
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [4]:
users.head()

Unnamed: 0,userID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [34]:
ratings.head()

Unnamed: 0,userID,ISBN,bookRating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


### Drop last three columns containing image URLs which will not be required for analysis

In [35]:
books=books.drop('imageUrlS',axis=1)
books=books.drop('imageUrlM',axis=1)
books=books.drop('imageUrlL',axis=1)

In [36]:
books.head()

Unnamed: 0,ISBN,bookTitle,bookAuthor,yearOfPublication,publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company


### Check unique values of yearOfPublication

In [9]:
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271360 entries, 0 to 271359
Data columns (total 5 columns):
ISBN                 271360 non-null object
bookTitle            271360 non-null object
bookAuthor           271359 non-null object
yearOfPublication    271360 non-null object
publisher            271358 non-null object
dtypes: object(5)
memory usage: 10.4+ MB


In [10]:
books['yearOfPublication'].unique()

array([2002, 2001, 1991, 1999, 2000, 1993, 1996, 1988, 2004, 1998, 1994,
       2003, 1997, 1983, 1979, 1995, 1982, 1985, 1992, 1986, 1978, 1980,
       1952, 1987, 1990, 1981, 1989, 1984, 0, 1968, 1961, 1958, 1974,
       1976, 1971, 1977, 1975, 1965, 1941, 1970, 1962, 1973, 1972, 1960,
       1966, 1920, 1956, 1959, 1953, 1951, 1942, 1963, 1964, 1969, 1954,
       1950, 1967, 2005, 1957, 1940, 1937, 1955, 1946, 1936, 1930, 2011,
       1925, 1948, 1943, 1947, 1945, 1923, 2020, 1939, 1926, 1938, 2030,
       1911, 1904, 1949, 1932, 1928, 1929, 1927, 1931, 1914, 2050, 1934,
       1910, 1933, 1902, 1924, 1921, 1900, 2038, 2026, 1944, 1917, 1901,
       2010, 1908, 1906, 1935, 1806, 2021, '2000', '1995', '1999', '2004',
       '2003', '1990', '1994', '1986', '1989', '2002', '1981', '1993',
       '1983', '1982', '1976', '1991', '1977', '1998', '1992', '1996',
       '0', '1997', '2001', '1974', '1968', '1987', '1984', '1988',
       '1963', '1956', '1970', '1985', '1978', '1973', '1980'

As it can be seen from above that there are some incorrect entries in this field. It looks like Publisher names 'DK Publishing Inc' and 'Gallimard' have been incorrectly loaded as yearOfPublication in dataset due to some errors in csv file.


Also some of the entries are strings and same years have been entered as numbers in some places. We will try to fix these things in the coming questions.

### Drop the rows having `'DK Publishing Inc'` and `'Gallimard'` as `yearOfPublication`

In [37]:
books=books[books.yearOfPublication!='DK Publishing Inc']
books=books[books.yearOfPublication!='Gallimard']

In [12]:
books['yearOfPublication'].unique()

array([2002, 2001, 1991, 1999, 2000, 1993, 1996, 1988, 2004, 1998, 1994,
       2003, 1997, 1983, 1979, 1995, 1982, 1985, 1992, 1986, 1978, 1980,
       1952, 1987, 1990, 1981, 1989, 1984, 0, 1968, 1961, 1958, 1974,
       1976, 1971, 1977, 1975, 1965, 1941, 1970, 1962, 1973, 1972, 1960,
       1966, 1920, 1956, 1959, 1953, 1951, 1942, 1963, 1964, 1969, 1954,
       1950, 1967, 2005, 1957, 1940, 1937, 1955, 1946, 1936, 1930, 2011,
       1925, 1948, 1943, 1947, 1945, 1923, 2020, 1939, 1926, 1938, 2030,
       1911, 1904, 1949, 1932, 1928, 1929, 1927, 1931, 1914, 2050, 1934,
       1910, 1933, 1902, 1924, 1921, 1900, 2038, 2026, 1944, 1917, 1901,
       2010, 1908, 1906, 1935, 1806, 2021, '2000', '1995', '1999', '2004',
       '2003', '1990', '1994', '1986', '1989', '2002', '1981', '1993',
       '1983', '1982', '1976', '1991', '1977', '1998', '1992', '1996',
       '0', '1997', '2001', '1974', '1968', '1987', '1984', '1988',
       '1963', '1956', '1970', '1985', '1978', '1973', '1980'

### Change the datatype of yearOfPublication to 'int'

In [13]:
books['yearOfPublication']=books.yearOfPublication.astype(int)

In [14]:
books.dtypes

ISBN                 object
bookTitle            object
bookAuthor           object
yearOfPublication     int32
publisher            object
dtype: object

### Drop NaNs in `'publisher'` column

In [15]:
sum(books['publisher']== 'NaN')

0

In [16]:
books['publisher'].unique()

array(['Oxford University Press', 'HarperFlamingo Canada',
       'HarperPerennial', ..., 'Tempo', 'Life Works Books', 'Connaught'],
      dtype=object)

In [17]:
books=books.dropna()

## Exploring Users dataset

In [18]:
print(users.shape)
users.head()

(278858, 3)


Unnamed: 0,userID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


### Get all unique values in ascending order for column `Age`

In [19]:
Uusers=users['Age'].unique()
Uusers.sort()
a = pd.DataFrame(Uusers)
a

Unnamed: 0,0
0,0.0
1,1.0
2,2.0
3,3.0
4,4.0
5,5.0
6,6.0
7,7.0
8,8.0
9,9.0


Age column has some invalid entries like nan, 0 and very high values like 100 and above

### Values below 5 and above 90 do not make much sense for our book rating case...hence replace these by NaNs

### Replace null values in column `Age` with mean

In [45]:
users['Age']=pd.DataFrame(users[(users['Age']>4) & (users['Age'] <91)])
users['Age']=round(users['Age'].fillna(round(users['Age'].mean(),0)),0)
users

Unnamed: 0,userID,Location,Age
0,1,"nyc, new york, usa",46.0
1,2,"stockton, california, usa",46.0
2,3,"moscow, yukon territory, russia",46.0
3,4,"porto, v.n.gaia, portugal",46.0
4,5,"farnborough, hants, united kingdom",46.0
5,6,"santa monica, california, usa",6.0
6,7,"washington, dc, usa",46.0
7,8,"timmins, ontario, canada",46.0
8,9,"germantown, tennessee, usa",46.0
9,10,"albacete, wisconsin, spain",10.0


### Change the datatype of `Age` to `int`

In [22]:
users.dtypes

userID        int64
Location     object
Age         float64
dtype: object

In [46]:
users['Age']=users.Age.astype(int)
users.dtypes

userID       int64
Location    object
Age          int32
dtype: object

In [47]:
print(sorted(users.Age.unique()))

[6, 10, 11, 13, 18, 19, 20, 21, 24, 25, 27, 28, 29, 30, 31, 33, 35, 36, 37, 38, 40, 41, 42, 44, 46, 47, 50, 51, 54, 56, 59, 61, 63, 64, 66, 67, 70, 71, 72, 74, 75, 78, 79, 85, 89, 90]


## Exploring the Ratings Dataset

### check the shape

In [25]:
ratings.shape

(1149780, 3)

In [26]:
n_users = users.shape[0]
n_books = books.shape[0]

In [27]:
ratings.head(5)

Unnamed: 0,userID,ISBN,bookRating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [28]:
books.shape

(271354, 5)

### Ratings dataset should have books only which exist in our books dataset. Drop the remaining rows

In [48]:
BookRatings = pd.merge(books, ratings.drop_duplicates(['ISBN']), on="ISBN", how="left") 

In [49]:
BookRatings.shape

(271360, 10)

### Ratings dataset should have ratings from users which exist in users dataset. Drop the remaining rows

In [50]:
BookRatings = pd.merge(users, ratings.drop_duplicates(['userID']), on="userID", how="left")

In [51]:
BookRatings=BookRatings.dropna()

### Consider only ratings from 1-10 and leave 0s in column `bookRating`

In [52]:
BookRatings=BookRatings[BookRatings.bookRating!=0]
BookRatings.head()

Unnamed: 0,userID,Location,Age,ISBN,bookRating
7,8,"timmins, ontario, canada",46,2005018,5.0
11,12,"fort bragg, california, usa",46,1879384493,10.0
13,14,"mediapolis, iowa, usa",46,61076031,5.0
15,16,"albuquerque, new mexico, usa",46,345402871,9.0
18,19,"weston, ,",19,375759778,7.0


In [53]:
BookRatings.userID.unique().shape


(61614,)

There is only about 61614 Users

### Find out which rating has been given highest number of times

In [54]:
BookRatings.groupby('bookRating').count()

Unnamed: 0_level_0,userID,Location,Age,ISBN
bookRating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,309,309,309,309
2.0,460,460,460,460
3.0,1072,1072,1072,1072
4.0,1512,1512,1512,1512
5.0,6526,6526,6526,6526
6.0,5690,5690,5690,5690
7.0,11681,11681,11681,11681
8.0,16027,16027,16027,16027
9.0,8716,8716,8716,8716
10.0,9621,9621,9621,9621


Rating 8 has been given highest number of times

### **Collaborative Filtering Based Recommendation Systems**

### For more accurate results only consider users who have rated atleast 100 books

In [36]:
counts1=BookRatings['userID'].value_counts()
BookRatings=BookRatings[BookRatings['userID'].isin(counts1[counts1>=100].index)]
BookRatings.head()

Unnamed: 0,userID,ISBN,bookRating,Location,Age
1456,277427,002542730X,10,"gilbert, arizona, usa",277427
1458,277427,003008685X,8,"gilbert, arizona, usa",277427
1461,277427,0060006641,10,"gilbert, arizona, usa",277427
1465,277427,0060542128,7,"gilbert, arizona, usa",277427
1474,277427,0061009059,9,"gilbert, arizona, usa",277427


### Generating ratings matrix from explicit ratings

In [37]:
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(ratings, test_size = 0.2, random_state=0)
testDF = test_data.copy()
test_data.rating = np.nan
print(train_data.head(5))

         userID        ISBN  bookRating
1013398  242824  0553587064           0
785440   189835  0933635842           5
505873   123094  0345331966           9
190693    42093  0679728430           0
962528   232131  0805051309           0


In [38]:
#### Note: since NaNs cannot be handled by training algorithms, replace these by 0, which indicates absence of ratings

In [39]:
testDF = testDF.dropna()
testDF.head()

Unnamed: 0,userID,ISBN,bookRating
482674,115002,0263779920,5
589776,142454,3426618257,0
996403,238961,069401110X,0
185455,40889,0553564528,0
1046525,250367,0892132701,0


In [40]:
ratings = pd.concat([train_data, test_data]).reset_index()
ratings.head()

Unnamed: 0,index,userID,ISBN,bookRating
0,1013398,242824,553587064,0
1,785440,189835,933635842,5
2,505873,123094,345331966,9
3,190693,42093,679728430,0
4,962528,232131,805051309,0


In [41]:
ratings.shape

(1149780, 4)

In [42]:
# We want the format of ratings matrix to be one row per user and one column per Book. 
#we can pivot ratings_df to get that and call the new variable R_df.
R_df = BookRatings.pivot(index = 'userID', columns ='ISBN', values = 'bookRating').fillna(0)
R_df.head

<bound method NDFrame.head of ISBN     0375404120   9022906116  0*708880258  0.330241664  0000000000  \
userID                                                                   
2033            0.0          0.0          0.0          0.0         0.0   
2110            0.0          0.0          0.0          0.0         0.0   
2276            0.0          0.0          0.0          0.0         0.0   
3757            0.0          0.0          0.0          0.0         0.0   
4017            0.0          0.0          0.0          0.0         0.0   
4385            0.0          0.0          0.0          0.0         0.0   
5582            0.0          0.0          0.0          0.0         0.0   
6242            0.0          0.0          0.0          0.0         0.0   
6251            0.0          0.0          0.0          0.0         0.0   
6543            0.0          0.0          0.0          0.0         0.0   
6575            0.0          0.0          0.0          0.0         0.0   
7286    

### Generate the predicted ratings using SVD with no.of singular values to be 50

In [43]:
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(R_df, k = 50)

sigma = np.diag(sigma)

all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = R_df.columns)
preds_df.head()

ISBN,0375404120,9022906116,0*708880258,0.330241664,0000000000,00000000000,0000000000000,0000000029841,0000000051,0000018030,...,O67174142X,O9088446X,Q380708353,SBN67001026X,UNGRANDHOMMED,X000000000,ZR903CX0003,"\0432534220\""""","\2842053052\""""",Ô½crosoft
0,0.000257,-0.009825,-0.000331,0.036243,-0.012632,-0.011229,0.001827,0.006273,0.004211,0.004392,...,0.056523,-0.011229,0.002159,0.0069,0.000173,-0.014036,-0.001404,-0.008422,-0.009825,0.001075
1,5.7e-05,0.000457,0.000853,-0.005987,0.000588,0.000523,5.2e-05,0.001139,0.000404,0.002004,...,0.018512,0.000523,9e-05,0.002887,1.1e-05,0.000653,6.5e-05,0.000392,0.000457,-3.8e-05
2,5.2e-05,0.003483,0.003033,0.029764,0.004479,0.003981,0.000563,0.001191,0.012415,0.007998,...,0.044935,0.003981,0.000232,0.00549,0.000214,0.004976,0.000498,0.002986,0.003483,0.007754
3,-2.3e-05,-0.000173,-0.000265,-0.00078,-0.000223,-0.000198,-0.000106,0.002102,-0.000104,-5.5e-05,...,-0.002966,-0.000198,8.8e-05,-0.000628,0.000687,-0.000247,-2.5e-05,-0.000148,-0.000173,0.00012
4,0.001275,-0.005365,0.002881,0.076353,-0.006898,-0.006132,0.001901,0.011483,0.013445,0.011508,...,0.307472,-0.006132,0.007165,0.046193,0.000671,-0.007665,-0.000766,-0.004599,-0.005365,0.014067



### Lets find the recommendations for user with id `2110`

#### Note: Execute the below cells to get the variables loaded

In [44]:
sigma

array([[152.26201212,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        , 153.62983913,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        , 154.512652  , ...,   0.        ,
          0.        ,   0.        ],
       ...,
       [  0.        ,   0.        ,   0.        , ..., 391.46660507,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
        674.15806585,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        , 708.9715843 ]])

### Create a dataframe with name `user_data` containing userID `2110` explicitly interacted books

In [67]:
pm = Recommenders.popularity_recommender_py()
pm.create(train_data, 'userID', 'ISBN')

In [76]:
userID = 2110
a=pm.recommend(userID)
user_data=pd.DataFrame(a)
user_data=pd.merge(user_data,books,how='inner',on='ISBN')
user_data

Unnamed: 0,userID,ISBN,score,Rank,bookTitle,bookAuthor,yearOfPublication,publisher
0,2110,0971880107,2010,1.0,Wild Animus,Rich Shapero,2004,Too Far
1,2110,0316666343,1020,2.0,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown"
2,2110,0385504209,702,3.0,The Da Vinci Code,Dan Brown,2003,Doubleday
3,2110,0060928336,597,4.0,Divine Secrets of the Ya-Ya Sisterhood: A Novel,Rebecca Wells,1997,Perennial
4,2110,0312195516,574,5.0,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA
5,2110,067976402X,518,6.0,Snow Falling on Cedars,David Guterson,1995,Vintage Books USA
6,2110,0142001740,510,8.0,The Secret Life of Bees,Sue Monk Kidd,2003,Penguin Books
7,2110,044023722X,498,9.0,A Painted House,John Grisham,2001,Dell Publishing Company
8,2110,0446672211,478,10.0,Where the Heart Is (Oprah's Book Club (Paperba...,Billie Letts,1998,Warner Books


In [46]:
#user_id = 2 #2nd row in ratings matrix and predicted matrix
###Fill in the code here
print("2nd row of ratings matrix:\n", R_df.iloc[1:2])
print("2nd row of Predicted matrix:\n",preds_df.iloc[1:2] )

2nd row of ratings matrix:
 ISBN     0375404120   9022906116  0*708880258  0.330241664  0000000000  \
userID                                                                   
2110            0.0          0.0          0.0          0.0         0.0   

ISBN    00000000000  0000000000000  0000000029841  0000000051  0000018030  \
userID                                                                      
2110            0.0            0.0            0.0         0.0         0.0   

ISBN      ...      O67174142X  O9088446X  Q380708353  SBN67001026X  \
userID    ...                                                        
2110      ...             0.0        0.0         0.0           0.0   

ISBN    UNGRANDHOMMED  X000000000  ZR903CX0003  \0432534220\""  \
userID                                                           
2110              0.0         0.0          0.0             0.0   

ISBN    \2842053052\""  Ô½crosoft  
userID                             
2110               0.0        0.0  

In [64]:
is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'userID', 'ISBN')

In [60]:
userID = 2110
b = is_model.get_user_items(userID)
book_data=pd.DataFrame(b)
book_data.columns=['ISBN']
book_data=pd.merge(book_data,books,how='inner',on='ISBN')

In [63]:
book_data

Unnamed: 0,ISBN,bookTitle,bookAuthor,yearOfPublication,publisher
0,0441693237,Quantum Leap: Too Close for Comfort (Quantum L...,Ashley McConnell,1993,Ace Books
1,0373512066,Sweet Enemy,Diana Palmer,2002,Silhouette
2,059047054X,Claudia and the Clue in the Photograph (Baby-S...,Ann M. Martin,1994,Scholastic
3,0671795937,ROGUE WARRIOR: ROGUE WARRIOR I (PAPERBACK),Richard Marcinko,1993,Pocket
4,0515134384,The Cat Who Went Up the Creek,Lilian Jackson Braun,2003,Jove Books
5,0590448293,Karen's Big Joke (Baby-Sitters Little Sister (...,Ann M. Martin,1992,Scholastic
6,0373765649,Breathless For The Bachelor (Silhouette Desire...,Cindy Gerard,2004,Silhouette
7,0679811761,The Secret of Terror Castle (Three Investigato...,Robert Arthur,1991,Random House Children's Books
8,0373619502,Axis Of Conflict The Terror File,James Axler,2003,Gold Eagle
9,097089726x,Jasper,Michelle Groce,2003,Novello Festival Press


### Combine the user_data and and corresponding book data(`book_data`) in a single dataframe with name `user_full_info`

In [84]:
user_full_info=pd.concat([user_data,book_data])
user_full_info

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,ISBN,Rank,bookAuthor,bookTitle,publisher,score,userID,yearOfPublication
0,0971880107,1.0,Rich Shapero,Wild Animus,Too Far,2010.0,2110.0,2004
1,0316666343,2.0,Alice Sebold,The Lovely Bones: A Novel,"Little, Brown",1020.0,2110.0,2002
2,0385504209,3.0,Dan Brown,The Da Vinci Code,Doubleday,702.0,2110.0,2003
3,0060928336,4.0,Rebecca Wells,Divine Secrets of the Ya-Ya Sisterhood: A Novel,Perennial,597.0,2110.0,1997
4,0312195516,5.0,Anita Diamant,The Red Tent (Bestselling Backlist),Picador USA,574.0,2110.0,1998
5,067976402X,6.0,David Guterson,Snow Falling on Cedars,Vintage Books USA,518.0,2110.0,1995
6,0142001740,8.0,Sue Monk Kidd,The Secret Life of Bees,Penguin Books,510.0,2110.0,2003
7,044023722X,9.0,John Grisham,A Painted House,Dell Publishing Company,498.0,2110.0,2001
8,0446672211,10.0,Billie Letts,Where the Heart Is (Oprah's Book Club (Paperba...,Warner Books,478.0,2110.0,1998
0,0441693237,,Ashley McConnell,Quantum Leap: Too Close for Comfort (Quantum L...,Ace Books,,,1993


### Get top 10 recommendations for above given userID from the books not already rated by that user

In [85]:
user_full_info.head(10)

Unnamed: 0,ISBN,Rank,bookAuthor,bookTitle,publisher,score,userID,yearOfPublication
0,0971880107,1.0,Rich Shapero,Wild Animus,Too Far,2010.0,2110.0,2004
1,0316666343,2.0,Alice Sebold,The Lovely Bones: A Novel,"Little, Brown",1020.0,2110.0,2002
2,0385504209,3.0,Dan Brown,The Da Vinci Code,Doubleday,702.0,2110.0,2003
3,0060928336,4.0,Rebecca Wells,Divine Secrets of the Ya-Ya Sisterhood: A Novel,Perennial,597.0,2110.0,1997
4,0312195516,5.0,Anita Diamant,The Red Tent (Bestselling Backlist),Picador USA,574.0,2110.0,1998
5,067976402X,6.0,David Guterson,Snow Falling on Cedars,Vintage Books USA,518.0,2110.0,1995
6,0142001740,8.0,Sue Monk Kidd,The Secret Life of Bees,Penguin Books,510.0,2110.0,2003
7,044023722X,9.0,John Grisham,A Painted House,Dell Publishing Company,498.0,2110.0,2001
8,0446672211,10.0,Billie Letts,Where the Heart Is (Oprah's Book Club (Paperba...,Warner Books,478.0,2110.0,1998
0,0441693237,,Ashley McConnell,Quantum Leap: Too Close for Comfort (Quantum L...,Ace Books,,,1993
