# Spark Lab Session

## Initialize the environment

In [1]:
import findspark
findspark.init()

import pyspark
import random

sc = pyspark.SparkContext(appName="Pi")

## Compute the list L of integers, with L = { 0 ... 499 }

In [2]:
ints = sc.parallelize(range(3000))
ints.take(20)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

## Compute the list C = { x^3 | x ∈ L } and then sum of elements in C

In [7]:
cubes = ints.map(lambda x: x*x*x)

In [8]:
cubes.reduce(lambda x, y: x+y)
cubes.sum()

20236502250000

## What is the repartition for the last digits of integers in C? i.e. how many end with a 0? with a 1? etc.

In [9]:
# Première étape : calculer la liste contenant
# les derniers chiffres
lastDigits = cubes.map(lambda x: x%10)

# Deuxième étape : compter combien de fois chaque
# items apparait 
countLastDigits = lastDigits.map(lambda x:(x,1)) \
                            .reduceByKey(lambda x,y: x+y)

In [10]:
cubes.map(lambda x:(x % 10,1)).reduceByKey(lambda x, y: x+y).collect()

[(0, 300),
 (8, 300),
 (1, 300),
 (9, 300),
 (2, 300),
 (3, 300),
 (4, 300),
 (5, 300),
 (6, 300),
 (7, 300)]

In [14]:
#other method

def myFold(acc, element):
    new_acc = acc.copy()
    new_acc[element]+=1
    return new_acc

def myReduce(acc1, acc2):
    new_acc = []
    for i in range(10):
        new_acc.append(acc1[i]+acc2[i])
    return new_acc
        
val_init = [0]*10


myReduce(myFold(val_init,1),myFold(val_init,2))

myFold(myFold(val_init,1),2)
lastDigits = cubes.map(lambda x:x%10)

lastDigits.aggregate(val_init,myFold,myReduce)

[300, 300, 300, 300, 300, 300, 300, 300, 300, 300]

## What is the repartition of digits for the integers in C?

In [12]:
cubes.flatMap(lambda x: [ (e,1) for e in str(x)]).reduceByKey(lambda x, y: x+y).collect()

#cubes.flatMap(lambda x: [int(e) for e in str(x)]).aggregate(val_init,myFold,myReduce)

[('4', 2762),
 ('7', 2787),
 ('0', 3127),
 ('1', 3667),
 ('8', 2639),
 ('9', 2521),
 ('5', 2653),
 ('6', 2713),
 ('3', 2814),
 ('2', 3294)]

# Computation of π

To compute the value of π, you will generate the list of all pairs (x,y) of integers from 0 to K. Then you will compute the number of such pairs such that (2x+1)^2+(2y+1)^2 is less then (2*K)^2. The ratio between the number of such pairs and the number of total pairs is an approximation of π.
For K=3000 you should obtain a value close to 3.14159.

In [13]:
K=1000
intUpToK = sc.parallelize(range(K))
pairs = intUpToK.cartesian(intUpToK)
nbTotal = pairs.count()

def isOk(v):
    x,y = v
    return (2*x+1)**2+(2*y+1)**2 <= 4*K*K

nbOk = pairs.filter(isOk).count()
print(4*float(nbOk)/nbTotal)
print(nbOk)

3.141676
785419


# Readings files into RDD

In [6]:
import re
future_pattern = re.compile("""([^,"]+|"[^"]+")(?=,|$)""")

def parseCSV(line):
     return future_pattern.findall(line)


#ratingsFile = sc.textFile("/home/jachiet/Downloads/ml-latest-small/ratings.csv")
#moviesFile = sc.textFile("/home/jachiet/Downloads/ml-latest-small/movies.csv")
ratingsFile = sc.textFile("/home/savoga/ml-latest-small/ratings.csv")
moviesFile = sc.textFile("/home/savoga/ml-latest-small/movies.csv")

In [7]:
ratings = ratingsFile.map(parseCSV).filter(lambda x: x[0]!="userId")
movies = moviesFile.map(parseCSV).filter(lambda x:x[0]!="movieId")
movies.take(30)

[['1', 'Toy Story (1995)', 'Adventure|Animation|Children|Comedy|Fantasy'],
 ['2', 'Jumanji (1995)', 'Adventure|Children|Fantasy'],
 ['3', 'Grumpier Old Men (1995)', 'Comedy|Romance'],
 ['4', 'Waiting to Exhale (1995)', 'Comedy|Drama|Romance'],
 ['5', 'Father of the Bride Part II (1995)', 'Comedy'],
 ['6', 'Heat (1995)', 'Action|Crime|Thriller'],
 ['7', 'Sabrina (1995)', 'Comedy|Romance'],
 ['8', 'Tom and Huck (1995)', 'Adventure|Children'],
 ['9', 'Sudden Death (1995)', 'Action'],
 ['10', 'GoldenEye (1995)', 'Action|Adventure|Thriller'],
 ['11', '"American President, The (1995)"', 'Comedy|Drama|Romance'],
 ['12', 'Dracula: Dead and Loving It (1995)', 'Comedy|Horror'],
 ['13', 'Balto (1995)', 'Adventure|Animation|Children'],
 ['14', 'Nixon (1995)', 'Drama'],
 ['15', 'Cutthroat Island (1995)', 'Action|Adventure|Romance'],
 ['16', 'Casino (1995)', 'Crime|Drama'],
 ['17', 'Sense and Sensibility (1995)', 'Drama|Romance'],
 ['18', 'Four Rooms (1995)', 'Comedy'],
 ['19', 'Ace Ventura: When Na

# Compute the 10 best rated movies 
Try the following functions to determine the average ratings:
 1. sum(ratings)/numberOfRatings
 2. sum(ratings)/(1+numberOfRatings)
 3. sum(ratings)/max(20,numberOfRatings)

In [10]:
import math

ratedMovie = ratings.map(lambda x: (x[1],(float(x[2]),1))).reduceByKey(lambda x,y: (x[0]+y[0],x[1]+y[1]))
movieJoin = movies.map(lambda x: (x[0],x[1]))

ratedMovie3=ratedMovie.mapValues(lambda x:math.log(x[1])*(x[0])/(x[1]))
movieJoin.join(ratedMovie3).map(lambda x: x[1]).sortBy(lambda x: -x[1]).take(10)

[('"Shawshank Redemption, The (1994)"', 25.506303124680446),
 ('Forrest Gump (1994)', 24.135559630846686),
 ('Pulp Fiction (1994)', 24.035971735394472),
 ('"Matrix, The (1999)"', 23.593497870526754),
 ('"Silence of the Lambs, The (1991)"', 23.43310709209536),
 ('Star Wars: Episode IV - A New Hope (1977)', 23.37860964684444),
 ('Fight Club (1999)', 23.00760161003686),
 ("Schindler's List (1993)", 22.78807638333873),
 ('Star Wars: Episode V - The Empire Strikes Back (1980)', 22.561506207236782),
 ('"Godfather, The (1972)"', 22.549726244087907)]

In [11]:
ratedMovies = \
    ratings.map(lambda x: (x[1],(float(x[2]),1))) \
           .reduceByKey(lambda x,y: (x[0]+y[0],x[1]+y[1])) \
           .mapValues(lambda x: x[0]/x[1])
           

ratedMovies.take(3)

[('1', 3.9209302325581397),
 ('50', 4.237745098039215),
 ('70', 3.5090909090909093)]

In [12]:
movieJoin = movies.map(lambda x: (x[0],x[1]))

moviesWithAvg = movieJoin.join(ratedMovies) \
                        .map(lambda x: x[1])

bestMovies = moviesWithAvg.sortBy(lambda x: x[1],ascending=False)
bestMovies.take(10)

[('Lamerica (1994)', 5.0),
 ('What Happened Was... (1994)', 5.0),
 ('Denise Calls Up (1995)', 5.0),
 ('Lesson Faust (1994)', 5.0),
 ('"Sandpiper, The (1965)"', 5.0),
 ('My Man Godfrey (1957)', 5.0),
 ('Black Tar Heroin: The Dark End of the Street (2000)', 5.0),
 ('Slumber Party Massacre II (1987)', 5.0),
 ('Moscow Does Not Believe in Tears (Moskva slezam ne verit) (1979)', 5.0),
 ('Cherish (2002)', 5.0)]

In [13]:
#with log metric

import math

ratedMovies = \
    ratings.map(lambda x: (x[1],(float(x[2]),1))) \
           .reduceByKey(lambda x,y: (x[0]+y[0],x[1]+y[1])) \
           .mapValues(lambda x: math.log(x[1])*x[0]/(x[1]))
           
movieJoin = movies.map(lambda x: (x[0],x[1]))

moviesWithAvg = movieJoin.join(ratedMovies) \
                        .map(lambda x: x[1])

bestMovies2 = moviesWithAvg.sortBy(lambda x: x[1],ascending=False)
bestMovies2.take(10)

[('"Shawshank Redemption, The (1994)"', 25.506303124680446),
 ('Forrest Gump (1994)', 24.135559630846686),
 ('Pulp Fiction (1994)', 24.035971735394472),
 ('"Matrix, The (1999)"', 23.593497870526754),
 ('"Silence of the Lambs, The (1991)"', 23.43310709209536),
 ('Star Wars: Episode IV - A New Hope (1977)', 23.37860964684444),
 ('Fight Club (1999)', 23.00760161003686),
 ("Schindler's List (1993)", 22.78807638333873),
 ('Star Wars: Episode V - The Empire Strikes Back (1980)', 22.561506207236782),
 ('"Godfather, The (1972)"', 22.549726244087907)]

In [20]:
# with one 0 vote

ratedMovie2=ratedMovie.mapValues(lambda x:(x[0])/((1+x[1])))
movieJoin.join(ratedMovie2).map(lambda x: x[1]).sortBy(lambda x: -x[1]).take(10)

[('"Shawshank Redemption, The (1994)"', 4.415094339622642),
 ('"Godfather, The (1972)"', 4.266839378238342),
 ('"Streetcar Named Desire, A (1951)"', 4.261904761904762),
 ('Fight Club (1999)', 4.2534246575342465),
 ('"Godfather: Part II, The (1974)"', 4.226923076923077),
 ('Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)',
  4.224489795918367),
 ('"Three Billboards Outside Ebbing, Missouri (2017)"', 4.222222222222222),
 ('"Usual Suspects, The (1995)"', 4.217073170731707),
 ('Goodfellas (1990)', 4.216535433070866),
 ('Star Wars: Episode IV - A New Hope (1977)', 4.214285714285714)]

In [21]:
ratedMovie1=ratedMovie.mapValues(lambda x:(x[0])/((1+x[1])))
movieJoin.join(ratedMovie1).map(lambda x: x[1]).filter(lambda x: x[1]>4).sortBy(lambda x: -x[1]).take(10)

[('"Shawshank Redemption, The (1994)"', 4.415094339622642),
 ('"Godfather, The (1972)"', 4.266839378238342),
 ('"Streetcar Named Desire, A (1951)"', 4.261904761904762),
 ('Fight Club (1999)', 4.2534246575342465),
 ('"Godfather: Part II, The (1974)"', 4.226923076923077),
 ('Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)',
  4.224489795918367),
 ('"Three Billboards Outside Ebbing, Missouri (2017)"', 4.222222222222222),
 ('"Usual Suspects, The (1995)"', 4.217073170731707),
 ('Goodfellas (1990)', 4.216535433070866),
 ('Star Wars: Episode IV - A New Hope (1977)', 4.214285714285714)]

In [22]:
import math

ratedMovie1=ratedMovie.mapValues(lambda x:(x[0]/x[1])*math.log(x[1]))
                                 
movieJoin.join(ratedMovie1).map(lambda x: x[1]).filter(lambda x: x[1]>4).sortBy(lambda x: -x[1]).take(10)

[('"Shawshank Redemption, The (1994)"', 25.506303124680446),
 ('Forrest Gump (1994)', 24.135559630846686),
 ('Pulp Fiction (1994)', 24.035971735394476),
 ('"Matrix, The (1999)"', 23.593497870526754),
 ('"Silence of the Lambs, The (1991)"', 23.43310709209536),
 ('Star Wars: Episode IV - A New Hope (1977)', 23.37860964684444),
 ('Fight Club (1999)', 23.007601610036865),
 ("Schindler's List (1993)", 22.788076383338726),
 ('Star Wars: Episode V - The Empire Strikes Back (1980)', 22.561506207236786),
 ('"Godfather, The (1972)"', 22.549726244087907)]

## What are the names of movies seen by the userId=1?

In [14]:
moviesUser1 = ratings.filter(lambda x:  x[0]=="1").map(lambda x: (x[1],x[2])).join(movies).map(lambda x: (x[1][1],x[1][0])).sortBy(lambda x:x[0])
for m in moviesUser1.collect():
    print(m)

('"13th Warrior, The (1999)"', '4.0')
('"Abyss, The (1989)"', '4.0')
('"Adventures of Robin Hood, The (1938)"', '5.0')
('"American Tail, An (1986)"', '5.0')
('"Big Lebowski, The (1998)"', '5.0')
('"Black Cauldron, The (1985)"', '5.0')
('"Blues Brothers, The (1980)"', '5.0')
('"Clockwork Orange, A (1971)"', '5.0')
('"Dirty Dozen, The (1967)"', '5.0')
('"Few Good Men, A (1992)"', '4.0')
('"Fugitive, The (1993)"', '5.0')
('"Game, The (1997)"', '5.0')
('"Ghost and Mrs. Muir, The (1947)"', '4.0')
('"Ghost and the Darkness, The (1996)"', '5.0')
('"Good Morning, Vietnam (1987)"', '5.0')
('"Goonies, The (1985)"', '5.0')
('"Great Mouse Detective, The (1986)"', '5.0')
('"Green Mile, The (1999)"', '5.0')
('"Honey, I Shrunk the Kids (1989)"', '4.0')
('"Iron Giant, The (1999)"', '5.0')
('"Jungle Book, The (1967)"', '5.0')
('"Jungle Book, The (1994)"', '5.0')
('"Last of the Mohicans, The (1992)"', '3.0')
('"Lock, Stock & Two Smoking Barrels (1998)"', '5.0')
('"Longest Day, The (1962)"', '4.0')
('"Lo

## Compute the similarity coefficient between userId=1 and all other users. What is the similarity between userId=1 and userId=1? userId=2? userId=3? userId=4? 

In [15]:
def avg(dic, items):
    s=0
    for i in items:
        s += dic[i]
    return s/len(items)

def safeDivide(x,y):
    if y==0:
        return 0
    return x/y


def simil(x,y): # x is the list of movieId/rating for the first user
    x = [e for e in x]
    y = [e for e in y]
    if x==y:
        return 0
    dX = dict(x)
    dY = dict(y)
    common = set(dX.keys()) & set(dY.keys())
    if len(common)<1:
        return 0
    avgX = avg(dX,common)
    avgY = avg(dY,common)
    varX = 0
    varY = 0
    res = 0 
    for c in common:
        res += (dX[c]-avgX)*(dY[c]-avgY)
        varX += (dX[c]-avgX)**2
        varY += (dY[c]-avgY)**2
    if varX*varY == 0:
        return 0
    if res < 0:
        return 0
    #return res / (1+(varX*varY)**(0.5))
    return math.log(1+len(common))*res / ((varX*varY)**(0.5))

userRatings = ratings.map(lambda x: (x[0],(x[1],float(x[2])))).groupByKey()
user1 = userRatings.filter(lambda x:x[0]=='1').take(1)[0]
userSimil = userRatings.mapValues(lambda x: simil(x,user1[1])) # user1[1] is the list ((1,3.0),...,(158,4.0))

output = userSimil.filter(lambda x: x[0] in ['1','2','3','4']).collect() # (user, simil)
for (k,v) in output:
    print(str(k)+" -> "+str(v))

1 -> 0
4 -> 0.7962921953903238
2 -> 0
3 -> 0.16597864726681164


In [30]:
for m in userSimil.collect():
    print(m)

('1', 0)
('4', 0.7962921953903238)
('8', 1.3021968052914652)
('9', 1.6458361655282818)
('10', 0)
('12', 0)
('14', 0.6045580623351945)
('16', 0.6323082612198457)
('17', 0.13885218765886073)
('19', 1.5674603326251801)
('20', 1.4798062863831714)
('21', 0.4146783261088008)
('22', 0)
('24', 0.8114170851816215)
('26', 0.2940774430405641)
('29', 0)
('33', 0.2949751791361563)
('34', 0.08970947094849752)
('40', 0)
('44', 1.80629636167389)
('45', 1.234093512385323)
('48', 0)
('50', 0)
('53', 0)
('54', 0)
('56', 0.7040930522865744)
('57', 1.6747239181504423)
('60', 0)
('63', 1.129900956989291)
('64', 0.9504362702356051)
('66', 0.654933426678369)
('68', 0.14233504039670775)
('69', 0.83596695360366)
('70', 0)
('73', 0)
('74', 0)
('77', 0)
('82', 0.399866416635122)
('83', 0.44824063670235775)
('84', 0)
('86', 0.7112700663625092)
('88', 0.47821027410463074)
('91', 0.4563746432146068)
('93', 0.7068074410643237)
('96', 0.32083881674322207)
('98', 0)
('100', 0.5063602835666813)
('102', 0)
('106', 1.0986

In [142]:
userSimil.map(lambda x:x[1]).reduce(lambda x,y: x+y) / userSimil.count()

0.5171211853962832

## Compute the 10 top movies recommended by collaborative filtering using pearson correlation for userId=1

In [43]:

userMovieRatings = ratings.map(lambda x: (x[0],(x[1],float(x[2])))) # (user, (movie,rating))
movieRatingSimil = userMovieRatings.join(userSimil).map(lambda x:x[1]) # (movie,rating),simil
moviePearsonWeight = movieRatingSimil.map(lambda x: (x[0][0],(x[0][1]*x[1],x[1]))) # movie, (rating*simil,simil)



In [44]:
moviePearson = moviePearsonWeight.reduceByKey(lambda x,y: (x[0]+y[0],x[1]+y[1])).mapValues(lambda x: safeDivide(x[0],0.5+x[1]))
moviePearson.sortBy(lambda x: -x[1]).take(20)

[('318', 4.536635892289907),
 ('3030', 4.434424835574195),
 ('260', 4.431828042208414),
 ('1235', 4.429626031654389),
 ('1196', 4.425127195922486),
 ('3983', 4.406293660279343),
 ('527', 4.401320700492619),
 ('858', 4.397350914935063),
 ('1178', 4.3885014100181605),
 ('6442', 4.385843025893944),
 ('48516', 4.381036957849871),
 ('177593', 4.3803680316756655),
 ('1136', 4.369367624272602),
 ('912', 4.365948042565985),
 ('1208', 4.3450753526950185),
 ('750', 4.33420656292983),
 ('5618', 4.33370523670445),
 ('1217', 4.332198889360908),
 ('50', 4.326823154051749),
 ('1276', 4.323868264719015)]

In [52]:
res = movies.join(moviePearson).map(lambda x: x[1]).sortBy(lambda x: -x[1])
for i in res.take(300):
        print(str(i[1])+"\t"+str(i[0]))

4.536635892289907	"Shawshank Redemption, The (1994)"
4.434424835574195	Yojimbo (1961)
4.431828042208414	Star Wars: Episode IV - A New Hope (1977)
4.429626031654389	Harold and Maude (1971)
4.425127195922486	Star Wars: Episode V - The Empire Strikes Back (1980)
4.406293660279343	You Can Count on Me (2000)
4.401320700492619	Schindler's List (1993)
4.397350914935063	"Godfather, The (1972)"
4.3885014100181605	Paths of Glory (1957)
4.385843025893944	Belle époque (1992)
4.381036957849871	"Departed, The (2006)"
4.3803680316756655	"Three Billboards Outside Ebbing, Missouri (2017)"
4.369367624272602	Monty Python and the Holy Grail (1975)
4.365948042565985	Casablanca (1942)
4.3450753526950185	Apocalypse Now (1979)
4.33420656292983	Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
4.33370523670445	Spirited Away (Sen to Chihiro no kamikakushi) (2001)
4.332198889360908	Ran (1985)
4.326823154051749	"Usual Suspects, The (1995)"
4.323868264719015	Cool Hand Luke (1967)
4.320550

## Remove from the previous list the movies already rated (and thus seen) by userId=1

In [39]:
moviesUser1.count()

232

In [54]:
for i in moviesUser1.take(232):
        print(str(i))

('"13th Warrior, The (1999)"', '4.0')
('"Abyss, The (1989)"', '4.0')
('"Adventures of Robin Hood, The (1938)"', '5.0')
('"American Tail, An (1986)"', '5.0')
('"Big Lebowski, The (1998)"', '5.0')
('"Black Cauldron, The (1985)"', '5.0')
('"Blues Brothers, The (1980)"', '5.0')
('"Clockwork Orange, A (1971)"', '5.0')
('"Dirty Dozen, The (1967)"', '5.0')
('"Few Good Men, A (1992)"', '4.0')
('"Fugitive, The (1993)"', '5.0')
('"Game, The (1997)"', '5.0')
('"Ghost and Mrs. Muir, The (1947)"', '4.0')
('"Ghost and the Darkness, The (1996)"', '5.0')
('"Good Morning, Vietnam (1987)"', '5.0')
('"Goonies, The (1985)"', '5.0')
('"Great Mouse Detective, The (1986)"', '5.0')
('"Green Mile, The (1999)"', '5.0')
('"Honey, I Shrunk the Kids (1989)"', '4.0')
('"Iron Giant, The (1999)"', '5.0')
('"Jungle Book, The (1967)"', '5.0')
('"Jungle Book, The (1994)"', '5.0')
('"Last of the Mohicans, The (1992)"', '3.0')
('"Lock, Stock & Two Smoking Barrels (1998)"', '5.0')
('"Longest Day, The (1962)"', '4.0')
('"Lo

In [63]:
res2 = res.subtractByKey(moviesUser1.map(lambda x: (x,'#'))).sortBy(lambda x: (-x[1],x[0]))
for i in res2.take(300):
        print(str(i[1])+"\t"+str(i[0]))

4.536635892289907	"Shawshank Redemption, The (1994)"
4.434424835574195	Yojimbo (1961)
4.431828042208414	Star Wars: Episode IV - A New Hope (1977)
4.429626031654389	Harold and Maude (1971)
4.425127195922486	Star Wars: Episode V - The Empire Strikes Back (1980)
4.406293660279343	You Can Count on Me (2000)
4.401320700492619	Schindler's List (1993)
4.397350914935063	"Godfather, The (1972)"
4.3885014100181605	Paths of Glory (1957)
4.385843025893944	Belle époque (1992)
4.381036957849871	"Departed, The (2006)"
4.3803680316756655	"Three Billboards Outside Ebbing, Missouri (2017)"
4.369367624272602	Monty Python and the Holy Grail (1975)
4.365948042565985	Casablanca (1942)
4.3450753526950185	Apocalypse Now (1979)
4.33420656292983	Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
4.33370523670445	Spirited Away (Sen to Chihiro no kamikakushi) (2001)
4.332198889360908	Ran (1985)
4.326823154051749	"Usual Suspects, The (1995)"
4.323868264719015	Cool Hand Luke (1967)
4.320550

In [64]:
res.count()

9724

In [56]:
res2.count()

9724

In [58]:
res2 = res.join(moviesUser1).sortBy(lambda x: -float(x[1][0]))
for i in res2.collect():
        print(str(i[1][0])+"\t"+str(i[1][1])+"\t"+str(i[0]))
        

4.431828042208414	5.0	Star Wars: Episode IV - A New Hope (1977)
4.425127195922486	5.0	Star Wars: Episode V - The Empire Strikes Back (1980)
4.401320700492619	5.0	Schindler's List (1993)
4.369367624272602	5.0	Monty Python and the Holy Grail (1975)
4.3450753526950185	4.0	Apocalypse Now (1979)
4.326823154051749	5.0	"Usual Suspects, The (1995)"
4.3167435675676895	5.0	Fight Club (1999)
4.304568368384023	5.0	"Princess Bride, The (1987)"
4.301772537356533	5.0	"Matrix, The (1999)"
4.277214990723313	5.0	American History X (1998)
4.274192403524024	5.0	Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
4.236389246051358	5.0	American Beauty (1999)
4.234688230035718	5.0	Goodfellas (1990)
4.2227883052851976	5.0	Star Wars: Episode VI - Return of the Jedi (1983)
4.2096758223008335	5.0	Office Space (1999)
4.202068850479713	5.0	"Green Mile, The (1999)"
4.171642332425308	5.0	L.A. Confidential (1997)
4.169111395053907	5.0	Reservoir Dogs (1992)
4.168726786577663	5.0	Monty Python

In [62]:
res2.count()

232