# Purpose: the fastest way to find unique values in a list

There are several ways to find unique values in a list.  
In this file we evaluate the performance of different ways to find unique values.  
The data set contains 300,000 integer values between 560 and 999,997,617. You can download the data from the link below:  

https://github.com/yusufcelik49/Python_Performance/blob/master/Unique_Data.txt

### Data

In [None]:
# Read a single line of whitespace-separated integers from standard input.
scores = [int(token) for token in input().rstrip().split()]

In [31]:
# Order the values from largest to smallest, in place. The adjacent-comparison
# benchmark further down compares each element with its successor and therefore
# depends on this ordering.
scores[:] = sorted(scores, reverse=True)

In [14]:
# Sanity check of the loaded data: (number of values, smallest, largest).
(len(scores), min(scores), max(scores))

(300000, 560, 999997617)

# Rank 8: for loop with list membership test

In [98]:
import timeit

# Baseline approach: walk the values and keep each one that has not been seen
# yet, using a list membership test (every test scans the result list).
# Only a single repetition is timed because this approach is extremely slow.
average_time = []
for trial in range(1):
    start = timeit.default_timer()

    D = []
    for x in scores:
        if x in D:
            continue
        D.append(x)

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(D))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  696.859400899999


# Rank 7: OrderedDict

In [86]:
from collections import OrderedDict
import timeit

# De-duplicate by using the values as keys of an OrderedDict (the mapped
# value 1 is a placeholder); timed over 20 repetitions.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    ordered = OrderedDict((x, 1) for x in scores)
    D = list(ordered.keys())

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(D))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  0.2751819999999043


# Rank 6: pandas

In [94]:
import pandas
import timeit

# De-duplicate via pandas: build a one-column DataFrame (column label 0) and
# call unique() on that column; timed over 20 repetitions. The DataFrame
# construction is part of the measured time.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    D = pandas.DataFrame(scores)
    E = list(D[0].unique())

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(E))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  0.15470474999988254


# Rank 5: sets

In [93]:
import timeit

# De-duplicate with a set, then sort the unique values in descending order;
# timed over 20 repetitions. The sort is part of the measured time.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    A = sorted(set(scores), reverse=True)

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(A))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  0.1323434299999917


# Rank 4: dict

In [87]:
import timeit

# De-duplicate by inserting every value as a key of a plain dict with an
# explicit loop (the mapped value 1 is a placeholder); timed over 20 repetitions.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    e = {}
    for x in scores:
        e[x] = 1
    # Iterating a dict yields its keys, i.e. the unique values.
    D = list(e)

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(D))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  0.09168927999994594


# Rank 3: for (adjacent comparison on the sorted list)

In [91]:
import timeit

# De-duplicate by comparing neighbours; timed over 20 repetitions.
# Precondition: `scores` is already sorted in descending order, so equal
# values sit next to each other.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    # Keep an element only when it is strictly greater than its successor,
    # i.e. the last element of every run of equal values. The final element
    # has no successor and is therefore not covered by this comprehension.
    B = [scores[i] for i in range(len(scores) - 1) if scores[i] > scores[i + 1]]
    # The final element always closes its run. Appending it (rather than
    # inserting at index 0) keeps B in descending order and is O(1).
    # The guard avoids an IndexError when `scores` is empty.
    if scores:
        B.append(scores[-1])

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(B))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  0.08515901499990833


# Rank 2: numpy

In [73]:
import numpy
import timeit

# De-duplicate with numpy.unique and convert the resulting array back to a
# Python list; timed over 20 repetitions.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    D = numpy.unique(scores)
    E = list(D)

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

print("Number of Unique values :", len(E))
print('Average Time: ', sum(average_time) / len(average_time))


Number of Unique values : 299948
Average Time:  0.07184400999999525


# Rank 1: Counter

In [78]:
from collections import Counter
import timeit

# De-duplicate by counting the values with Counter and taking its keys;
# timed over 20 repetitions.
average_time = []
for trial in range(20):
    start = timeit.default_timer()

    D = list(Counter(scores).keys())

    # Wall-clock duration of this repetition, in seconds.
    average_time.append(timeit.default_timer() - start)

# Report the size of D, the list computed in this cell. The earlier version
# printed len(E), a variable left over from a different cell.
print("Number of Unique values :", len(D))
print('Average Time: ', sum(average_time) / len(average_time))

Number of Unique values : 299948
Average Time:  0.06609903000007762
