# Python Code Profiling
- Code / Example taken from Sebastian Mathot: 
    - https://www.youtube.com/watch?v=8qEnExGLZfY
- Adapted / annotated / slightly edited for live demo as part of Code Profiling lesson.

## Goal: Find all duplicate movies from a text file of 10,000 movie titles.

In [None]:
def read_movies(src):
    """Load movie titles from the text file at ``src``.

    The file is read in one go and split on line boundaries, so the
    returned list holds one title per line of the file, without the
    trailing newline characters.
    """

    with open(src) as handle:
        contents = handle.read()

    # Splitting after the file is closed yields the same list of lines.
    return contents.splitlines()

In [None]:
def is_duplicate(item:str, collection:list) -> bool:
    
    """Determine (True or False) whether a given item (i.e. movie)
       is in a collection of other movie titles (i.e. list).

       The comparison is case-insensitive: both sides are lower-cased
       before being compared.

       If you've exhausted the list of movies and found no matches, return False."""
    
    # Lower-case the searched title once, rather than once per element
    # of the collection: it does not change while we scan.
    target = item.lower()
    
    # any() stops at the first match, just like an early `return True`.
    return any(movie.lower() == target for movie in collection)

In [None]:
def find_duplicate_movies(src='movies.txt') -> list:
    
    """Collect the titles in the text file ``src`` that occur more than once.

       Titles are taken off the end of the list one at a time and compared
       (via ``is_duplicate``) against everything still left in the list.
       Consequently the result is in reverse file order, and a title that
       occurs k times contributes k - 1 entries to the result."""
    
    remaining = read_movies(src)
    found = []
    
    while remaining:
        
        # pop() removes the candidate, so it is never compared with itself.
        candidate = remaining.pop()
        
        if not is_duplicate(candidate, remaining):
            continue
        
        found.append(candidate)
    
    return found

---

---

---

### cProfile decorator:

In [None]:
import cProfile
import functools
import io
import pstats


def profile(fnc):
    
    """A decorator that uses cProfile to profile a function. 
       Starts the profile before executing a function, then executes the function,
       then stops the profile, then prints out a diagnostics report
       (sorted by cumulative time) and returns the function's return value.
       
       If the function raises, the profiler is still stopped and the
       exception propagates; no report is printed in that case.
       
       Lots of boilerplate code from the Python 3 documentation:
       https://docs.python.org/3/library/profile.html#profile.Profile
       """
    
    # Preserve the wrapped function's __name__, __doc__, etc.
    @functools.wraps(fnc)
    def inner(*args, **kwargs):
        
        pr = cProfile.Profile()
        pr.enable()
        try:
            retval = fnc(*args, **kwargs)
        finally:
            # Always stop profiling, even when fnc raises; otherwise the
            # profiler stays active after this call has ended.
            pr.disable()
        
        # Render the statistics into a string buffer, then print it.
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())

        return retval

    return inner