# 1. Download MovieLens 20M Dataset
The data will be downloaded to "**./data**" and unzipped automatically.

In [1]:
from urllib.request import urlretrieve
import os
from tqdm import tqdm
import zipfile


class DLProgress(tqdm):
    """
    Progress bar for downloads driven by ``urlretrieve``.

    Pass :meth:`hook` as the ``reporthook`` argument of ``urlretrieve``.
    """
    # Block count seen on the previous hook call; used to turn the cumulative
    # block count reported by urlretrieve into a per-call increment.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """
        A hook function that will be called once on establishment of the network connection and
        once after each block read thereafter.
        :param block_num: A count of blocks transferred so far
        :param block_size: Block size in bytes
        :param total_size: The total size of the file. This may be -1 on older FTP servers which do not return
                            a file size in response to a retrieval request. ``None`` and non-positive
                            values are treated as "size unknown" and leave the bar's total untouched.
        """
        increment = (block_num - self.last_block) * block_size

        if total_size is not None and total_size > 0:
            self.total = total_size
            # Progress is reported in whole blocks, but the final block is
            # usually only partly filled. Clamp so the byte count never
            # overshoots the reported total.
            increment = min(increment, max(total_size - self.n, 0))

        self.update(increment)
        self.last_block = block_num

        
def download_extract():
    """
    Download the MovieLens 20M archive into ``./data`` and extract it there.

    The archive is downloaded only when ``./data/ml-20m.zip`` is missing, and
    it is extracted only when at least one of its members is missing on disk.
    When everything is already in place the function returns without printing.

    :raises zipfile.BadZipFile: if ``./data/ml-20m.zip`` is not a valid zip archive.
    :raises OSError: on network or file-system failures (``urllib.error.URLError``
                     is a subclass of ``OSError``).
    """
    url = 'http://files.grouplens.org/datasets/movielens/ml-20m.zip'
    data_path = './data'

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_path, exist_ok=True)

    file_path = os.path.join(data_path, 'ml-20m.zip')

    # download data:
    if not os.path.exists(file_path):
        # Download under a temporary name and rename on success, so an
        # interrupted transfer is never mistaken for a complete archive.
        partial_path = file_path + '.part'
        try:
            with DLProgress(unit='B', unit_scale=True, miniters=1, desc='Downloading ml-20m.zip') as pbar:
                urlretrieve(
                    url,
                    partial_path,
                    pbar.hook)
            os.replace(partial_path, file_path)
        finally:
            # Only present here if the download or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    with zipfile.ZipFile(file_path) as zf:
        # Decide from the extracted files themselves, not from the presence
        # of the zip: a previous run may have stopped before extracting.
        already_extracted = all(
            os.path.exists(os.path.join(data_path, member))
            for member in zf.namelist())
        if already_extracted:
            return

        print('Extracting data...')
        zf.extractall(data_path)

    print('Done.')

#-------------------------------------------------------
# Run the download/extraction when this cell executes (performs network and disk I/O).
download_extract()

Downloading ml-20m.zip: 199MB [02:00, 1.65MB/s]                              


Extracting data...
Done.


# 2. Preview Data
Load the data and run some simple statistical analysis.

In [2]:
import pandas as pd
