In [1]:
import urllib, os
from tqdm import tqdm
import requests
from zipfile import ZipFile
import time
import yaml
import sys

In [2]:
# Absolute, machine-specific path to the YAML file that holds the checkpoint download links.
# NOTE(review): load_configs() further down opens the relative "config.yaml" instead —
# confirm both are meant to refer to the same file.
yaml_path = "/home/vajira/DL/singan-polyp-aug/config.yaml"

In [3]:
# Parse the YAML config with the safe loader (plain Python objects only).
with open(yaml_path) as f:
    output = yaml.safe_load(f)

In [4]:
# Display the parsed configuration to inspect its structure.
output

{'links': {'link1': 'https://www.dropbox.com/s/yc3tn8sgn3m0v3r/TrainedModels_1_clean.zip?dl=1',
  'link2': 'https://www.dropbox.com/s/0i7io4tkpoccmq4/TrainedModels_2_clean.zip?dl=1',
  'link3': 'https://www.dropbox.com/s/hsdpkifid9prtst/TrainedModels_3_clean.zip?dl=1',
  'link4': 'https://www.dropbox.com/s/hzu470zcmy5ygf1/TrainedModels_4_clean.zip?dl=1'}}

In [5]:
def load_configs(config_path: str = "config.yaml") -> dict:
    """Load the YAML configuration file and return it as a dictionary.

    Args:
        config_path: Path of the YAML file to read. Defaults to ``"config.yaml"``
            resolved against the current working directory, which is what this
            function always read before the parameter existed.

    Returns:
        A new ``dict`` built from the top-level mapping of the YAML document.
        An empty document yields an empty dict.

    Raises:
        OSError: If ``config_path`` cannot be opened.
    """
    # The ``with`` block closes the file on exit; no explicit close() is needed.
    with open(config_path) as f:
        output = yaml.safe_load(f)

    # safe_load returns None for an empty document; dict(None) would raise TypeError.
    return dict(output or {})
    

In [6]:
# Load the configuration through the helper; it reads "config.yaml" relative to the working directory.
configs = load_configs()

In [7]:
# Display the mapping of link keys to download URLs.
configs["links"]

{'link1': 'https://www.dropbox.com/s/yc3tn8sgn3m0v3r/TrainedModels_1_clean.zip?dl=1',
 'link2': 'https://www.dropbox.com/s/0i7io4tkpoccmq4/TrainedModels_2_clean.zip?dl=1',
 'link3': 'https://www.dropbox.com/s/hsdpkifid9prtst/TrainedModels_3_clean.zip?dl=1',
 'link4': 'https://www.dropbox.com/s/hzu470zcmy5ygf1/TrainedModels_4_clean.zip?dl=1'}

In [8]:
def extract_zip_file(zip_path:str, dst_dir:str):
    """Unpack every member of a zip archive into ``dst_dir`` with a progress bar.

    Args:
        zip_path: Path of the zip archive to read.
        dst_dir: Directory the members are extracted into.

    Returns:
        None.
    """
    print("=== Extracting files ===")
    # Fixed two-second pause before extraction starts.
    # NOTE(review): presumably lets the banner render before the progress bar — confirm.
    time.sleep(2)

    with ZipFile(file=zip_path) as archive:
        member_names = archive.namelist()

        # Extract member by member (rather than all at once) so tqdm can report progress.
        for member_name in tqdm(iterable=member_names, total=len(member_names)):
            archive.extract(member=member_name, path=dst_dir)

In [9]:
def _download_with_progress(url: str, dst_path: str, chunk_size: int = 1024) -> None:
    """Stream ``url`` into ``dst_path`` while showing a byte-level progress bar."""
    # Write to a sibling ".part" file first so an interrupted download never
    # leaves a truncated archive that later runs would mistake for a complete one.
    part_path = dst_path + ".part"
    try:
        with requests.get(url, stream=True) as r:
            # Fail on HTTP errors instead of saving an error page as the archive.
            r.raise_for_status()
            filesize = int(r.headers.get("content-length", 0))

            with open(part_path, "wb") as f, tqdm(
                unit="B",  # unit string to be displayed.
                unit_scale=True,  # let tqdm determine the scale (kilo, mega, ...).
                unit_divisor=1024,  # used when unit_scale is true.
                total=filesize or None,  # unknown size -> open-ended bar.
                file=sys.stdout,  # tqdm defaults to stderr; show it on stdout.
                desc=os.path.basename(dst_path)  # prefix shown on the progress bar.
            ) as progress:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    # Write chunk by chunk and advance the bar by the bytes written.
                    progress.update(f.write(chunk))

        os.replace(part_path, dst_path)
    finally:
        # After a successful replace the partial file is gone and this is a no-op.
        if os.path.exists(part_path):
            os.remove(part_path)


def download_and_extract_single_file(url:str, path_to_extract:str, extracting:bool = True, clean:bool =False):
    """Download a zip archive into ``path_to_extract`` and optionally extract it.

    The archive name is the last path component of ``url`` without its query
    string. Work already done is reused: if the extracted directory exists the
    function returns immediately, and if the zip file exists it is not downloaded
    again. No network request is made in either of those cases.

    Args:
        url: Download URL of the zip archive.
        path_to_extract: Directory that receives the zip file and its extracted
            content. Created if missing.
        extracting: If True, extract the archive into ``path_to_extract``.
        clean: If True, delete the zip file afterwards. This now also applies
            when the zip file was already present rather than freshly downloaded.

    Returns:
        The zip file path with its extension removed, i.e. the directory the
        archive is expected to extract to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    filename = os.path.basename(url).split("?")[0]
    os.makedirs(path_to_extract, exist_ok=True)

    abs_path = os.path.join(path_to_extract, filename)

    # Strip the extension properly instead of assuming it is four characters long.
    directory = os.path.splitext(abs_path)[0]

    if os.path.isdir(directory):
        print(f"The directory:{directory} is already exists.")
        return directory

    if os.path.exists(abs_path):
        print(f"The zip file: {abs_path} is already exists.")
    else:
        _download_with_progress(url, abs_path)

    if extracting:
        print("Extracting TRUE...!")
        extract_zip_file(abs_path, path_to_extract)

    if clean:
        os.remove(abs_path)

    return directory

In [10]:
def prepare_checkpoints(path_to_checkpoints:str, link_keys=("link1", "link2"),*args, **kwargs)-> str:
    """Download and extract the checkpoint archives selected by ``link_keys``.

    Args:
        path_to_checkpoints: Directory that receives the downloaded archives and
            their extracted content.
        link_keys: Keys of the ``links`` mapping in the configuration to fetch.
            Any iterable of keys is accepted.
        *args, **kwargs: Accepted for call compatibility; not used.

    Returns:
        ``path_to_checkpoints``, unchanged.

    Raises:
        KeyError: If any requested key is missing from the configured links.
            Raised before any download starts.
    """
    all_links = load_configs()["links"]

    # Validate every key up front so a typo does not surface only after
    # earlier (potentially large) downloads have already run.
    link_keys = list(link_keys)
    unknown_keys = [key for key in link_keys if key not in all_links]
    if unknown_keys:
        raise KeyError(
            f"Unknown link key(s) {unknown_keys}; available keys: {sorted(all_links)}"
        )

    for link_key in link_keys:
        download_link = all_links[link_key]
        print(download_link)

        download_and_extract_single_file(download_link, path_to_checkpoints)

    return path_to_checkpoints

In [11]:
# Fetch the default checkpoints ("link1" and "link2") into a machine-specific absolute directory.
# The captured output shows both target directories already existed when this cell ran.
prepare_checkpoints("/home/vajira/DL/temp_data/")

https://www.dropbox.com/s/yc3tn8sgn3m0v3r/TrainedModels_1_clean.zip?dl=1
The directory:/home/vajira/DL/temp_data/TrainedModels_1_clean is already exists.
https://www.dropbox.com/s/0i7io4tkpoccmq4/TrainedModels_2_clean.zip?dl=1
The directory:/home/vajira/DL/temp_data/TrainedModels_2_clean is already exists.
