In [None]:
import sys
lib_path = ['/home/wangyh/uro_biomarker/python-wsi-preprocessing/deephistopath/wsi','/home/wangyh/uro_biomarker/python-wsi-preprocessing']
for i in lib_path:
    sys.path.append(i)
    
import importlib
import glob
import pandas as pd
import numpy as np
import tifffile as tif
from func import basic
from func import visualization
from matplotlib import pyplot as plt
from pathlib import Path
import multiprocessing
from deephistopath.wsi.util import Time
import math
import PIL
%matplotlib inline
importlib.reload(visualization)

In [None]:
import filter
import slide as sl
import tiles
import util

In [None]:
# Slide manifest; the functions below read its 'dir_uuid' and 'svs_paths' columns.
df = pd.read_csv('../config/full.csv')
# Longest side, in pixels, requested for the thumbnails written by save_thumbnail().
THUMBNAIL_SIZE = 300
# Factor by which each slide's full-resolution width and height are divided.
SCALE_FACTOR = 32

def get_num_training_slides():
    """
    Count the WSI training slides that the conversion drivers will handle.

    The count is taken over the first two entries of the 'dir_uuid' column of
    the module-level ``df``, so the result is never larger than 2.
    NOTE(review): the two-row cap looks like a debugging limit -- confirm
    before treating this value as the size of the whole data set.

    Returns:
    The number of entries in the capped 'dir_uuid' selection.
    """
    return len(df['dir_uuid'][:2])


def get_training_slide_path(slide_number):
    """
    Look up the WSI training slide file that belongs to a slide number.

    Args:
    slide_number: index label of the slide in the 'svs_paths' column of ``df``.

    Returns:
    The 'svs_paths' entry stored under that label.
    """
    return df['svs_paths'][slide_number]


def save_thumbnail(pil_img, size, path, display_path=False):
    """
    Save a thumbnail of a PIL image, specifying the maximum width or height of the thumbnail.

    The image is resized so that its longer side becomes `size` pixels while the
    aspect ratio is kept; the shorter side is never allowed to drop below 1 pixel.
    The directory that contains `path` is created if it does not exist yet.

    Args:
    pil_img: The PIL image to save as a thumbnail.
    size:  The maximum width or height of the thumbnail.
    path: The path to the thumbnail.
    display_path: If True, display thumbnail path in console.
    """
    # A bare `import PIL` does not guarantee that the `PIL.Image` submodule is
    # loaded, so import it explicitly instead of relying on other imports.
    import PIL.Image

    longest_side = max(pil_img.size)
    # Clamp to 1 px: for a very elongated image the short side would otherwise
    # round down to 0, which is not a valid target size for resize().
    thumb_size = tuple(max(1, round(size * d / longest_side)) for d in pil_img.size)
    img = pil_img.resize(thumb_size, PIL.Image.BILINEAR)
    if display_path:
        print("Saving thumbnail to: " + path)
    # Make sure the destination folder exists before writing the file.
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    img.save(path)


def slide_to_scaled_pil_image(slide_number):
    """
    Convert one single WSI training slide to a scaled-down PIL image.

    The slide is read at the pyramid level best suited for SCALE_FACTOR,
    converted to RGB, and resized to the original dimensions divided by
    SCALE_FACTOR (rounded down). The slide handle is closed once the pixel
    data has been read.

    Args:
    slide_number: The index of slide.

    Returns:
    Tuple consisting of scaled-down PIL image, original width, original height, new width, and new height.
    """
    slide_filepath = get_training_slide_path(slide_number)
    print("Opening Slide #%d: %s" % (slide_number, slide_filepath))
    # Assumes sl.open_slide returns an OpenSlide-style handle that provides
    # close() -- the attributes used below are all part of that API.
    slide = sl.open_slide(slide_filepath)
    try:
        large_w, large_h = slide.dimensions
        new_w = math.floor(large_w / SCALE_FACTOR)
        new_h = math.floor(large_h / SCALE_FACTOR)
        level = slide.get_best_level_for_downsample(SCALE_FACTOR)
        whole_slide_image = slide.read_region((0, 0), level, slide.level_dimensions[level])
    finally:
        # Release the underlying file handle even if reading fails; otherwise
        # one handle per processed slide stays open.
        slide.close()

    whole_slide_image = whole_slide_image.convert("RGB")
    img = whole_slide_image.resize((new_w, new_h), PIL.Image.BILINEAR)
    return img, large_w, large_h, new_w, new_h


def get_training_image_path(slide_number, large_w=None, large_h=None, small_w=None, small_h=None):
    """
    Convert a slide number to the path of its scaled-down training image.

    The path depends on the slide number only. The dimension arguments are
    accepted so existing callers keep working, but they are not used.

    Example:
    5 -> /mnt/wangyh/svs_thumb_img/5.png

    Args:
    slide_number: The slide number.
    large_w: Large image width (unused).
    large_h: Large image height (unused).
    small_w: Small image width (unused).
    small_h: Small image height (unused).

    Returns:
     Path to the image file.
    """
    return f'/mnt/wangyh/svs_thumb_img/{slide_number}.png'

def get_training_thumbnail_path(slide_number, large_w=None, large_h=None, small_w=None, small_h=None):
    """
    Convert a slide number to the path of its training thumbnail.

    The path depends on the slide number only. The dimension arguments are
    accepted so existing callers keep working, but they are not used.

    Example:
    5 -> /mnt/wangyh/svs_thumb_img/5.jpg

    Args:
    slide_number: The slide number.
    large_w: Large image width (unused).
    large_h: Large image height (unused).
    small_w: Small image width (unused).
    small_h: Small image height (unused).

    Returns:
     Path to the thumbnail file.
    """
    # Same folder as the scaled-down image; only the '.jpg' suffix differs.
    return f'/mnt/wangyh/svs_thumb_img/{slide_number}.jpg'


def training_slide_to_image(slide_number):
    """
    Write the scaled-down image and the thumbnail for one WSI training slide.

    The slide is first converted to a scaled-down PIL image, which is saved
    under the training image path; a thumbnail limited to THUMBNAIL_SIZE is
    then saved under the training thumbnail path.

    Args:
    slide_number: index of the slide
    """
    scaled_img, orig_w, orig_h, small_w, small_h = slide_to_scaled_pil_image(slide_number)
    dims = (orig_w, orig_h, small_w, small_h)

    # Scaled-down image first ...
    scaled_path = get_training_image_path(slide_number, *dims)
    print("Saving image to: " + scaled_path)
    scaled_img.save(scaled_path)

    # ... then the thumbnail derived from the same scaled-down image.
    thumb_path = get_training_thumbnail_path(slide_number, *dims)
    save_thumbnail(scaled_img, THUMBNAIL_SIZE, thumb_path)


def training_slide_range_to_images(start_ind, end_ind):
    """
    Convert every WSI training slide in an index range to smaller images.

    Nothing is converted when end_ind is smaller than start_ind.

    Args:
    start_ind: First slide index to convert (inclusive).
    end_ind: Last slide index to convert (inclusive).

    Returns:
    The pair (start_ind, end_ind) exactly as it was passed in.
    """
    print('enter training slide range\n')
    stop = end_ind + 1  # range() excludes its upper bound
    for current_slide in range(start_ind, stop):
        training_slide_to_image(current_slide)
    return start_ind, end_ind


def multiprocess_training_slides_to_images():
    """
    Convert all WSI training slides to smaller images using multiple processes.

    The slides counted by get_num_training_slides() are split into contiguous
    index ranges, one range per worker process. The number of workers is the
    CPU count, reduced to the number of slides when there are fewer slides
    than cores. Slide indices are 0-based so that they match the default row
    labels of the DataFrame that get_training_slide_path() indexes.
    """
    timer = Time()

    num_train_images = get_num_training_slides()
    if num_train_images <= 0:
        # Nothing to split up; also avoids dividing by a process count of 0.
        print("Number of training images: " + str(num_train_images))
        timer.elapsed_display()
        return

    # Never start more workers than there are slides to convert.
    num_processes = min(multiprocessing.cpu_count(), num_train_images)

    print("Number of processes: " + str(num_processes))
    print("Number of training images: " + str(num_train_images))

    # Each task is an inclusive (start, end) range of slide indices. Integer
    # arithmetic keeps the ranges contiguous and free of float rounding.
    tasks = []
    for num_process in range(1, num_processes + 1):
        start_index = (num_process - 1) * num_train_images // num_processes
        end_index = num_process * num_train_images // num_processes - 1
        tasks.append((start_index, end_index))
        if start_index == end_index:
            print("Task #" + str(num_process) + ": Process slide " + str(start_index))
        else:
            print("Task #" + str(num_process) + ": Process slides " + str(start_index) + " to " + str(end_index))

    # The context manager shuts the worker processes down once every result
    # has been collected, so no pool is left running.
    with multiprocessing.Pool(num_processes) as pool:
        results = [pool.apply_async(training_slide_range_to_images, t) for t in tasks]

        for result in results:
            (start_ind, end_ind) = result.get()
            if start_ind == end_ind:
                print("Done converting slide %d" % start_ind)
            else:
                print("Done converting slides %d through %d" % (start_ind, end_ind))

    timer.elapsed_display()
    
def singleprocess_training_slides_to_images():
    """
    Convert all WSI training slides to smaller images using a single process.

    Slides 0 through n - 1 are converted, where n is the value returned by
    get_num_training_slides(). The output files are therefore named after
    0-based slide numbers.
    """
    t = Time()

    num_train_images = get_num_training_slides()
    # The slide table is read with pandas' default 0-based row labels, so the
    # inclusive range is 0..n-1. Starting at 1 would skip the first slide and
    # ask for a row one past the counted range.
    training_slide_range_to_images(0, num_train_images - 1)

    t.elapsed_display()

In [None]:
# Run the slide-to-image conversion in this process for every slide counted by get_num_training_slides().
singleprocess_training_slides_to_images()