In [None]:
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont
from wand.image import Image
from wand.color import Color
import os
import uuid
import nbformat
import cv2

# Default width and height (in pixels) used by every preview function in this
# file when the caller does not pass explicit dimensions.
DEFAULT_IMAGE_SIZE = 400
# Upper bound for the rough line counter kept by create_ipynb_preview: once
# the counter exceeds this value no further notebook content is collected.
MAX_LINES_PER_IMAGE= 20

def generate_unique_filename(directory, extension):
    """Return a path in *directory* that does not exist yet.

    The file name is a random UUID4 followed by ``.`` and *extension*. A new
    UUID is drawn until the resulting path is not present on disk. The file
    itself is not created.
    """
    candidate = os.path.join(directory, f"{uuid.uuid4()}.{extension}")
    while os.path.exists(candidate):
        # Collision with an existing file: draw another random name.
        candidate = os.path.join(directory, f"{uuid.uuid4()}.{extension}")
    return candidate

def is_text_file(file_path, block_size=512):
    """Report whether the start of *file_path* can be read as UTF-8 text.

    Up to *block_size* characters are read in text mode. Returns ``True`` when
    that read succeeds, and ``False`` when decoding fails or the file cannot
    be opened or read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as probe:
            probe.read(block_size)
    except (UnicodeDecodeError, IOError):
        # Either the content is not valid UTF-8 or the file is unreadable.
        return False
    else:
        return True

def create_image_preview(input_file, output_file, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE):
    """Write a preview of the image *input_file* to *output_file*.

    The image is opened through Wand with a ``[WIDTHxHEIGHT]`` suffix appended
    to the file name and then saved unchanged to *output_file*.
    """
    # NOTE(review): the bracketed suffix is presumably interpreted by
    # ImageMagick as a resize-on-read modifier - confirm against its docs.
    source_spec = f"{input_file}[{width}x{height}]"
    with Image(filename=source_spec) as preview:
        preview.save(filename=output_file)

def create_pdf_preview(input_file, output_file, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE):
    """Render the first page of the PDF *input_file* as a preview image.

    The page is opened at a resolution of 300, flattened onto a white
    background so that transparent areas do not stay transparent once the
    alpha channel is removed, resized with the geometry ``WIDTHxHEIGHT>``
    and saved to *output_file*.
    """
    # TODO: Find a way to extract the first page and apply the size transform
    # in a single Image call.
    with Image(filename=f'{input_file}[0]', resolution=300) as document:
        # Copy the first frame into its own Image so it can be edited and
        # saved independently of the source document.
        with Image(document.sequence[0]) as single_page:
            single_page.format = 'png'
            # Set the flatten colour explicitly. The original assigned the
            # attribute to itself, which had no effect.
            single_page.background_color = Color('white')
            single_page.alpha_channel = 'remove'
            single_page.transform(resize=f'{width}x{height}>')
            single_page.save(filename=output_file)

def image_to_text(text, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE, font_size=20):
    """Render *text* onto a white RGB image and return the PIL image.

    Despite its name, this function turns text into an image. Lines are drawn
    in black starting at (10, 10), one below the other, until the next line
    would cross the bottom edge of the image. Lines are not wrapped; anything
    wider than the image is cut off by the canvas.

    Args:
        text: Text to draw; split on ``"\\n"``.
        width: Image width in pixels.
        height: Image height in pixels.
        font_size: Size passed to the TrueType font loader.

    Returns:
        A ``PIL.Image.Image`` of size ``(width, height)``.
    """
    image = PIL.Image.new('RGB', (width, height), color=(255, 255, 255))
    draw = PIL.ImageDraw.Draw(image)

    try:
        font = PIL.ImageFont.truetype("Symbola.ttf", font_size)
    except IOError:
        # The font file is not available: fall back to Pillow's built-in font.
        font = PIL.ImageFont.load_default()

    # Minimum vertical advance per line. Without it a blank line has a
    # zero-height bounding box and would take no space at all, and lines made
    # of short glyphs would be packed tighter than the rest.
    reference_bbox = draw.textbbox((0, 0), "Ag", font=font)
    min_line_height = reference_bbox[3] - reference_bbox[1]

    y_text = 10
    for line in text.split('\n'):
        # Bounding box of this line; taller lines keep their own height.
        bbox = draw.textbbox((0, 0), line, font=font)
        line_height = max(bbox[3] - bbox[1], min_line_height)

        if y_text + line_height > height:
            break  # Stop once the text would run past the image height.

        draw.text((10, y_text), line, font=font, fill=(0, 0, 0))
        y_text += line_height
    return image

def create_text_preview(input_file, output_file, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE, font_size=20):
    """Render the beginning of the UTF-8 text file *input_file* to *output_file*.

    Only a bounded prefix of the file is read, so large files are not loaded
    into memory in full. The collected text is drawn with ``image_to_text``
    and the resulting image is saved to *output_file*.

    Raises:
        UnicodeDecodeError: If the part of the file that is read is not
            valid UTF-8.
        OSError: If the file cannot be opened or read.
    """
    collected = []
    visible_lines = 0
    with open(input_file, 'r', encoding='utf-8') as file:
        for line in file:
            collected.append(line)
            # Only lines with visible content count towards the limit, so
            # leading blank lines never cause real content to be dropped.
            # Assumes every such line needs at least one pixel row, so more
            # than `height` of them can never fit in the image.
            if line.strip():
                visible_lines += 1
                if visible_lines >= height:
                    break
    text = ''.join(collected)
    image_to_text(text, width, height, font_size).save(output_file)

def create_ipynb_preview(input_file, output_file, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE, font_size=20):
    """Render the first cells of the notebook *input_file* to *output_file*.

    The notebook is read as nbformat version 4. Code cells, their textual
    outputs and markdown cells are each added under a ``###`` heading. A rough
    counter is increased by 2 for every piece added; collection stops once it
    exceeds ``MAX_LINES_PER_IMAGE``. The collected text is drawn with
    ``image_to_text`` and saved to *output_file*.
    """
    with open(input_file, 'r', encoding='utf-8') as handle:
        nb = nbformat.read(handle, as_version=4)

    pieces = []
    line_estimate = 0  # Rough count: 2 per piece, not the real line total.
    for cell in nb.cells:
        kind = cell.cell_type
        if kind == 'markdown':
            pieces.append(f"### Markdown Cell\n{cell.source}\n")
            line_estimate += 2
        elif kind == 'code':
            pieces.append(f"### Code Cell\n{cell.source}\n")
            line_estimate += 2

            outputs = cell.outputs if 'outputs' in cell else ()
            for output in outputs:
                # Prefer a stream-style 'text' field, otherwise use the
                # plain-text representation from the output's data bundle.
                if 'text' in output:
                    pieces.append(f"### Output\n{output['text']}\n")
                    line_estimate += 2
                elif 'data' in output and 'text/plain' in output['data']:
                    pieces.append(f"### Output\n{output['data']['text/plain']}\n")
                    line_estimate += 2
                if line_estimate > MAX_LINES_PER_IMAGE:
                    break
        if line_estimate > MAX_LINES_PER_IMAGE:
            break

    rendered = image_to_text("".join(pieces), width, height, font_size)
    rendered.save(output_file)

def create_video_preview(input_file, output_file, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE):
    """Save the first frame of the video *input_file* as a preview image.

    The first frame is read with OpenCV, resized to exactly
    ``(width, height)`` and written to *output_file*.

    Raises:
        ValueError: If no frame can be read from the video.
        OSError: If OpenCV reports that the preview could not be written.
    """
    video = cv2.VideoCapture(input_file)
    try:
        success, frame = video.read()
        if not success:
            raise ValueError(f"Fail to extract frame from video: {input_file}")

        resized_frame = cv2.resize(frame, (width, height))
        # cv2.imwrite signals failure through its return value, so check it
        # rather than returning as if the preview existed.
        if not cv2.imwrite(output_file, resized_frame):
            raise OSError(f"Fail to write video preview: {output_file}")
    finally:
        # Release the capture on every path, including the error paths.
        video.release()

def create_preview(input_file, output_file=None, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE):
    """Create a preview image for *input_file* and return the output path.

    The preview function is chosen from the file extension (case-insensitive).
    Files with any other extension, or none, are rendered as text when they
    can be read as UTF-8.

    Args:
        input_file: Path of the file to preview.
        output_file: Destination path. When omitted, a unique ``.jpg`` name
            inside ``./output`` is generated and that directory is created
            if it is missing.
        width: Preview width in pixels.
        height: Preview height in pixels.

    Returns:
        The path the preview was written to.

    Raises:
        ValueError: If the file type is not supported.
    """
    if not output_file:
        # Saving would fail if the default directory did not exist yet.
        os.makedirs("./output", exist_ok=True)
        output_file = generate_unique_filename("./output", "jpg")

    # splitext only looks at the final path component, so dots in directory
    # names (including a leading "./") are not mistaken for an extension.
    file_extension = os.path.splitext(input_file)[1].lstrip('.').lower()

    if file_extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp', 'svg'):
        create_image_preview(input_file, output_file, width, height)
    elif file_extension == 'pdf':
        create_pdf_preview(input_file, output_file, width, height)
    elif file_extension in ('txt', 'md'):
        create_text_preview(input_file, output_file, width, height)
    elif file_extension == 'ipynb':
        create_ipynb_preview(input_file, output_file, width, height)
    elif file_extension in ('mov', 'mp4'):
        create_video_preview(input_file, output_file, width, height)
    elif is_text_file(input_file):
        # Unknown extension, but the content reads as UTF-8 text.
        create_text_preview(input_file, output_file, width, height)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")
    return output_file


In [None]:
# Manual smoke test: build a preview for each sample file and print the path
# of the generated image. Entries that are commented out are skipped.
# NOTE(review): the final .xlsx entry is presumably there to exercise the
# "Unsupported file type" error path - confirm.
sample_inputs = [
    # './data/[숙제]네이버 영화_ 클레멘타인 영화평점_댓글 가져오기.ipynb',
    # './data/2304.12244.pdf',
    # './data/changelog.txt',
    # './data/KTC_User_Guide-24.03.pdf',
    # './data/LICENSE',
    # './data/output.jsonl',
    # './data/prof.jpg',
    # './data/pyproject.toml',
    # './data/README.md',
    # './data/SVG_Logo.svg',
    './data/ts_480.mov',
    './data/ts.mp4',
    './data/utils.py',
    './data/환자표본자료 변수설명서(2020년).xlsx',
]
for sample_input in sample_inputs:
    print(create_preview(sample_input))
