In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy import signal as scps
import os
import csv
import pytesseract


In [2]:
manga1 = cv2.imread("manga1.jpg")
manga2 = cv2.imread("image1.jpg")

In [3]:
def findSpeechBubbles(binary):
    # Convert image to gray scale
    
    # Find contours and document their heirarchy for later
    contours, hierarchy = cv2.findContours(binary,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
    contourMap = {}
    finalContourList = []

    contourMap = filterContoursBySize(contours)
    contourMap = filterContainingContours(contourMap, hierarchy)

    # Sort final contour list
    finalContourList = list(contourMap.values())
    finalContourList.sort(key=lambda x:get_contour_precedence(x, binary.shape[1]))

    return finalContourList

def filterContoursBySize(contours):
    # We could pass this in and update it by reference, but I prefer this sort of 'immutable' handling.
    contourMap = {}

    for i in range(len(contours)):
        # Filter out speech bubble candidates with unreasonable size
        if cv2.contourArea(contours[i]) < 120000 and cv2.contourArea(contours[i]) > 4000:
            # Smooth out contours that were found
            epsilon = 0.0025*cv2.arcLength(contours[i], True)
            approximatedContour = cv2.approxPolyDP(contours[i], epsilon, True)
            contourMap[i] = approximatedContour

    return contourMap

# Sometimes the contour algorithm identifies entire panels, which can contain speech bubbles already
#  identified causing us to parse them twice via OCR. This method attempts to remove contours that 
#  contain other speech bubble candidate contours completely inside of them.
def filterContainingContours(contourMap, hierarchy):
    # I really wish there was a better way to do this than this O(n^2) removal of all parents in
    #  the heirarchy of a contour, but with the number of contours found this is the only way I can
    #  think of to do this.
    for i in list(contourMap.keys()):
        currentIndex = i
        while hierarchy[0][currentIndex][3] > 0:
            if hierarchy[0][currentIndex][3] in contourMap.keys():
                contourMap.pop(hierarchy[0][currentIndex][3])
            currentIndex = hierarchy[0][currentIndex][3]

    # I'd prefer to handle this 'immutably' like above, but I'd rather not make an unnecessary copy of the dict.
    return contourMap

def get_contour_precedence(contour, cols):
    tolerance_factor = 200
    origin = cv2.boundingRect(contour)
    return ((origin[1] // tolerance_factor) * tolerance_factor) * cols + origin[0]

In [27]:
def resize(img):
    scale_percent = 35 # percent of original size
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)

    # resize image
    resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    return resized

In [4]:

img = cv2.imread('image1.jpg', cv2.IMREAD_UNCHANGED)


dst = cv2.GaussianBlur(img,(5,5),cv2.BORDER_DEFAULT)



 
scale_percent = 35 # percent of original size
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
  
# resize image
resized = cv2.resize(dst, dim, interpolation = cv2.INTER_AREA)

"""cv2.imshow('ImageWindow', resized)
cv2.waitKey()"""

im_gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
binary = cv2.threshold(im_gray,170,255,cv2.THRESH_BINARY)[1]



bubbles = findSpeechBubbles(binary)

cv2.drawContours(im_gray, bubbles, -1, (0, 0, 255), 3)
cv2.imshow('ImageWindow', im_gray)

"""

cv2.waitKey()

for c in bubbles:
    rect = cv2.boundingRect(c)
    [x, y, w, h] = rect
    croppedImage = manga2[y:y+h, x:x+w]
    cv2.imshow('ImageWindow', croppedImage)
    cv2.waitKey()
"""
    

In [23]:
#text getting from picture with tesseract
from pytesseract import Output
from pytesseract import image_to_string
from deep_translator import GoogleTranslator

custom_config = r'-l eng+jpn --psm 6'

rgb = cv2.imread("images/proba1.png", cv2.IMREAD_UNCHANGED)
#rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

#rgb1 = preprocess(rgb)

results = pytesseract.image_to_data(rgb,lang = "jpn", output_type=Output.DICT)

# loop over each of the individual text localizations
sentence_f = ""
for i in range(0, len(results["text"])):
# extract the bounding box coordinates of the text region from
# the current result
    x = results["left"][i]
    y = results["top"][i]
    w = results["width"][i]
    h = results["height"][i]
    
    
    # extract the OCR text itself along with the confidence of the
    # text localization
    text = results["text"][i]
    if(len(text.strip()) > 0 and text.strip().isascii()):
        sentence_f += text.strip().lower() + " "
sentence = sentence_f[:-1]
print(sentence)

translated = GoogleTranslator(source='auto', target='sl').translate(sentence)
print(translated)

    

the damn thin moved a oon a t fired.
prekleto tanek ganil a oon a t je streljal.


In [46]:
def remove_text(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,3))
    close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))
    dilate = cv2.dilate(close, dilate_kernel, iterations=1)

    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        area = cv2.contourArea(c)
        if area > 800 and area < 15000:
            x,y,w,h = cv2.boundingRect(c)
            cv2.rectangle(image, (x, y), (x + w, y + h), (255,255,255), -1)

    cv2.imshow('image', image)
    cv2.waitKey()