Permalink
Browse files

added OCR test with pytesser

  • Loading branch information...
1 parent 059440a commit b52f7358019c2474a271e5150ef2e18d9c819887 @metadirective metadirective committed Apr 29, 2014
Showing with 16 additions and 2 deletions.
  1. +16 −2 videoFileAnalyser.py
View
@@ -13,13 +13,18 @@
import ImageChops, Image
import math, shutil
+# Only useful if OCRanalyse=True
+# https://code.google.com/p/pytesser/
+from pytesser import *
+
# Variables
sizeChangeThreshold=5000 # used only in File Size change comparison method
picsExtraction = True # extract pictures from video files
videoFilePath="enregistrement-video.mp4" # name of the video to analyse
extractionInterval= "0.1" # 1 / time interval: 1 = one picture every second, 0.1 = one picture every 10 seconds...
outputPath="tests" # Output folder
cleanFiles=True # remove extracted files to keep only the different ones, renamed D1.jpg, D2.jpg, etc.
+OCRanalyse=True # perform OCR on "slides"
print "> videoFileAnalyser started..."
@@ -67,7 +72,7 @@ def rmsdiff(im1, im2):
if diffSize > sizeChangeThreshold:
slideIndex+=1
#print ">>>>>>>>>>>>>> " + str(fileName)
- shutil.copy2(outputPath+"/"+fileName, outputPath+"/D"+str(slideIndex)+".jpg")
+ shutil.copy2(outputPath+"/"+fileName, outputPath+"/D"+str(slideIndex)+".jpg")
previousFileSize=currentFileSize
if 0: # Root Mean Square comparison method (doesn't work well on my test sample)
@@ -86,4 +91,13 @@ def rmsdiff(im1, im2):
if "D" in fileName:
pass
else:
- os.remove(outputPath+"/"+fileName)
+ os.remove(outputPath+"/"+fileName)
+
+if OCRanalyse==True:
+ print "> starting OCR with pytesser"
+ for fileName in (os.listdir(outputPath)):
+ im = Image.open(outputPath+"/"+fileName)
+ text = image_to_string(im)
+ print "############## "+fileName+" ############"
+ print text
+ print "#####################################"

0 comments on commit b52f735

Please sign in to comment.