2024-08-30 16:38:22 +03:00
|
|
|
# ...
|
|
|
|
#
|
|
|
|
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
|
|
|
|
#
|
|
|
|
# This work is licensed under the terms of the GNU GPL, version 2 or
|
|
|
|
# later. See the COPYING file in the top-level directory.
|
|
|
|
|
|
|
|
import re
|
|
|
|
import logging
|
|
|
|
|
|
|
|
from . import has_cmd, run_cmd
|
|
|
|
|
|
|
|
def tesseract_available(expected_version):
    """Check that a usable tesseract of at least a given major version exists.

    The check has two stages: ``has_cmd('tesseract')`` must report the
    command as present, and ``tesseract --version`` must exit successfully
    with output whose second whitespace-separated field starts with the
    major version number.

    :param expected_version: minimum acceptable major version; it is
                             compared numerically against the parsed major
                             version.
    :returns: True when tesseract is present and its major version is
              greater than or equal to ``expected_version``; False when the
              command is missing, exits with a non-zero status, or prints
              output from which no major version can be extracted.
    """
    # has_cmd() yields a pair; only the first element (the availability
    # flag) matters here.
    (has_tesseract, _) = has_cmd('tesseract')
    if not has_tesseract:
        return False

    (stdout, _, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False

    # The version is taken from the second field of the output (the first
    # one is expected to be the program name).  Empty or truncated output
    # means "not available" rather than an IndexError for the caller.
    fields = stdout.split()
    if len(fields) < 2:
        return False

    # Only the leading major number is needed.  An optional 'v' prefix is
    # tolerated, and anything that does not start with digits is treated as
    # an unusable version instead of raising ValueError.
    major = re.match(r'v?(\d+)', fields[1])
    if major is None:
        return False
    return int(major.group(1)) >= expected_version
|
|
|
|
|
|
|
|
def tesseract_ocr(image_path, tesseract_args=''):
    """Run tesseract on an image and return the recognised text lines.

    The image path and every recognised line are logged at debug level on
    the 'console' logger.

    :param image_path: path of the image handed to tesseract.
    :param tesseract_args: extra command line options as a single string;
                           it is split with shell-like quoting rules and
                           appended after the ``stdout`` output base.  The
                           default empty string adds nothing.
    :returns: a list with the non-empty output lines, each stripped of
              surrounding whitespace, or None when tesseract exits with a
              non-zero status.
    """
    # Imported locally so this function does not depend on the module's
    # import block providing shlex.
    import shlex

    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    # Previously tesseract_args was accepted but never reached the command
    # line.  Guard the empty/None case so shlex.split() is only given a
    # real string.
    extra_args = shlex.split(tesseract_args) if tesseract_args else []
    (stdout, _, ret) = run_cmd(['tesseract', image_path, 'stdout']
                               + extra_args)
    if ret:
        return None

    # Keep only lines that still have content after stripping whitespace.
    lines = [sline for sline in map(str.strip, stdout.split('\n')) if sline]
    for sline in lines:
        console_logger.debug(sline)
    return lines
|