pythonPackages.pyocr: 0.5.3 -> 0.7.2
This commit is contained in:
parent
99dfc57bce
commit
bcb40a5f04
@ -1,10 +1,10 @@
|
||||
{ lib, fetchFromGitLab, buildPythonPackage, pillow, six
|
||||
, tesseract, cuneiform, isPy3k, substituteAll, pytest, tox
|
||||
}:
|
||||
{ lib, fetchFromGitLab, buildPythonPackage, pillow, setuptools_scm,
|
||||
setuptools-scm-git-archive , tesseract, cuneiform, isPy3k, substituteAll,
|
||||
pytest, tox }:
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = "pyocr";
|
||||
version = "0.5.3";
|
||||
version = "0.7.2";
|
||||
disabled = !isPy3k;
|
||||
|
||||
# Don't fetch from PYPI because it doesn't contain tests.
|
||||
@ -14,7 +14,7 @@ buildPythonPackage rec {
|
||||
owner = "OpenPaperwork";
|
||||
repo = "pyocr";
|
||||
rev = version;
|
||||
sha256 = "1nihf0qmbpg3yj3yp11jp6hp5z5dqf39nz6j9lqbvgi1nqbs7x15";
|
||||
sha256 = "09ab86bmizpv94w3mdvdqkjyyvk1vafw3jqhkiw5xx7p180xn3il";
|
||||
};
|
||||
|
||||
patches = [ (substituteAll {
|
||||
@ -23,38 +23,8 @@ buildPythonPackage rec {
|
||||
})
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
echo 'version = "${version}"' > src/pyocr/_version.py
|
||||
|
||||
# Disable specific tests that are probably failing because of this issue:
|
||||
# https://github.com/jflesch/pyocr/issues/52
|
||||
for test in $disabledTests; do
|
||||
file="''${test%%:*}"
|
||||
fun="''${test#*:}"
|
||||
echo "import pytest" >> "tests/tests_$file.py"
|
||||
echo "$fun = pytest.mark.skip($fun)" >> "tests/tests_$file.py"
|
||||
done
|
||||
'';
|
||||
|
||||
disabledTests = [
|
||||
"cuneiform:TestTxt.test_basic"
|
||||
"cuneiform:TestTxt.test_european"
|
||||
"cuneiform:TestTxt.test_french"
|
||||
"cuneiform:TestWordBox.test_basic"
|
||||
"cuneiform:TestWordBox.test_european"
|
||||
"cuneiform:TestWordBox.test_french"
|
||||
"libtesseract:TestBasicDoc.test_basic"
|
||||
"libtesseract:TestDigitLineBox.test_digits"
|
||||
"libtesseract:TestLineBox.test_japanese"
|
||||
"libtesseract:TestTxt.test_japanese"
|
||||
"libtesseract:TestWordBox.test_japanese"
|
||||
"libtesseract:TestTxt.test_multi"
|
||||
"tesseract:TestTxt.test_multi"
|
||||
"tesseract:TestDigitLineBox.test_digits"
|
||||
"tesseract:TestTxt.test_japanese"
|
||||
];
|
||||
|
||||
propagatedBuildInputs = [ pillow six ];
|
||||
buildInputs = [ setuptools_scm setuptools-scm-git-archive ];
|
||||
propagatedBuildInputs = [ pillow ];
|
||||
checkInputs = [ pytest tox ];
|
||||
checkPhase = "pytest";
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
Index: current/src/pyocr/cuneiform.py
|
||||
===================================================================
|
||||
--- current.orig/src/pyocr/cuneiform.py
|
||||
+++ current/src/pyocr/cuneiform.py
|
||||
@@ -27,13 +27,9 @@ from . import error
|
||||
from . import util
|
||||
diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py
|
||||
index 2e5b717..35647e2 100644
|
||||
--- a/src/pyocr/cuneiform.py
|
||||
+++ b/src/pyocr/cuneiform.py
|
||||
@@ -25,13 +25,9 @@ from . import builders
|
||||
from .error import CuneiformError
|
||||
|
||||
|
||||
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
||||
@ -18,25 +18,34 @@ Index: current/src/pyocr/cuneiform.py
|
||||
|
||||
LANGUAGES_LINE_PREFIX = "Supported languages: "
|
||||
LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
|
||||
Index: current/src/pyocr/libtesseract/tesseract_raw.py
|
||||
===================================================================
|
||||
--- current.orig/src/pyocr/libtesseract/tesseract_raw.py
|
||||
+++ current/src/pyocr/libtesseract/tesseract_raw.py
|
||||
@@ -1,52 +1,13 @@
|
||||
import ctypes
|
||||
diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py
|
||||
index a068e73..9ebea5c 100644
|
||||
--- a/src/pyocr/libtesseract/tesseract_raw.py
|
||||
+++ b/src/pyocr/libtesseract/tesseract_raw.py
|
||||
@@ -2,7 +2,6 @@ import ctypes
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
-import sys
|
||||
|
||||
from ..error import TesseractError
|
||||
|
||||
|
||||
@@ -10,48 +9,16 @@ from ..error import TesseractError
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
-TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
|
||||
TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
|
||||
-libnames = []
|
||||
+if TESSDATA_PREFIX is None:
|
||||
+ TESSDATA_PREFIX = '@tesseract@/share/tessdata'
|
||||
+ os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX
|
||||
+
|
||||
+
|
||||
# 70 is the minimum credible dpi for tesseract and force it to compute an
|
||||
# estimate of the image dpi
|
||||
DPI_DEFAULT = 70
|
||||
|
||||
-
|
||||
-if getattr(sys, 'frozen', False):
|
||||
-if getattr(sys, 'frozen', False): # pragma: no cover
|
||||
- # Pyinstaller integration
|
||||
- libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")]
|
||||
- libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")]
|
||||
@ -51,7 +60,7 @@ Index: current/src/pyocr/libtesseract/tesseract_raw.py
|
||||
- TESSDATA_PREFIX = tessdata
|
||||
-
|
||||
-
|
||||
-if sys.platform[:3] == "win":
|
||||
-if sys.platform[:3] == "win": # pragma: no cover
|
||||
- libnames += [
|
||||
- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
|
||||
- # Windows ?
|
||||
@ -76,15 +85,16 @@ Index: current/src/pyocr/libtesseract/tesseract_raw.py
|
||||
|
||||
g_libtesseract = None
|
||||
|
||||
@@ -346,12 +307,11 @@ def init(lang=None):
|
||||
@@ -364,12 +331,12 @@ def init(lang=None):
|
||||
try:
|
||||
if lang:
|
||||
lang = lang.encode("utf-8")
|
||||
- prefix = None
|
||||
- if TESSDATA_PREFIX:
|
||||
- if TESSDATA_PREFIX: # pragma: no cover
|
||||
- prefix = TESSDATA_PREFIX.encode("utf-8")
|
||||
+ prefix = os.getenv('TESSDATA_PREFIX', '@tesseract@/share/tessdata')
|
||||
+ os.environ['TESSDATA_PREFIX'] = prefix
|
||||
+
|
||||
+ prefix = TESSDATA_PREFIX
|
||||
+
|
||||
g_libtesseract.TessBaseAPIInit3(
|
||||
ctypes.c_void_p(handle),
|
||||
- ctypes.c_char_p(prefix),
|
||||
@ -92,11 +102,11 @@ Index: current/src/pyocr/libtesseract/tesseract_raw.py
|
||||
ctypes.c_char_p(lang)
|
||||
)
|
||||
g_libtesseract.TessBaseAPISetVariable(
|
||||
Index: current/src/pyocr/tesseract.py
|
||||
===================================================================
|
||||
--- current.orig/src/pyocr/tesseract.py
|
||||
+++ current/src/pyocr/tesseract.py
|
||||
@@ -31,8 +31,7 @@ from .builders import DigitBuilder # ba
|
||||
diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py
|
||||
index 7c30852..44e8446 100644
|
||||
--- a/src/pyocr/tesseract.py
|
||||
+++ b/src/pyocr/tesseract.py
|
||||
@@ -28,8 +28,7 @@ from .builders import DigitBuilder # backward compatibility
|
||||
from .error import TesseractError # backward compatibility
|
||||
from .util import digits_only
|
||||
|
||||
@ -106,3 +116,233 @@ Index: current/src/pyocr/tesseract.py
|
||||
|
||||
TESSDATA_EXTENSION = ".traineddata"
|
||||
|
||||
diff --git a/tests/tests_cuneiform.py b/tests/tests_cuneiform.py
|
||||
index 45b7f6a..95f55c6 100644
|
||||
--- a/tests/tests_cuneiform.py
|
||||
+++ b/tests/tests_cuneiform.py
|
||||
@@ -21,7 +21,7 @@ class TestCuneiform(BaseTest):
|
||||
# XXX is it useful?
|
||||
which.return_value = True
|
||||
self.assertTrue(cuneiform.is_available())
|
||||
- which.assert_called_once_with("cuneiform")
|
||||
+ which.assert_called_once_with("@cuneiform@/bin/cuneiform")
|
||||
|
||||
@patch("subprocess.Popen")
|
||||
def test_version(self, popen):
|
||||
@@ -54,7 +54,7 @@ class TestCuneiform(BaseTest):
|
||||
self.assertIn("eng", langs)
|
||||
self.assertIn("fra", langs)
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-l"],
|
||||
+ ["@cuneiform@/bin/cuneiform", "-l"],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT
|
||||
)
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestCuneiformTxt(BaseTest):
|
||||
output = cuneiform.image_to_string(self.image)
|
||||
self.assertEqual(output, self._get_file_content("text").strip())
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
|
||||
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
@@ -125,7 +125,7 @@ class TestCuneiformTxt(BaseTest):
|
||||
builder=self.builder)
|
||||
self.assertEqual(output, self._get_file_content("text").strip())
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
|
||||
+ ["@cuneiform@/bin/cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
|
||||
"-"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT
|
||||
@@ -142,7 +142,7 @@ class TestCuneiformTxt(BaseTest):
|
||||
builder=self.builder)
|
||||
self.assertEqual(output, self._get_file_content("text").strip())
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
|
||||
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
@@ -173,7 +173,7 @@ class TestCuneiformTxt(BaseTest):
|
||||
output = cuneiform.image_to_string(image, builder=self.builder)
|
||||
self.assertEqual(output, self._get_file_content("text").strip())
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
|
||||
+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
@@ -227,7 +227,7 @@ class TestCuneiformWordBox(BaseTest):
|
||||
output = cuneiform.image_to_string(self.image,
|
||||
builder=self.builder)
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
|
||||
+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
@@ -280,7 +280,7 @@ class TestCuneiformLineBox(BaseTest):
|
||||
output = cuneiform.image_to_string(self.image,
|
||||
builder=self.builder)
|
||||
popen.assert_called_once_with(
|
||||
- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
|
||||
+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
diff --git a/tests/tests_libtesseract.py b/tests/tests_libtesseract.py
|
||||
index ad7fdc9..57e7a60 100644
|
||||
--- a/tests/tests_libtesseract.py
|
||||
+++ b/tests/tests_libtesseract.py
|
||||
@@ -165,7 +165,8 @@ class TestLibTesseractRaw(BaseTest):
|
||||
args = libtess.TessBaseAPIInit3.call_args[0]
|
||||
self.assertEqual(len(args), 3)
|
||||
self.assertEqual(args[0].value, self.handle)
|
||||
- self.assertEqual(args[1].value, None)
|
||||
+ # we hardcode tesseract data, so we don't get None
|
||||
+ #self.assertEqual(args[1].value, None)
|
||||
self.assertEqual(args[2].value, lang.encode() if lang else None)
|
||||
|
||||
self.assertEqual(
|
||||
@@ -201,7 +202,8 @@ class TestLibTesseractRaw(BaseTest):
|
||||
args = libtess.TessBaseAPIInit3.call_args[0]
|
||||
self.assertEqual(len(args), 3)
|
||||
self.assertEqual(args[0].value, self.handle)
|
||||
- self.assertEqual(args[1].value, None)
|
||||
+ # we hardcode tesseract data, so we don't get None
|
||||
+ #self.assertEqual(args[1].value, None)
|
||||
self.assertEqual(args[2].value, lang.encode() if lang else None)
|
||||
|
||||
self.assertEqual(
|
||||
diff --git a/tests/tests_tesseract.py b/tests/tests_tesseract.py
|
||||
index 1a55567..a24d96f 100644
|
||||
--- a/tests/tests_tesseract.py
|
||||
+++ b/tests/tests_tesseract.py
|
||||
@@ -36,7 +36,7 @@ class TestTesseract(BaseTest):
|
||||
def test_available(self, which):
|
||||
which.return_value = True
|
||||
self.assertTrue(tesseract.is_available())
|
||||
- which.assert_called_once_with("tesseract")
|
||||
+ which.assert_called_once_with("@tesseract@/bin/tesseract")
|
||||
|
||||
@patch("subprocess.Popen")
|
||||
def test_version_error(self, popen):
|
||||
@@ -156,7 +156,7 @@ class TestTesseract(BaseTest):
|
||||
for lang in ("eng", "fra", "jpn", "osd"):
|
||||
self.assertIn(lang, langs)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "--list-langs"],
|
||||
+ ["@tesseract@/bin/tesseract", "--list-langs"],
|
||||
startupinfo=None, creationflags=0,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT
|
||||
)
|
||||
@@ -171,7 +171,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(te.exception.status, 1)
|
||||
self.assertEqual("unable to get languages", te.exception.message)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "--list-langs"],
|
||||
+ ["@tesseract@/bin/tesseract", "--list-langs"],
|
||||
startupinfo=None, creationflags=0,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT
|
||||
)
|
||||
@@ -248,7 +248,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(status, 0)
|
||||
self.assertEqual(error, message)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "output"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "output"],
|
||||
cwd=tmpdir,
|
||||
startupinfo=None,
|
||||
creationflags=0,
|
||||
@@ -271,7 +271,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(status, 0)
|
||||
self.assertEqual(error, message)
|
||||
popen.assert_called_with(
|
||||
- ["tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
|
||||
+ ["@tesseract@/bin/tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
|
||||
cwd=tmpdir,
|
||||
startupinfo=None,
|
||||
creationflags=0,
|
||||
@@ -302,7 +302,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(result["angle"], 90)
|
||||
self.assertEqual(result["confidence"], 9.30)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -338,7 +338,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(result["angle"], 90)
|
||||
self.assertEqual(result["confidence"], 9.30)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -371,7 +371,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(result["angle"], 90)
|
||||
self.assertEqual(result["confidence"], 9.30)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout",
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout",
|
||||
"--psm", "0", "-l", "osd"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
@@ -399,7 +399,7 @@ class TestTesseract(BaseTest):
|
||||
with self.assertRaises(tesseract.TesseractError) as te:
|
||||
tesseract.detect_orientation(self.image)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -433,7 +433,7 @@ class TestTesseract(BaseTest):
|
||||
with self.assertRaises(tesseract.TesseractError) as te:
|
||||
tesseract.detect_orientation(self.image)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -467,7 +467,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(result["angle"], 90)
|
||||
self.assertEqual(result["confidence"], 9.30)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "-psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -500,7 +500,7 @@ class TestTesseract(BaseTest):
|
||||
self.assertEqual(result["angle"], 90)
|
||||
self.assertEqual(result["confidence"], 9.30)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -527,7 +527,7 @@ class TestTesseract(BaseTest):
|
||||
with self.assertRaises(tesseract.TesseractError) as te:
|
||||
tesseract.detect_orientation(self.image)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "-psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
@@ -561,7 +561,7 @@ class TestTesseract(BaseTest):
|
||||
with self.assertRaises(tesseract.TesseractError) as te:
|
||||
tesseract.detect_orientation(self.image)
|
||||
popen.assert_called_once_with(
|
||||
- ["tesseract", "input.bmp", "stdout", "-psm", "0"],
|
||||
+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
|
||||
stdin=subprocess.PIPE,
|
||||
shell=False,
|
||||
startupinfo=None,
|
||||
|
Loading…
Reference in New Issue
Block a user