diff --git a/pkgs/development/python-modules/kaldi-active-grammar/0001-stub.patch b/pkgs/development/python-modules/kaldi-active-grammar/0001-stub.patch new file mode 100644 index 000000000000..4ebe5472b44a --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/0001-stub.patch @@ -0,0 +1,156 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 66e6d49..78f7b42 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -1,23 +1,6 @@ + cmake_minimum_required(VERSION 3.13.0) + project(kaldi_binaries) + +-include(ExternalProject) +-include(ProcessorCount) +- +-ProcessorCount(NCPU) +-if(NOT NCPU EQUAL 0) +- set(MAKE_FLAGS -j${NCPU}) +-endif() +- +-set(DST ${PROJECT_SOURCE_DIR}/kaldi_active_grammar/exec) +-if ("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin") +- set(DST ${DST}/macos/) +-elseif("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Linux") +- set(DST ${DST}/linux/) +-else() +- set(DST ${DST}/windows/) +-endif() +- + set(BINARIES + tools/openfst/bin/fstarcsort${CMAKE_EXECUTABLE_SUFFIX} + tools/openfst/bin/fstcompile${CMAKE_EXECUTABLE_SUFFIX} +@@ -29,63 +12,6 @@ set(LIBRARIES + src/lib/libkaldi-dragonfly${CMAKE_SHARED_LIBRARY_SUFFIX} + ) + +-# For MacOS, we handle all the "indirect" shared libraries manually. 
+-if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin") +- list(APPEND LIBRARIES +- tools/openfst/lib/libfst${CMAKE_SHARED_LIBRARY_SUFFIX} +- tools/openfst/lib/libfstscript${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-base${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-chain${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-cudamatrix${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-decoder${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-feat${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-fstext${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-gmm${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-hmm${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-ivector${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-lat${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-lm${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-matrix${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-nnet2${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-nnet3${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-online2${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-rnnlm${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-transform${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-tree${CMAKE_SHARED_LIBRARY_SUFFIX} +- src/lib/libkaldi-util${CMAKE_SHARED_LIBRARY_SUFFIX} +- ) +-endif() +- +-if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Windows") +- message(FATAL_ERROR "CMake build not supported on Windows") +- # FIXME: copy files? 
+- # https://cmake.org/cmake/help/latest/command/foreach.html +- # https://stackoverflow.com/questions/34799916/copy-file-from-source-directory-to-binary-directory-using-cmake +-endif() +- +-find_program(MAKE_EXE NAMES make gmake nmake) +- +-if(DEFINED ENV{INTEL_MKL_DIR}) +- # Default: INTEL_MKL_DIR=/opt/intel/mkl/ +- message("Compiling with MKL in: $ENV{INTEL_MKL_DIR}") +- set(KALDI_CONFIG_FLAGS --shared --static-math --use-cuda=no --mathlib=MKL --mkl-root=$ENV{INTEL_MKL_DIR}) +- set(MATHLIB_BUILD_COMMAND true) +-else() +- message("Compiling with OpenBLAS") +- set(KALDI_CONFIG_FLAGS --shared --static-math --use-cuda=no --mathlib=OPENBLAS) +- set(MATHLIB_BUILD_COMMAND cd tools +- && git clone -b v0.3.13 --single-branch https://github.com/xianyi/OpenBLAS +- && ${MAKE_EXE} ${MAKE_FLAGS} -C OpenBLAS DYNAMIC_ARCH=1 TARGET=GENERIC USE_LOCKING=1 USE_THREAD=0 all +- && ${MAKE_EXE} ${MAKE_FLAGS} -C OpenBLAS PREFIX=install install +- && cd ..) +-endif() +- +-if(DEFINED ENV{KALDI_BRANCH}) +- set(KALDI_BRANCH $ENV{KALDI_BRANCH}) +-else() +- message(FATAL_ERROR "KALDI_BRANCH not set! 
Use 'origin/master'?") +- # set(KALDI_BRANCH "origin/master") +-endif() + + message("MAKE_EXE = ${MAKE_EXE}") + message("PYTHON_EXECUTABLE = ${PYTHON_EXECUTABLE}") +@@ -99,63 +25,4 @@ message("CMAKE_CURRENT_BINARY_DIR = ${CMAKE_CURRENT_BINARY_DIR}") + + # CXXFLAGS are set and exported in kaldi-configure-wrapper.sh + +-if(NOT "${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Windows") +- set(STRIP_LIBS_COMMAND find src/lib tools/openfst/lib -name *${CMAKE_SHARED_LIBRARY_SUFFIX} | xargs strip) +- set(STRIP_DST_COMMAND find ${DST} | xargs strip) +- if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin") +- list(APPEND STRIP_LIBS_COMMAND -x) +- list(APPEND STRIP_DST_COMMAND -x) +- endif() +- ExternalProject_Add(kaldi +- GIT_CONFIG advice.detachedHead=false +- GIT_REPOSITORY https://github.com/daanzu/kaldi-fork-active-grammar.git +- GIT_TAG ${KALDI_BRANCH} +- GIT_SHALLOW TRUE +- CONFIGURE_COMMAND sed -i.bak -e "s/status=0/exit 0/g" tools/extras/check_dependencies.sh && cp ${PROJECT_SOURCE_DIR}/building/kaldi-configure-wrapper.sh src/ +- BUILD_IN_SOURCE TRUE +- BUILD_COMMAND ${MATHLIB_BUILD_COMMAND} && cd tools && ${MAKE_EXE} ${MAKE_FLAGS} && cd openfst && autoreconf && cd ../../src && bash ./kaldi-configure-wrapper.sh ./configure ${KALDI_CONFIG_FLAGS} && ${MAKE_EXE} ${MAKE_FLAGS} depend && ${MAKE_EXE} ${MAKE_FLAGS} dragonfly dragonflybin bin fstbin lmbin +- LIST_SEPARATOR " " +- INSTALL_COMMAND ${STRIP_LIBS_COMMAND} && mkdir -p ${DST} && cp ${BINARIES} ${LIBRARIES} ${DST} +- ) +-endif() +- +-# Fix dynamic libraries loading paths on macOS. The libraries and +-# executables are built with RPATH settings embedded in them, pointing +-# to the locations in temporary directories used to build the +-# binaries. After package installation is done, these directories are +-# deleted and the dynamic libraries cannot be loaded. The following +-# commands generate a shell script that fixes the paths to the dynamic +-# libraries in the built executables and the libraries themselves. 
+-# Also the commands add a custom target to invoke the generated script +-# after the external project (kaldi) has been built. An alternative +-# would be to change the kaldi engine build system to accept a path to +-# where the binaries would be placed and point RPATH to that location. +-if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin") +- string (REPLACE ";" " " BINARIES_STR "${BINARIES}") +- string (REPLACE ";" " " LIBRARIES_STR "${LIBRARIES}") +- file(GENERATE OUTPUT name_fixer +- CONTENT +- "for a in ${BINARIES_STR} ; do +- a_bare=$\{a##*/\} +- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfst.10.dylib \"@loader_path/libfst.dylib\" ${DST}$a_bare +- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfstscript.10.dylib \"@loader_path/libfstscript.dylib\" ${DST}$a_bare +- for b in ${LIBRARIES_STR} ; do +- b_bare=$\{b##*/\} +- install_name_tool -change \"@rpath/$b_bare\" \"@loader_path/$b_bare\" ${DST}$a_bare +- done +- done +- for a in ${LIBRARIES_STR} ; do +- a_bare=$\{a##*/\} +- install_name_tool -id \"@loader_path/$a_bare\" ${DST}$a_bare +- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfst.10.dylib \"@loader_path/libfst.dylib\" ${DST}$a_bare +- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfstscript.10.dylib \"@loader_path/libfstscript.dylib\" ${DST}$a_bare +- for b in ${LIBRARIES_STR} ; do +- b_bare=$\{b##*/\} +- install_name_tool -change \"@rpath/$b_bare\" \"@loader_path/$b_bare\" ${DST}$a_bare +- done +- done") +- add_custom_target(fixer ALL COMMAND /bin/sh name_fixer) +- add_dependencies(fixer kaldi) +-endif() +- + install(CODE "MESSAGE(\"Installed kaldi engine binaries.\")") diff --git a/pkgs/development/python-modules/kaldi-active-grammar/0002-exec-path.patch b/pkgs/development/python-modules/kaldi-active-grammar/0002-exec-path.patch new file mode 100644 
index 000000000000..60f50d707db0 --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/0002-exec-path.patch @@ -0,0 +1,34 @@ +diff --git a/kaldi_active_grammar/utils.py b/kaldi_active_grammar/utils.py +index 0b70c7f..21e1d62 100644 +--- a/kaldi_active_grammar/utils.py ++++ b/kaldi_active_grammar/utils.py +@@ -79,7 +79,7 @@ elif sys.platform.startswith('linux'): platform = 'linux' + elif sys.platform.startswith('darwin'): platform = 'macos' + else: raise KaldiError("unknown sys.platform") + +-exec_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'exec', platform) ++exec_dir = '/' + library_extension = dict(windows='.dll', linux='.so', macos='.dylib')[platform] + subprocess_seperator = '^&' if platform == 'windows' else ';' + +@@ -89,13 +89,13 @@ class ExternalProcess(object): + + shell = ush.Shell(raise_on_error=True) + +- fstcompile = shell(os.path.join(exec_dir, 'fstcompile')) +- fstarcsort = shell(os.path.join(exec_dir, 'fstarcsort')) +- fstaddselfloops = shell(os.path.join(exec_dir, 'fstaddselfloops')) +- fstinfo = shell(os.path.join(exec_dir, 'fstinfo')) +- # compile_graph = shell(os.path.join(exec_dir, 'compile-graph')) +- compile_graph_agf = shell(os.path.join(exec_dir, 'compile-graph-agf')) +- # compile_graph_agf_debug = shell(os.path.join(exec_dir, 'compile-graph-agf-debug')) ++ fstcompile = shell('fstcompile') ++ fstarcsort = shell('fstarcsort') ++ fstaddselfloops = shell('fstaddselfloops') ++ fstinfo = shell('fstinfo') ++ # compile_graph = shell('compile-graph') ++ compile_graph_agf = shell('compile-graph-agf') ++ # compile_graph_agf_debug = shell('compile-graph-agf-debug') + + make_lexicon_fst = shell([sys.executable, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'kaldi', 'make_lexicon_fst%s.py' % ('_py2' if PY2 else ''))]) + diff --git a/pkgs/development/python-modules/kaldi-active-grammar/0003-ffi-path.patch b/pkgs/development/python-modules/kaldi-active-grammar/0003-ffi-path.patch new file mode 100644 index 
000000000000..19fd8692e57a --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/0003-ffi-path.patch @@ -0,0 +1,13 @@ +diff --git a/kaldi_active_grammar/ffi.py b/kaldi_active_grammar/ffi.py +index 936ed35..1eb2f3d 100644 +--- a/kaldi_active_grammar/ffi.py ++++ b/kaldi_active_grammar/ffi.py +@@ -15,7 +15,7 @@ from cffi import FFI + from .utils import exec_dir, platform + + _ffi = FFI() +-_library_binary_path = os.path.join(exec_dir, dict(windows='kaldi-dragonfly.dll', linux='libkaldi-dragonfly.so', macos='libkaldi-dragonfly.dylib')[platform]) ++_library_binary_path = os.path.join('@kaldiFork@', dict(windows='kaldi-dragonfly.dll', linux='libkaldi-dragonfly.so', macos='libkaldi-dragonfly.dylib')[platform]) + _c_source_ignore_regex = re.compile(r'(\b(extern|DRAGONFLY_API)\b)|("C")|(//.*$)', re.MULTILINE) # Pattern for extraneous stuff to be removed + + def encode(text): diff --git a/pkgs/development/python-modules/kaldi-active-grammar/0004-fork-cmake.patch b/pkgs/development/python-modules/kaldi-active-grammar/0004-fork-cmake.patch new file mode 100644 index 000000000000..b5ae7bc607f3 --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/0004-fork-cmake.patch @@ -0,0 +1,20 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index ededc78b8..7ee1879a5 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -188,6 +188,7 @@ add_subdirectory(src/ivector) + add_subdirectory(src/online) + add_subdirectory(src/online2) + add_subdirectory(src/kws) ++add_subdirectory(src/dragonfly) + + add_subdirectory(src/itf) + +@@ -219,6 +220,7 @@ add_subdirectory(src/ivectorbin) + add_subdirectory(src/onlinebin) + add_subdirectory(src/online2bin) + add_subdirectory(src/kwsbin) ++add_subdirectory(src/dragonflybin) + + # add all cuda executables + if(CUDA_FOUND) diff --git a/pkgs/development/python-modules/kaldi-active-grammar/0006-fork-configure.patch b/pkgs/development/python-modules/kaldi-active-grammar/0006-fork-configure.patch new file mode 100644 
index 000000000000..8467b83f00c7 --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/0006-fork-configure.patch @@ -0,0 +1,26 @@ +diff --git a/src/configure b/src/configure +index 1186c6d12..4ad355e64 100755 +--- a/src/configure ++++ b/src/configure +@@ -1045,9 +1045,6 @@ echo "AS = $AS" >> kaldi.mk + echo "RANLIB = $RANLIB" >> kaldi.mk + echo >> kaldi.mk + +-echo "Checking compiler $CXX ..." +-check_compiler $CXX +- + echo "# Base configuration" >> kaldi.mk + echo >> kaldi.mk + if $dynamic_kaldi ; then +@@ -1066,11 +1063,6 @@ if [ ! -f $FSTROOT/include/fst/fst.h ]; then + failure "Could not find file $FSTROOT/include/fst/fst.h: + you may not have installed OpenFst. See ../tools/INSTALL" + fi +-OPENFST_VER=${OPENFST_VER:-$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::')} +-OPENFST_VER_NUM=$(echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") +-if [ $OPENFST_VER_NUM -lt 10600 ]; then +- failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.6.0.)" +-fi + echo "OPENFSTINC = $FSTROOT/include" >> kaldi.mk + if $static_fst ; then + OPENFSTLIBS="$FSTROOT/lib/libfst.a" diff --git a/pkgs/development/python-modules/kaldi-active-grammar/default.nix b/pkgs/development/python-modules/kaldi-active-grammar/default.nix new file mode 100644 index 000000000000..f7d4738ebcab --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/default.nix @@ -0,0 +1,61 @@ +{ lib +, buildPythonPackage +, fetchFromGitHub +, scikit-build +, cmake +, ush +, requests +, numpy +, cffi +, openfst +, substituteAll +, callPackage +}: + +let + kaldi = callPackage ./fork.nix { }; +in +buildPythonPackage rec { + pname = "kaldi-active-grammar"; + version = "2.1.0"; + + src = fetchFromGitHub { + owner = "daanzu"; + repo = pname; + rev = "v${version}"; + sha256 = "ArbwduoH7mMmIjlFfYAFvcpR39rrkVUJhYEyQzZqsbY="; + }; + + KALDI_BRANCH = "foo"; + KALDIAG_SETUP_RAW = "1"; + + patches = [ + # Makes sure scikit-build doesn't try to build the 
dependencies for us + ./0001-stub.patch + # Uses the dependencies' binaries from $PATH instead of a specific directory + ./0002-exec-path.patch + # Makes it dynamically link to the correct Kaldi library + (substituteAll { + src = ./0003-ffi-path.patch; + kaldiFork = "${kaldi}/lib"; + }) + ]; + + # The build phase would otherwise start one directory below the source root, so step back up first. NOTE(review): presumably the cmake configure hook's build directory rather than scikit-build itself — confirm. + preBuild = '' + cd .. + ''; + + buildInputs = [ openfst kaldi ]; + nativeBuildInputs = [ scikit-build cmake ]; + propagatedBuildInputs = [ ush requests numpy cffi ]; + + meta = with lib; { + description = "Python Kaldi speech recognition"; + homepage = "https://github.com/daanzu/kaldi-active-grammar"; + license = licenses.agpl3Plus; + maintainers = with maintainers; [ ckie ]; + # Other platforms are supported upstream. + platforms = platforms.linux; + }; +} diff --git a/pkgs/development/python-modules/kaldi-active-grammar/fork.nix b/pkgs/development/python-modules/kaldi-active-grammar/fork.nix new file mode 100644 index 000000000000..ba14aec768cb --- /dev/null +++ b/pkgs/development/python-modules/kaldi-active-grammar/fork.nix @@ -0,0 +1,94 @@ +{ lib +, stdenv +, blas +, lapack +, openfst +, icu +, pkg-config +, fetchFromGitHub +, git +, python3 +, openblas +, zlib +, gfortran +}: + +let + old-openfst = openfst.overrideAttrs (self: { # NOTE: despite its name, `self` receives the previous (pre-override) attributes; it is unused here + src = fetchFromGitHub { + owner = "kkm000"; + repo = "openfst"; + rev = "0bca6e76d24647427356dc242b0adbf3b5f1a8d9"; + sha256 = "1802rr14a03zl1wa5a0x1fa412kcvbgprgkadfj5s6s3agnn11rx"; + }; + buildInputs = [ zlib ]; + }); in + +assert blas.implementation == "openblas" && lapack.implementation == "openblas"; + +stdenv.mkDerivation rec { + pname = "kaldi"; + version = "kag-v2.1.0"; + + src = fetchFromGitHub { + owner = "daanzu"; + repo = "kaldi-fork-active-grammar"; + rev = version; + sha256 = "+kT2xJRwDj/ECv/v/J1FpsINWOK8XkP9ZvZ9moFRl70="; + }; + + patches = [ + ./0004-fork-cmake.patch + ./0006-fork-configure.patch + ]; + + enableParallelBuilding = true; + + buildInputs = [ + openblas + 
old-openfst + icu + ]; + + nativeBuildInputs = [ + pkg-config + python3 + gfortran + ]; + + buildFlags = [ + "dragonfly" + "dragonflybin" + "bin" + "fstbin" + "lmbin" + ]; + + postPatch = '' + # Replace the shebangs for the various build scripts + patchShebangs src + ''; + + configurePhase = '' + cd src + ./configure --shared --fst-root="${old-openfst}" --use-cuda=no --openblas-root="${openblas}" --mathlib=OPENBLAS + ''; + + installPhase = '' + # Fixes "patchelf: wrong ELF type" + find . -type f -name "*.o" -print0 | xargs -0 rm -f + mkdir -p $out/{bin,lib} + cp lib/* $out/lib/ + patchelf \ + --set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \ + $out/lib/* + ''; + + meta = with lib; { + description = "Speech Recognition Toolkit"; + homepage = "https://kaldi-asr.org"; + license = licenses.mit; # NOTE(review): upstream Kaldi is distributed under Apache-2.0 (licenses.asl20) — confirm which license this fork ships under + maintainers = with maintainers; [ ckie ]; + platforms = platforms.linux; + }; +} diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index c3b135c49e34..d11d765c1ba6 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -3817,6 +3817,8 @@ in { Kajiki = callPackage ../development/python-modules/kajiki { }; + kaldi-active-grammar = callPackage ../development/python-modules/kaldi-active-grammar { }; + kaptan = callPackage ../development/python-modules/kaptan { }; karton-asciimagic = callPackage ../development/python-modules/karton-asciimagic { };