kaldi-active-grammar: init at 2.1.0

Co-authored-by: Sandro <sandro.jaeckel@gmail.com>
This commit is contained in:
ckie 2021-07-26 01:55:42 +03:00
parent a584cc2156
commit 25afe58b96
No known key found for this signature in database
GPG Key ID: 13E79449C0525215
8 changed files with 406 additions and 0 deletions

View File

@ -0,0 +1,156 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66e6d49..78f7b42 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,23 +1,6 @@
cmake_minimum_required(VERSION 3.13.0)
project(kaldi_binaries)
-include(ExternalProject)
-include(ProcessorCount)
-
-ProcessorCount(NCPU)
-if(NOT NCPU EQUAL 0)
- set(MAKE_FLAGS -j${NCPU})
-endif()
-
-set(DST ${PROJECT_SOURCE_DIR}/kaldi_active_grammar/exec)
-if ("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin")
- set(DST ${DST}/macos/)
-elseif("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Linux")
- set(DST ${DST}/linux/)
-else()
- set(DST ${DST}/windows/)
-endif()
-
set(BINARIES
tools/openfst/bin/fstarcsort${CMAKE_EXECUTABLE_SUFFIX}
tools/openfst/bin/fstcompile${CMAKE_EXECUTABLE_SUFFIX}
@@ -29,63 +12,6 @@ set(LIBRARIES
src/lib/libkaldi-dragonfly${CMAKE_SHARED_LIBRARY_SUFFIX}
)
-# For MacOS, we handle all the "indirect" shared libraries manually.
-if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin")
- list(APPEND LIBRARIES
- tools/openfst/lib/libfst${CMAKE_SHARED_LIBRARY_SUFFIX}
- tools/openfst/lib/libfstscript${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-base${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-chain${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-cudamatrix${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-decoder${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-feat${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-fstext${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-gmm${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-hmm${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-ivector${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-lat${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-lm${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-matrix${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-nnet2${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-nnet3${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-online2${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-rnnlm${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-transform${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-tree${CMAKE_SHARED_LIBRARY_SUFFIX}
- src/lib/libkaldi-util${CMAKE_SHARED_LIBRARY_SUFFIX}
- )
-endif()
-
-if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Windows")
- message(FATAL_ERROR "CMake build not supported on Windows")
- # FIXME: copy files?
- # https://cmake.org/cmake/help/latest/command/foreach.html
- # https://stackoverflow.com/questions/34799916/copy-file-from-source-directory-to-binary-directory-using-cmake
-endif()
-
-find_program(MAKE_EXE NAMES make gmake nmake)
-
-if(DEFINED ENV{INTEL_MKL_DIR})
- # Default: INTEL_MKL_DIR=/opt/intel/mkl/
- message("Compiling with MKL in: $ENV{INTEL_MKL_DIR}")
- set(KALDI_CONFIG_FLAGS --shared --static-math --use-cuda=no --mathlib=MKL --mkl-root=$ENV{INTEL_MKL_DIR})
- set(MATHLIB_BUILD_COMMAND true)
-else()
- message("Compiling with OpenBLAS")
- set(KALDI_CONFIG_FLAGS --shared --static-math --use-cuda=no --mathlib=OPENBLAS)
- set(MATHLIB_BUILD_COMMAND cd tools
- && git clone -b v0.3.13 --single-branch https://github.com/xianyi/OpenBLAS
- && ${MAKE_EXE} ${MAKE_FLAGS} -C OpenBLAS DYNAMIC_ARCH=1 TARGET=GENERIC USE_LOCKING=1 USE_THREAD=0 all
- && ${MAKE_EXE} ${MAKE_FLAGS} -C OpenBLAS PREFIX=install install
- && cd ..)
-endif()
-
-if(DEFINED ENV{KALDI_BRANCH})
- set(KALDI_BRANCH $ENV{KALDI_BRANCH})
-else()
- message(FATAL_ERROR "KALDI_BRANCH not set! Use 'origin/master'?")
- # set(KALDI_BRANCH "origin/master")
-endif()
message("MAKE_EXE = ${MAKE_EXE}")
message("PYTHON_EXECUTABLE = ${PYTHON_EXECUTABLE}")
@@ -99,63 +25,4 @@ message("CMAKE_CURRENT_BINARY_DIR = ${CMAKE_CURRENT_BINARY_DIR}")
# CXXFLAGS are set and exported in kaldi-configure-wrapper.sh
-if(NOT "${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Windows")
- set(STRIP_LIBS_COMMAND find src/lib tools/openfst/lib -name *${CMAKE_SHARED_LIBRARY_SUFFIX} | xargs strip)
- set(STRIP_DST_COMMAND find ${DST} | xargs strip)
- if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin")
- list(APPEND STRIP_LIBS_COMMAND -x)
- list(APPEND STRIP_DST_COMMAND -x)
- endif()
- ExternalProject_Add(kaldi
- GIT_CONFIG advice.detachedHead=false
- GIT_REPOSITORY https://github.com/daanzu/kaldi-fork-active-grammar.git
- GIT_TAG ${KALDI_BRANCH}
- GIT_SHALLOW TRUE
- CONFIGURE_COMMAND sed -i.bak -e "s/status=0/exit 0/g" tools/extras/check_dependencies.sh && cp ${PROJECT_SOURCE_DIR}/building/kaldi-configure-wrapper.sh src/
- BUILD_IN_SOURCE TRUE
- BUILD_COMMAND ${MATHLIB_BUILD_COMMAND} && cd tools && ${MAKE_EXE} ${MAKE_FLAGS} && cd openfst && autoreconf && cd ../../src && bash ./kaldi-configure-wrapper.sh ./configure ${KALDI_CONFIG_FLAGS} && ${MAKE_EXE} ${MAKE_FLAGS} depend && ${MAKE_EXE} ${MAKE_FLAGS} dragonfly dragonflybin bin fstbin lmbin
- LIST_SEPARATOR " "
- INSTALL_COMMAND ${STRIP_LIBS_COMMAND} && mkdir -p ${DST} && cp ${BINARIES} ${LIBRARIES} ${DST}
- )
-endif()
-
-# Fix dynamic libraries loading paths on macOS. The libraries and
-# executables are built with RPATH settings embedded in them, pointing
-# to the locations in temporary directories used to build the
-# binaries. After package installation is done, these directories are
-# deleted and the dynamic libraries cannot be loaded. The following
-# commands generate a shell script that fixes the paths to the dynamic
-# libraries in the built executables and the libraries themselves.
-# Also the commands add a custom target to invoke the generated script
-# after the external project (kaldi) has been built. An alternative
-# would be to change the kaldi engine build system to accept a path to
-# where the binaries would be placed and point RPATH to that location.
-if("${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin")
- string (REPLACE ";" " " BINARIES_STR "${BINARIES}")
- string (REPLACE ";" " " LIBRARIES_STR "${LIBRARIES}")
- file(GENERATE OUTPUT name_fixer
- CONTENT
- "for a in ${BINARIES_STR} ; do
- a_bare=$\{a##*/\}
- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfst.10.dylib \"@loader_path/libfst.dylib\" ${DST}$a_bare
- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfstscript.10.dylib \"@loader_path/libfstscript.dylib\" ${DST}$a_bare
- for b in ${LIBRARIES_STR} ; do
- b_bare=$\{b##*/\}
- install_name_tool -change \"@rpath/$b_bare\" \"@loader_path/$b_bare\" ${DST}$a_bare
- done
- done
- for a in ${LIBRARIES_STR} ; do
- a_bare=$\{a##*/\}
- install_name_tool -id \"@loader_path/$a_bare\" ${DST}$a_bare
- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfst.10.dylib \"@loader_path/libfst.dylib\" ${DST}$a_bare
- install_name_tool -change ${PROJECT_BINARY_DIR}/kaldi-prefix/src/kaldi/tools/openfst-1.6.7/lib/libfstscript.10.dylib \"@loader_path/libfstscript.dylib\" ${DST}$a_bare
- for b in ${LIBRARIES_STR} ; do
- b_bare=$\{b##*/\}
- install_name_tool -change \"@rpath/$b_bare\" \"@loader_path/$b_bare\" ${DST}$a_bare
- done
- done")
- add_custom_target(fixer ALL COMMAND /bin/sh name_fixer)
- add_dependencies(fixer kaldi)
-endif()
-
install(CODE "MESSAGE(\"Installed kaldi engine binaries.\")")

View File

@ -0,0 +1,34 @@
diff --git a/kaldi_active_grammar/utils.py b/kaldi_active_grammar/utils.py
index 0b70c7f..21e1d62 100644
--- a/kaldi_active_grammar/utils.py
+++ b/kaldi_active_grammar/utils.py
@@ -79,7 +79,7 @@ elif sys.platform.startswith('linux'): platform = 'linux'
elif sys.platform.startswith('darwin'): platform = 'macos'
else: raise KaldiError("unknown sys.platform")
-exec_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'exec', platform)
+exec_dir = '/'
library_extension = dict(windows='.dll', linux='.so', macos='.dylib')[platform]
subprocess_seperator = '^&' if platform == 'windows' else ';'
@@ -89,13 +89,13 @@ class ExternalProcess(object):
shell = ush.Shell(raise_on_error=True)
- fstcompile = shell(os.path.join(exec_dir, 'fstcompile'))
- fstarcsort = shell(os.path.join(exec_dir, 'fstarcsort'))
- fstaddselfloops = shell(os.path.join(exec_dir, 'fstaddselfloops'))
- fstinfo = shell(os.path.join(exec_dir, 'fstinfo'))
- # compile_graph = shell(os.path.join(exec_dir, 'compile-graph'))
- compile_graph_agf = shell(os.path.join(exec_dir, 'compile-graph-agf'))
- # compile_graph_agf_debug = shell(os.path.join(exec_dir, 'compile-graph-agf-debug'))
+ fstcompile = shell('fstcompile')
+ fstarcsort = shell('fstarcsort')
+ fstaddselfloops = shell('fstaddselfloops')
+ fstinfo = shell('fstinfo')
+ # compile_graph = shell('compile-graph')
+ compile_graph_agf = shell('compile-graph-agf')
+ # compile_graph_agf_debug = shell('compile-graph-agf-debug')
make_lexicon_fst = shell([sys.executable, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'kaldi', 'make_lexicon_fst%s.py' % ('_py2' if PY2 else ''))])

View File

@ -0,0 +1,13 @@
diff --git a/kaldi_active_grammar/ffi.py b/kaldi_active_grammar/ffi.py
index 936ed35..1eb2f3d 100644
--- a/kaldi_active_grammar/ffi.py
+++ b/kaldi_active_grammar/ffi.py
@@ -15,7 +15,7 @@ from cffi import FFI
from .utils import exec_dir, platform
_ffi = FFI()
-_library_binary_path = os.path.join(exec_dir, dict(windows='kaldi-dragonfly.dll', linux='libkaldi-dragonfly.so', macos='libkaldi-dragonfly.dylib')[platform])
+_library_binary_path = os.path.join('@kaldiFork@', dict(windows='kaldi-dragonfly.dll', linux='libkaldi-dragonfly.so', macos='libkaldi-dragonfly.dylib')[platform])
_c_source_ignore_regex = re.compile(r'(\b(extern|DRAGONFLY_API)\b)|("C")|(//.*$)', re.MULTILINE) # Pattern for extraneous stuff to be removed
def encode(text):

View File

@ -0,0 +1,20 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ededc78b8..7ee1879a5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -188,6 +188,7 @@ add_subdirectory(src/ivector)
add_subdirectory(src/online)
add_subdirectory(src/online2)
add_subdirectory(src/kws)
+add_subdirectory(src/dragonfly)
add_subdirectory(src/itf)
@@ -219,6 +220,7 @@ add_subdirectory(src/ivectorbin)
add_subdirectory(src/onlinebin)
add_subdirectory(src/online2bin)
add_subdirectory(src/kwsbin)
+add_subdirectory(src/dragonflybin)
# add all cuda executables
if(CUDA_FOUND)

View File

@ -0,0 +1,26 @@
diff --git a/src/configure b/src/configure
index 1186c6d12..4ad355e64 100755
--- a/src/configure
+++ b/src/configure
@@ -1045,9 +1045,6 @@ echo "AS = $AS" >> kaldi.mk
echo "RANLIB = $RANLIB" >> kaldi.mk
echo >> kaldi.mk
-echo "Checking compiler $CXX ..."
-check_compiler $CXX
-
echo "# Base configuration" >> kaldi.mk
echo >> kaldi.mk
if $dynamic_kaldi ; then
@@ -1066,11 +1063,6 @@ if [ ! -f $FSTROOT/include/fst/fst.h ]; then
failure "Could not find file $FSTROOT/include/fst/fst.h:
you may not have installed OpenFst. See ../tools/INSTALL"
fi
-OPENFST_VER=${OPENFST_VER:-$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::')}
-OPENFST_VER_NUM=$(echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d")
-if [ $OPENFST_VER_NUM -lt 10600 ]; then
- failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.6.0.)"
-fi
echo "OPENFSTINC = $FSTROOT/include" >> kaldi.mk
if $static_fst ; then
OPENFSTLIBS="$FSTROOT/lib/libfst.a"

View File

@ -0,0 +1,61 @@
{ lib
, buildPythonPackage
, fetchFromGitHub
, scikit-build
, cmake
, ush
, requests
, numpy
, cffi
, openfst
, substituteAll
, callPackage
}:

# Python package `kaldi-active-grammar`, built against the Kaldi fork that
# is packaged next to this file in ./fork.nix.
let
  # The forked Kaldi build; its lib directory is substituted into the ffi patch below.
  forkedKaldi = callPackage ./fork.nix { };
in
buildPythonPackage rec {
  pname = "kaldi-active-grammar";
  version = "2.1.0";

  src = fetchFromGitHub {
    owner = "daanzu";
    repo = pname;
    rev = "v${version}";
    sha256 = "ArbwduoH7mMmIjlFfYAFvcpR39rrkVUJhYEyQzZqsbY=";
  };

  # Environment variables exported to the build.
  # NOTE(review): "foo" is a placeholder value; presumably upstream's build
  # scripts only need KALDI_BRANCH to be set -- confirm against upstream.
  KALDI_BRANCH = "foo";
  KALDIAG_SETUP_RAW = "1";

  patches = [
    # Stops scikit-build from trying to build the dependencies itself
    ./0001-stub.patch
    # Looks the dependencies' binaries up via $PATH rather than a fixed directory
    ./0002-exec-path.patch
    # Points the dynamic library lookup at the Kaldi fork built above
    (substituteAll {
      src = ./0003-ffi-path.patch;
      kaldiFork = "${forkedKaldi}/lib";
    })
  ];

  # Per the original author, scikit-build leaves the build in the wrong
  # folder, so step back up one directory before building.
  preBuild = ''
    cd ..
  '';

  buildInputs = [ openfst forkedKaldi ];
  nativeBuildInputs = [ scikit-build cmake ];
  propagatedBuildInputs = [ ush requests numpy cffi ];

  meta = {
    description = "Python Kaldi speech recognition";
    homepage = "https://github.com/daanzu/kaldi-active-grammar";
    license = lib.licenses.agpl3Plus;
    maintainers = [ lib.maintainers.ckie ];
    # Upstream supports more platforms; only Linux is enabled here.
    platforms = lib.platforms.linux;
  };
}

View File

@ -0,0 +1,94 @@
# Kaldi fork (daanzu/kaldi-fork-active-grammar) that provides the shared
# libraries consumed by the kaldi-active-grammar Python package.
#
# NOTE(review): `git` is accepted below but not referenced anywhere in this
# expression; it is kept only so the argument set stays unchanged.
{ lib
, stdenv
, blas
, lapack
, openfst
, icu
, pkg-config
, fetchFromGitHub
, git
, python3
, openblas
, zlib
, gfortran
}:

let
  # OpenFst rebuilt from the kkm000 fork at a fixed commit. The callback
  # argument (the previous attribute set) is not needed, hence `_`.
  old-openfst = openfst.overrideAttrs (_: {
    src = fetchFromGitHub {
      owner = "kkm000";
      repo = "openfst";
      rev = "0bca6e76d24647427356dc242b0adbf3b5f1a8d9";
      sha256 = "1802rr14a03zl1wa5a0x1fa412kcvbgprgkadfj5s6s3agnn11rx";
    };
    # Replaces (does not extend) the build inputs of the original openfst.
    buildInputs = [ zlib ];
  });
in

# The configure invocation below hard-codes OpenBLAS, so refuse to evaluate
# when the blas/lapack providers are configured to something else.
assert blas.implementation == "openblas" && lapack.implementation == "openblas";

stdenv.mkDerivation rec {
  pname = "kaldi";
  version = "kag-v2.1.0";

  src = fetchFromGitHub {
    owner = "daanzu";
    repo = "kaldi-fork-active-grammar";
    rev = version;
    sha256 = "+kT2xJRwDj/ECv/v/J1FpsINWOK8XkP9ZvZ9moFRl70=";
  };

  patches = [
    ./0004-fork-cmake.patch
    ./0006-fork-configure.patch
  ];

  enableParallelBuilding = true;

  buildInputs = [
    openblas
    old-openfst
    icu
  ];

  nativeBuildInputs = [
    pkg-config
    python3
    gfortran
  ];

  # Make targets to build instead of the default target.
  buildFlags = [
    "dragonfly"
    "dragonflybin"
    "bin"
    "fstbin"
    "lmbin"
  ];

  postPatch = ''
    # Replace the shebangs for the various build scripts
    patchShebangs src
  '';

  # Custom phases call runHook so that pre/post hooks added through
  # overrideAttrs (or by setup hooks) are still executed.
  configurePhase = ''
    runHook preConfigure

    # The build and install steps that follow run from within src/.
    cd src
    ./configure --shared --fst-root="${old-openfst}" --use-cuda=no --openblas-root="${openblas}" --mathlib=OPENBLAS

    runHook postConfigure
  '';

  # NOTE(review): $out/bin is created but nothing is copied into it here;
  # confirm whether the executables built by the *bin targets should be installed.
  installPhase = ''
    runHook preInstall

    # Fixes "patchelf: wrong ELF type"
    find . -type f -name "*.o" -print0 | xargs -0 rm -f
    mkdir -p "$out/bin" "$out/lib"
    cp lib/* "$out/lib/"
    # Let the installed libraries find their dependencies and each other.
    patchelf \
      --set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \
      "$out"/lib/*

    runHook postInstall
  '';

  meta = with lib; {
    description = "Speech Recognition Toolkit";
    homepage = "https://kaldi-asr.org";
    # NOTE(review): upstream Kaldi appears to be distributed under
    # Apache-2.0 -- confirm that licenses.mit is correct for this fork.
    license = licenses.mit;
    maintainers = with maintainers; [ ckie ];
    platforms = platforms.linux;
  };
}

View File

@ -3817,6 +3817,8 @@ in {
Kajiki = callPackage ../development/python-modules/kajiki { };
kaldi-active-grammar = callPackage ../development/python-modules/kaldi-active-grammar { };
kaptan = callPackage ../development/python-modules/kaptan { };
karton-asciimagic = callPackage ../development/python-modules/karton-asciimagic { };