sentencepiece: split into multiple outputs, optional gperftools

I am using sentencepiece in a downstream application where I want to
minimize the resulting closures. This commit makes changes to make
sentencepiece a leaner dependency:

- Split the outputs, so that the binaries/headers do not end up in the
  transitive closure of a library dependency.

- Add the `withGPerfTools` option, which is enabled by default, to
  make it possible to disable the gperftools dependency. According to
  the sentencepiece README, this dependency gives a 10-40% performance
  improvement. In many cases, however, that gain is overshadowed by the
  cost of the neural networks that consume the piece identifiers anyway.
This commit is contained in:
Daniël de Kok 2020-02-25 16:06:24 +01:00 committed by Jon
parent 24219f69a3
commit 27d0c5a07e
2 changed files with 11 additions and 5 deletions

View File

@@ -1,9 +1,10 @@
{ config
{ lib
, fetchFromGitHub
, stdenv
, lib
, cmake
, gperftools
, withGPerfTools ? true
}:
stdenv.mkDerivation rec {
@@ -17,7 +18,9 @@ stdenv.mkDerivation rec {
sha256 = "1ncvyw9ar0z7nd47cysxg5xrjm01y1shdlhp8l2pdpx059p3yx3w";
};
nativeBuildInputs = [ cmake gperftools ];
nativeBuildInputs = [ cmake ] ++ lib.optional withGPerfTools gperftools;
outputs = [ "bin" "dev" "out" ];
meta = with stdenv.lib; {
homepage = "https://github.com/google/sentencepiece";

View File

@@ -6,10 +6,13 @@
buildPythonPackage rec {
pname = "sentencepiece";
inherit (sentencepiece) version src meta;
inherit (sentencepiece) version src;
nativeBuildInputs = [ pkgconfig ];
buildInputs = [ sentencepiece ];
buildInputs = [ sentencepiece.dev ];
sourceRoot = "source/python";
# sentencepiece installs 'bin' output.
meta = builtins.removeAttrs sentencepiece.meta [ "outputsToInstall" ];
}