nixpkgs/pkgs/development/python-modules/somajo/default.nix
Daniël de Kok d8a7d04a29 python3Packages.somajo: 2.0.4 -> 2.0.5
Changelog:

- Add heuristics for ambiguous quotation marks (issue #11).
- Avoid false positives for emoticons that contain a space (issue
  #12).
- Correctly tokenize obfuscated email addresses that contain spaces.
- Do not split tl;dr and its German variant zl;ng.

https://github.com/tsproisl/SoMaJo/releases/tag/v2.0.5
2020-04-10 00:08:13 -07:00

24 lines
619 B
Nix

# Package expression for SoMaJo, built with buildPythonPackage.
#
# Arguments are expected to be supplied by the caller (typically the
# Python package set). `pkgs` is accepted but not referenced in this
# expression; it is kept so the accepted argument set stays the same.
{ pkgs, stdenv, fetchFromGitHub, buildPythonPackage, isPy3k, regex }:

let
  # Bound once here so the source revision below can be derived from them.
  pname = "SoMaJo";
  version = "2.0.5";
in
buildPythonPackage {
  inherit pname version;

  # Disabled whenever `isPy3k` is false.
  disabled = !isPy3k;

  # Source is fetched from GitHub at the revision "v<version>".
  src = fetchFromGitHub {
    owner = "tsproisl";
    repo = pname;
    rev = "v${version}";
    sha256 = "01zvmqilnndh2b257z7bhcc7av5vhjm1g8gmdiiw15gcd2xfmqjs";
  };

  # Dependencies propagated to packages that depend on this one.
  propagatedBuildInputs = [ regex ];

  # Package metadata; library attributes are referenced by explicit path
  # instead of through nested `with` scopes.
  meta = {
    description = "Tokenizer and sentence splitter for German and English web texts";
    homepage = "https://github.com/tsproisl/SoMaJo";
    license = stdenv.lib.licenses.gpl3;
    maintainers = [ stdenv.lib.maintainers.danieldk ];
  };
}