Merge pull request #112885 from alyssais/wiktionary

dictdDBs.wiktionary: 20161001 -> 20210201; refactor
This commit is contained in:
Michael Raskin 2021-02-12 19:12:45 +00:00 committed by GitHub
commit 5a1a7a359f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 88 additions and 32 deletions

View File

@ -306,6 +306,11 @@ lib.mapAttrs (n: v: v // { shortName = n; }) {
fullName = "GNU Free Documentation License v1.1 only";
};
# GNU Free Documentation License, version 1.1 "or later" variant; the
# entry is built with the `spdx` helper from its SPDX identifier.
fdl11Plus = spdx {
spdxId = "GFDL-1.1-or-later";
fullName = "GNU Free Documentation License v1.1 or later";
};
fdl12Only = spdx {
spdxId = "GFDL-1.2-only";
fullName = "GNU Free Documentation License v1.2 only";

View File

@ -91,5 +91,5 @@ in rec {
locale = "en_UK";
};
wordnet = callPackage ./dictd-wordnet.nix {};
wiktionary = callPackage ./dictd-wiktionary.nix {};
wiktionary = callPackage ./wiktionary {};
}

View File

@ -1,31 +0,0 @@
# DICT-format database generated from an English Wiktionary XML dump.
{lib, stdenv, fetchurl, python, dict, glibcLocales, writeScript}:
stdenv.mkDerivation rec {
# Dump date; interpolated into both path components of the download URL below.
version = "20161001";
pname = "dict-db-wiktionary";
# The compressed pages-articles dump for `version`.
data = fetchurl {
url = "http://dumps.wikimedia.org/enwiktionary/${version}/enwiktionary-${version}-pages-articles.xml.bz2";
sha256 = "0g3k7kxp2nzg0v56i4cz253af3aqvhn1lwkys2fnam51cn3yqm7m";
};
# Conversion script; the builder runs it with the dump as its only argument.
convert = ./wiktionary2dict.py;
buildInputs = [python dict glibcLocales];
# Inline builder: changes into $out/share/dictd, runs the converter there,
# compresses wiktionary-en.dict (presumably written by the converter) with
# dictzip, and records en_US.UTF-8 in a file named `locale`.
builder = writeScript "wiktionary-builder.sh" ''
source $stdenv/setup
mkdir -p $out/share/dictd/
cd $out/share/dictd
python -O ${convert} ${data}
dictzip wiktionary-en.dict
echo en_US.UTF-8 > locale
'';
meta = {
description = "DICT version of English Wiktionary";
homepage = "http://en.wiktionary.org/";
maintainers = [ ];
platforms = lib.platforms.all;
};
}

View File

@ -0,0 +1,8 @@
# Builder for the dictd Wiktionary database.
#
# Reads two values from the environment:
#   $convert - the conversion script to run
#   $src     - the Wiktionary dump handed to that script
source "$stdenv/setup"

# Everything is generated directly inside the output directory.
dictdDir="$out/share/dictd"
mkdir -p "$dictdDir"
cd "$dictdDir"

# Run the converter, then compress wiktionary-en.dict (presumably the file
# the converter just wrote) with dictzip.
python -O "$convert" "$src"
dictzip wiktionary-en.dict

# Record the locale name in a file called `locale` next to the database.
printf '%s\n' en_US.UTF-8 > locale

View File

@ -0,0 +1,25 @@
# DICT-format database generated from an English Wiktionary XML dump.
#
# `src` and `convert` are plain derivation attributes, so they are exported
# to the builder's environment; the builder script is expected to read them
# from there.
{ lib, stdenv, fetchurl, python, dict, glibcLocales }:

stdenv.mkDerivation rec {
  # Dump date; interpolated into both path components of the download URL.
  version = "20210201";
  pname = "dict-db-wiktionary";

  src = fetchurl {
    url = "https://dumps.wikimedia.org/enwiktionary/${version}/enwiktionary-${version}-pages-articles.xml.bz2";
    sha256 = "0dc34cbadsg0f6lhfcyx0np7zjnlg6837piqhlvnn0b45xnzn0cs";
  };

  # Conversion script that is run against the dump.
  convert = ./wiktionary2dict.py;

  buildInputs = [ python dict glibcLocales ];

  builder = ./builder.sh;

  # Script used to bump `version` (and the source hash) to a newer dump.
  passthru.updateScript = ./update.sh;

  meta = with lib; {
    description = "DICT version of English Wiktionary";
    # Use HTTPS here as well, matching the dump URL above.
    homepage = "https://en.wiktionary.org/";
    maintainers = with maintainers; [ qyliss ];
    platforms = platforms.all;
    license = with licenses; [ cc-by-sa-30 fdl11Plus ];
  };
}

View File

@ -0,0 +1,42 @@
import subprocess
from html.parser import HTMLParser
from os.path import abspath, dirname
from urllib.request import urlopen
class WiktionaryLatestVersionParser(HTMLParser):
    """Pick the newest dump version out of an HTML index of dump directories.

    Feed the index page to the parser with ``feed()``; afterwards
    ``latest_version`` holds the greatest version seen among the links,
    or ``current_version`` if no link is newer.

    Only links whose target is an eight-digit directory name (for example
    ``20210201/``) count as versions. Anchors without an ``href``, the
    ``latest/`` alias, parent-directory links and any other link are
    ignored, so an unrelated link can neither crash the parser nor be
    mistaken for a version.
    """

    _DIGITS = frozenset('0123456789')

    def __init__(self, current_version, *args, **kwargs):
        """Start from ``current_version``; other arguments go to HTMLParser."""
        self.latest_version = current_version
        super().__init__(*args, **kwargs)

    @classmethod
    def _is_version(cls, name):
        """Return True if ``name`` is exactly eight ASCII digits."""
        return len(name) == 8 and set(name) <= cls._DIGITS

    def handle_starttag(self, tag, attrs):
        """Consider the target of every ``<a>`` tag as a candidate version."""
        if tag != 'a':
            return

        # `href` may be absent (<a name=...>) or valueless (<a href>).
        href = dict(attrs).get('href')
        if not href:
            return

        # Directory links end in '/'; strip it rather than blindly dropping
        # the last character.
        candidate = href.rstrip('/')
        if not self._is_version(candidate):
            return

        # Versions are fixed-width digit strings, so string comparison
        # orders them chronologically.
        self.latest_version = max(self.latest_version, candidate)
def nix_prefetch_url(url, algo='sha256'):
    """Prefetch ``url`` with ``nix-prefetch-url`` and return its output.

    Args:
        url: The URL to prefetch.
        algo: Hash type passed to ``nix-prefetch-url --type``.

    Returns:
        The command's standard output, decoded as UTF-8 with trailing
        whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    import sys

    # Progress goes to stderr: this script's stdout is captured verbatim as
    # the version string, so nothing else may be written there.
    print(f'nix-prefetch-url {url}', file=sys.stderr)
    out = subprocess.check_output(['nix-prefetch-url', '--type', algo, url])
    return out.decode('utf-8').rstrip()
# Ask Nix which version is currently packaged. The expression file is
# located relative to this script's own directory.
nixpkgs_root = dirname(abspath(__file__)) + '/../../../..'
eval_command = [
    'nix', 'eval', '--raw',
    '-f', nixpkgs_root,
    'dictdDBs.wiktionary.version',
]
current_version = subprocess.check_output(eval_command).decode('utf-8')

# Start from the packaged version, so the result is the greater of it and
# whatever the index page links to.
parser = WiktionaryLatestVersionParser(current_version)

# Download the dump index and let the parser scan its links.
with urlopen('https://dumps.wikimedia.org/enwiktionary/') as resp:
    index_html = resp.read().decode('utf-8')
parser.feed(index_html)

# The resulting version is the script's output.
print(parser.latest_version)

View File

@ -0,0 +1,7 @@
#! /usr/bin/env nix-shell
#! nix-shell -i bash -p common-updater-scripts python3
# Update script for dictdDBs.wiktionary: asks latest_version.py (located
# next to this file) for a version and passes it to update-source-version.
set -euo pipefail

scriptDir="$(dirname "${BASH_SOURCE[0]}")"

# Whatever the helper prints on stdout is taken as the new version.
version="$(python "$scriptDir"/latest_version.py)"

update-source-version dictdDBs.wiktionary "$version"