Partition all-cabal-hashes into case-insensitive-safe components.
OS X by default has a case-insensitive filesystem, and fetching all-cabal-hashes there fails due to a hash mismatch caused by package pairs like compactable and Compactable. This partitions the package set such that each partition contains no equivalent-up-to-case pairs.
This commit is contained in:
parent
7a946f0ae6
commit
843e0992ca
@ -1,11 +1,33 @@
|
||||
{ fetchFromGitHub }:
|
||||
{ fetchFromGitHub
|
||||
, stdenv
|
||||
, pkgconfig
|
||||
, libarchive
|
||||
, glib
|
||||
, # Override this to use a different revision
|
||||
src-spec ?
|
||||
{ owner = "commercialhaskell";
|
||||
repo = "all-cabal-hashes";
|
||||
rev = "901c2522e6797270f5ded4495b1a529e6c16ef45";
|
||||
sha256 = "05jmwsgrk77nz9vvgfbpsps0l320qgjpkr2c9zhkn9sc3d275lfb";
|
||||
}
|
||||
, lib
|
||||
}:
|
||||
|
||||
# Use builtins.fetchTarball "https://github.com/commercialhaskell/all-cabal-hashes/archive/hackage.tar.gz"
|
||||
# instead if you want the latest Hackage automatically at the price of frequent re-downloads.
|
||||
|
||||
fetchFromGitHub {
|
||||
owner = "commercialhaskell";
|
||||
repo = "all-cabal-hashes";
|
||||
rev = "901c2522e6797270f5ded4495b1a529e6c16ef45";
|
||||
sha256 = "0wng314y3yn6bbwa5ar254l7p8y99gsvm8ll4z7f3wg77v5fzish";
|
||||
}
|
||||
let
  # Helper program compiled from ./partition-all-cabal-hashes.c. It is built
  # directly with cc, using pkg-config to obtain the libarchive and glib
  # compiler/linker flags, and installed as $out/bin/partition-all-cabal-hashes.
  partitionTool = stdenv.mkDerivation {
    name = "partition-all-cabal-hashes";
    src = ./partition-all-cabal-hashes.c;
    # src is a single C file, so there is nothing to unpack.
    unpackPhase = "true";
    buildInputs = [ pkgconfig libarchive glib ];
    buildPhase =
      "cc -O3 $(pkg-config --cflags --libs libarchive glib-2.0) $src -o partition-all-cabal-hashes";
    installPhase = ''
      mkdir -p $out/bin
      install -m755 partition-all-cabal-hashes $out/bin
    '';
  };

  # postFetch hook: run the helper on the downloaded tarball, writing its
  # partitioned output to $out.
  partitionArgs = {
    postFetch = "${partitionTool}/bin/partition-all-cabal-hashes $downloadedFile $out";
  };
in
# src-spec supplies owner/repo/rev/sha256; the postFetch hook is merged on top.
fetchFromGitHub (src-spec // partitionArgs)
|
||||
|
190
pkgs/data/misc/hackage/partition-all-cabal-hashes.c
Normal file
190
pkgs/data/misc/hackage/partition-all-cabal-hashes.c
Normal file
@ -0,0 +1,190 @@
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <glib.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
|
||||
/* Lower-case a NUL-terminated string in place and return the same pointer.
 *
 * Each byte is converted with tolower() under the current LC_CTYPE locale.
 * The byte is passed as an unsigned char: handing tolower() a negative
 * value other than EOF (which a plain char >= 0x80 becomes on platforms
 * where char is signed) is undefined behaviour.
 *
 * str must be a writable, NUL-terminated string.
 */
static char * case_normalize(char * str) {
  for (char * iter = str; *iter; ++iter) {
    *iter = (char) tolower((unsigned char) *iter);
  }
  return str;
}
|
||||
|
||||
/* Three-argument string comparator: orders two NUL-terminated strings
 * with strcmp(). The third argument (user data) is ignored. Passed as the
 * comparison callback to g_sequence_search() in main().
 */
static gint compare_str(const void * a, const void * b, void * _) {
  const char * lhs = a;
  const char * rhs = b;
  (void) _;
  return strcmp(lhs, rhs);
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "Usage: %s TARBALL OUTPUT\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t output_len = strlen(argv[2]);
|
||||
|
||||
/* Switch to standard locale to ensure consistency in case-folding.
|
||||
*/
|
||||
setlocale(LC_CTYPE, "C");
|
||||
|
||||
/* Map from case-normalized package name to a sorted sequence of
|
||||
* package names in the equivalence class defined by
|
||||
* case-normalization.
|
||||
*/
|
||||
GHashTable * equivalence_classes =
|
||||
g_hash_table_new(g_str_hash, g_str_equal);
|
||||
|
||||
/* Open up the tarball.
|
||||
*/
|
||||
struct archive * ar = archive_read_new();
|
||||
if (!ar) {
|
||||
perror("Allocating archive structure");
|
||||
return 1;
|
||||
}
|
||||
archive_read_support_filter_gzip(ar);
|
||||
archive_read_support_format_tar(ar);
|
||||
if (archive_read_open_filename( ar
|
||||
, argv[1]
|
||||
, 10240
|
||||
) == ARCHIVE_FATAL) {
|
||||
fprintf( stderr
|
||||
, "Error opening %s: %s\n"
|
||||
, argv[0]
|
||||
, archive_error_string(ar)
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Extract the length of the output directory that prefixes all
|
||||
* tarball entries from the first entry in the tarball.
|
||||
*/
|
||||
struct archive_entry * ent;
|
||||
int err = archive_read_next_header(ar, &ent);
|
||||
if (err != ARCHIVE_OK) {
|
||||
if (err == ARCHIVE_EOF) {
|
||||
fprintf( stderr
|
||||
, "No entries in %s, surely this is an error!\n"
|
||||
, argv[1]
|
||||
);
|
||||
} else {
|
||||
fprintf( stderr
|
||||
, "Error reading entry from %s: %s\n"
|
||||
, argv[1]
|
||||
, archive_error_string(ar)
|
||||
);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
const char * path = archive_entry_pathname(ent);
|
||||
/* Number of characters from the start of the path name until after
|
||||
* the slash after the leading directory.
|
||||
*/
|
||||
size_t prefix_len = strchr(path, '/') - path + 1;
|
||||
|
||||
/* Extract each entry to the right partition.
|
||||
*/
|
||||
do {
|
||||
path = archive_entry_pathname(ent) + prefix_len;
|
||||
const char * pkg_end = strchr(path, '/');
|
||||
if (!pkg_end)
|
||||
/* If there is no second slash, then this is either just the entry
|
||||
* corresponding to the root or some non-package file (e.g.
|
||||
* travis.yml). In either case, we don't care.
|
||||
*/
|
||||
continue;
|
||||
|
||||
/* Find our package in the equivalence class map.
|
||||
*/
|
||||
char * pkg_name = g_strndup(path, pkg_end - path);
|
||||
char * pkg_normalized =
|
||||
case_normalize(g_strndup(path, pkg_end - path));
|
||||
GSequence * pkg_class =
|
||||
g_hash_table_lookup(equivalence_classes, pkg_normalized);
|
||||
gint partition_num;
|
||||
if (!pkg_class) {
|
||||
/* We haven't seen any packages with this normalized name yet,
|
||||
* so we need to initialize the sequence and add it to the map.
|
||||
*/
|
||||
pkg_class = g_sequence_new(NULL);
|
||||
g_sequence_append(pkg_class, pkg_name);
|
||||
g_hash_table_insert( equivalence_classes
|
||||
, pkg_normalized
|
||||
, pkg_class
|
||||
);
|
||||
partition_num = 1;
|
||||
} else {
|
||||
g_free(pkg_normalized);
|
||||
/* Find the package name in the equivalence class */
|
||||
GSequenceIter * pkg_iter =
|
||||
g_sequence_search( pkg_class
|
||||
, pkg_name
|
||||
, compare_str
|
||||
, NULL
|
||||
);
|
||||
if (!g_sequence_iter_is_end(pkg_iter)) {
|
||||
/* If there are any packages after this one in the list, bail
|
||||
* out. In principle we could solve this by moving them up to
|
||||
* the next partition, but so far I've never seen any github
|
||||
* tarballs out of order so let's save ourselves the work
|
||||
* until we know we need it.
|
||||
*/
|
||||
fprintf( stderr
|
||||
, "Out of order github tarball: %s is after %s\n"
|
||||
, pkg_name
|
||||
, (char *) g_sequence_get(pkg_iter)
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
pkg_iter = g_sequence_iter_prev(pkg_iter);
|
||||
if (strcmp( g_sequence_get(pkg_iter)
|
||||
, pkg_name
|
||||
) != 0) {
|
||||
/* This package doesn't have the same name as the one right
|
||||
* before where it should be in the sequence, which means it's
|
||||
* new and needs to be added to the sequence.
|
||||
*
|
||||
* !!! We need to change this to use g_sequence_insert_before
|
||||
* if we ever get an out-of-order github tarball, see comment
|
||||
* after the check for !g_sequence_iter_is_end(pkg_iter).
|
||||
*/
|
||||
pkg_iter = g_sequence_append(pkg_class, pkg_name);
|
||||
} else {
|
||||
g_free(pkg_name);
|
||||
}
|
||||
/* Get the partition number, starting with 1.
|
||||
*/
|
||||
partition_num = g_sequence_iter_get_position(pkg_iter) + 1;
|
||||
}
|
||||
|
||||
/* Set the destination path.
|
||||
* The 3 below is for the length of /#/, the partition number part
|
||||
* of the path. If we have more than 9 partitions, we deserve to
|
||||
* segfault. The 1 at the end is for the trailing null.
|
||||
*/
|
||||
char * dest_path = g_malloc(output_len + 3 + strlen(path) + 1);
|
||||
sprintf(dest_path, "%s/%d/%s", argv[2], partition_num, path);
|
||||
archive_entry_set_pathname(ent, dest_path);
|
||||
|
||||
if (archive_read_extract(ar, ent, 0) != ARCHIVE_OK) {
|
||||
fprintf( stderr
|
||||
, "Error extracting entry %s from %s: %s\n"
|
||||
, dest_path
|
||||
, argv[1]
|
||||
, archive_error_string(ar)
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
} while ((err = archive_read_next_header(ar, &ent)) == ARCHIVE_OK);
|
||||
if (err != ARCHIVE_EOF) {
|
||||
fprintf( stderr
|
||||
, "Error reading entry from %s: %s\n"
|
||||
, argv[1]
|
||||
, archive_error_string(ar)
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -118,10 +118,23 @@ let
|
||||
'';
|
||||
};
|
||||
|
||||
hackage2nix = name: version: self.haskellSrc2nix {
|
||||
# Given a package name, build (and import) a tiny Nix file whose value is
# the store path of the all-cabal-hashes component (one of the directories
# directly under ${all-cabal-hashes}) whose listing contains that package.
# The build fails with a message on stderr if no component contains it.
all-cabal-hashes-component = name: import (pkgs.runCommand "all-cabal-hashes-component-${name}.nix" {}
  ''
    # grep -q may exit as soon as it finds a match, before ls has finished
    # writing; with pipefail off the pipeline status is grep's alone.
    set +o pipefail
    for component in ${all-cabal-hashes}/*; do
      # -x -F: match the whole directory name as a literal string, so that
      # e.g. "foo" does not select a component that only contains "foo-bar".
      # NOTE(review): presumably ls+grep is used instead of a path test so
      # the match stays case-sensitive on case-insensitive filesystems.
      if ls $component | grep -q -x -F -e ${name}; then
        echo "builtins.storePath $component" > $out
        exit 0
      fi
    done
    echo "${name} not found in any all-cabal-hashes component, are you sure it's in hackage?" >&2
    exit 1
  '');
|
||||
|
||||
hackage2nix = name: version: let component = all-cabal-hashes-component name; in self.haskellSrc2nix {
|
||||
name = "${name}-${version}";
|
||||
sha256 = ''$(sed -e 's/.*"SHA256":"//' -e 's/".*$//' "${all-cabal-hashes}/${name}/${version}/${name}.json")'';
|
||||
src = "${all-cabal-hashes}/${name}/${version}/${name}.cabal";
|
||||
sha256 = ''$(sed -e 's/.*"SHA256":"//' -e 's/".*$//' "${component}/${name}/${version}/${name}.json")'';
|
||||
src = "${component}/${name}/${version}/${name}.cabal";
|
||||
};
|
||||
|
||||
in package-set { inherit pkgs stdenv callPackage; } self // {
|
||||
|
Loading…
Reference in New Issue
Block a user