Partition all-cabal-hashes into case-insensitive-safe components.

OS X by default has a case-insensitive filesystem, and fetching
all-cabal-hashes there fails due to a hash mismatch caused by package
pairs like compactable and Compactable. This partitions the package set
such that each partition contains no equivalent-up-to-case pairs.
This commit is contained in:
Shea Levy 2017-10-10 12:24:30 -04:00
parent 7a946f0ae6
commit 843e0992ca
No known key found for this signature in database
GPG Key ID: 5C0BD6957D86FE27
3 changed files with 236 additions and 11 deletions

View File

@ -1,11 +1,33 @@
{ fetchFromGitHub }:
{ fetchFromGitHub
, stdenv
, pkgconfig
, libarchive
, glib
, # Override this to use a different revision
src-spec ?
{ owner = "commercialhaskell";
repo = "all-cabal-hashes";
rev = "901c2522e6797270f5ded4495b1a529e6c16ef45";
sha256 = "05jmwsgrk77nz9vvgfbpsps0l320qgjpkr2c9zhkn9sc3d275lfb";
}
, lib
}:
# Use builtins.fetchTarball "https://github.com/commercialhaskell/all-cabal-hashes/archive/hackage.tar.gz"
# instead if you want the latest Hackage automatically at the price of frequent re-downloads.
fetchFromGitHub {
owner = "commercialhaskell";
repo = "all-cabal-hashes";
rev = "901c2522e6797270f5ded4495b1a529e6c16ef45";
sha256 = "0wng314y3yn6bbwa5ar254l7p8y99gsvm8ll4z7f3wg77v5fzish";
}
let
  # Helper program compiled from ./partition-all-cabal-hashes.c and installed
  # as $out/bin/partition-all-cabal-hashes.
  partition-all-cabal-hashes = stdenv.mkDerivation {
    name = "partition-all-cabal-hashes";

    # The source is a single C file, so the unpack step is a no-op.
    src = ./partition-all-cabal-hashes.c;
    unpackPhase = "true";

    buildInputs = [ pkgconfig libarchive glib ];

    # Compiler and linker flags for libarchive and glib come from pkg-config.
    buildPhase =
      "cc -O3 $(pkg-config --cflags --libs libarchive glib-2.0) $src -o partition-all-cabal-hashes";

    installPhase = ''
      mkdir -p $out/bin
      install -m755 partition-all-cabal-hashes $out/bin
    '';
  };

  # src-spec extended with a postFetch hook that runs the helper on
  # $downloadedFile and writes its result to $out.
  partitioned-fetch-args = src-spec // {
    postFetch = "${partition-all-cabal-hashes}/bin/partition-all-cabal-hashes $downloadedFile $out";
  };
in fetchFromGitHub partitioned-fetch-args

View File

@ -0,0 +1,190 @@
#include <stdio.h>
#include <ctype.h>
#include <glib.h>
#include <string.h>
#include <locale.h>
#include <archive.h>
#include <archive_entry.h>
/* Lower-case STR in place, one byte at a time, and return STR.
 *
 * STR must be a writable, NUL-terminated string. The mapping is done
 * with tolower(), so it follows the current LC_CTYPE locale.
 */
static char * case_normalize(char * str) {
  for (char * iter = str; *iter; ++iter) {
    /* tolower() is only defined for EOF and values representable as
     * unsigned char; a plain char can be negative for bytes >= 0x80,
     * so convert before the call.
     */
    *iter = (char) tolower((unsigned char) *iter);
  }
  return str;
}
/* Three-argument comparator that orders two C strings with strcmp().
 * The third (user data) argument is ignored.
 */
static gint compare_str(const void * lhs, const void * rhs, void * unused) {
  const char * left = lhs;
  const char * right = rhs;
  (void) unused;
  return strcmp(left, right);
}
/* Partition the package directories of a tarball so that no two packages
 * whose names differ only by case end up in the same partition.
 *
 * Usage: PROGRAM TARBALL OUTPUT
 *
 * Every entry <root>/<pkg>/<rest> of TARBALL is extracted to
 * OUTPUT/<n>/<pkg>/<rest>, where <n> is the 1-based position of <pkg>
 * among the package names seen so far that case-normalize to the same
 * string. Entries that are not inside a package directory are skipped.
 *
 * Returns 0 on success and 1 on a usage, read or extraction error.
 */
int main(int argc, char ** argv) {
  if (argc != 3) {
    fprintf(stderr, "Usage: %s TARBALL OUTPUT\n", argv[0]);
    return 1;
  }

  /* Switch to standard locale to ensure consistency in case-folding.
   */
  setlocale(LC_CTYPE, "C");

  /* Map from case-normalized package name to a sorted sequence of
   * package names in the equivalence class defined by
   * case-normalization.
   */
  GHashTable * equivalence_classes =
    g_hash_table_new(g_str_hash, g_str_equal);

  /* Open up the tarball.
   */
  struct archive * ar = archive_read_new();
  if (!ar) {
    perror("Allocating archive structure");
    return 1;
  }
  archive_read_support_filter_gzip(ar);
  archive_read_support_format_tar(ar);
  if (archive_read_open_filename(ar, argv[1], 10240) != ARCHIVE_OK) {
    /* Report the tarball that failed to open, not the program name. */
    fprintf( stderr
           , "Error opening %s: %s\n"
           , argv[1]
           , archive_error_string(ar)
           );
    return 1;
  }

  /* Extract the length of the output directory that prefixes all
   * tarball entries from the first entry in the tarball.
   */
  struct archive_entry * ent;
  int err = archive_read_next_header(ar, &ent);
  if (err != ARCHIVE_OK) {
    if (err == ARCHIVE_EOF) {
      fprintf( stderr
             , "No entries in %s, surely this is an error!\n"
             , argv[1]
             );
    } else {
      fprintf( stderr
             , "Error reading entry from %s: %s\n"
             , argv[1]
             , archive_error_string(ar)
             );
    }
    return 1;
  }
  const char * path = archive_entry_pathname(ent);
  const char * root_end = path ? strchr(path, '/') : NULL;
  if (!root_end) {
    /* Without a leading directory there is no prefix to strip, and the
     * pointer arithmetic below would be undefined.
     */
    fprintf( stderr
           , "First entry of %s has no leading directory\n"
           , argv[1]
           );
    return 1;
  }
  /* Number of characters from the start of the path name until after
   * the slash after the leading directory.
   */
  size_t prefix_len = root_end - path + 1;

  /* Extract each entry to the right partition.
   */
  do {
    const char * full_path = archive_entry_pathname(ent);
    if (!full_path || strlen(full_path) < prefix_len)
      /* Too short to carry the leading directory, so it cannot be a
       * package file; skipping also keeps the offset below in bounds.
       */
      continue;
    path = full_path + prefix_len;
    const char * pkg_end = strchr(path, '/');
    if (!pkg_end)
      /* If there is no second slash, then this is either just the entry
       * corresponding to the root or some non-package file (e.g.
       * travis.yml). In either case, we don't care.
       */
      continue;

    /* Find our package in the equivalence class map.
     */
    char * pkg_name = g_strndup(path, pkg_end - path);
    char * pkg_normalized =
      case_normalize(g_strndup(path, pkg_end - path));
    GSequence * pkg_class =
      g_hash_table_lookup(equivalence_classes, pkg_normalized);
    gint partition_num;
    if (!pkg_class) {
      /* We haven't seen any packages with this normalized name yet,
       * so we need to initialize the sequence and add it to the map.
       * The map and the sequence keep the two strings allocated above.
       */
      pkg_class = g_sequence_new(NULL);
      g_sequence_append(pkg_class, pkg_name);
      g_hash_table_insert( equivalence_classes
                         , pkg_normalized
                         , pkg_class
                         );
      partition_num = 1;
    } else {
      g_free(pkg_normalized);
      /* Find the package name in the equivalence class */
      GSequenceIter * pkg_iter =
        g_sequence_search( pkg_class
                         , pkg_name
                         , compare_str
                         , NULL
                         );
      if (!g_sequence_iter_is_end(pkg_iter)) {
        /* If there are any packages after this one in the list, bail
         * out. In principle we could solve this by moving them up to
         * the next partition, but so far I've never seen any github
         * tarballs out of order so let's save ourselves the work
         * until we know we need it.
         */
        fprintf( stderr
               , "Out of order github tarball: %s is after %s\n"
               , pkg_name
               , (char *) g_sequence_get(pkg_iter)
               );
        return 1;
      }
      pkg_iter = g_sequence_iter_prev(pkg_iter);
      if (strcmp( g_sequence_get(pkg_iter)
                , pkg_name
                ) != 0) {
        /* This package doesn't have the same name as the one right
         * before where it should be in the sequence, which means it's
         * new and needs to be added to the sequence.
         *
         * !!! We need to change this to use g_sequence_insert_before
         * if we ever get an out-of-order github tarball, see comment
         * after the check for !g_sequence_iter_is_end(pkg_iter).
         */
        pkg_iter = g_sequence_append(pkg_class, pkg_name);
      } else {
        g_free(pkg_name);
      }
      /* Get the partition number, starting with 1.
       */
      partition_num = g_sequence_iter_get_position(pkg_iter) + 1;
    }

    /* Set the destination path: OUTPUT/<partition>/<path below root>.
     * g_strdup_printf sizes the buffer itself, so any number of
     * partitions is safe. It must be built before the entry's pathname
     * is replaced, because `path` points into the old pathname.
     */
    char * dest_path =
      g_strdup_printf("%s/%d/%s", argv[2], partition_num, path);
    archive_entry_set_pathname(ent, dest_path);
    if (archive_read_extract(ar, ent, 0) != ARCHIVE_OK) {
      fprintf( stderr
             , "Error extracting entry %s from %s: %s\n"
             , dest_path
             , argv[1]
             , archive_error_string(ar)
             );
      return 1;
    }
    g_free(dest_path);
  } while ((err = archive_read_next_header(ar, &ent)) == ARCHIVE_OK);

  if (err != ARCHIVE_EOF) {
    fprintf( stderr
           , "Error reading entry from %s: %s\n"
           , argv[1]
           , archive_error_string(ar)
           );
    return 1;
  }

  archive_read_free(ar);
  return 0;
}

View File

@ -118,10 +118,23 @@ let
'';
};
hackage2nix = name: version: self.haskellSrc2nix {
# Given a package name, build (and import) a tiny Nix file that evaluates to
# the store path of the all-cabal-hashes component whose top-level listing
# contains an entry named exactly `name`. The build fails if no component
# lists the package.
all-cabal-hashes-component = name: import (pkgs.runCommand "all-cabal-hashes-component-${name}.nix" {}
  ''
    # grep -q may exit before ls has finished writing; presumably pipefail is
    # disabled so that this does not count as a failed pipeline.
    set +o pipefail
    for component in ${all-cabal-hashes}/*; do
      # -x -F: compare the whole entry name as a literal string. A bare
      # `grep -q name` is an unanchored regex match, so looking up `foo`
      # would also accept a component that only contains `foo-bar`.
      if ls $component | grep -q -x -F -- "${name}"; then
        echo "builtins.storePath $component" > $out
        exit 0
      fi
    done
    echo "${name} not found in any all-cabal-hashes component, are you sure it's in hackage?" >&2
    exit 1
  '');
# Generate the Nix expression for Hackage package `name` at `version`: look up
# the all-cabal-hashes component for `name`, then pass haskellSrc2nix the
# package's .cabal file as `src` and, as `sha256`, a shell substitution that
# extracts the "SHA256" field from the package's .json file.
# NOTE(review): both `${all-cabal-hashes}`-based and `${component}`-based
# sha256/src lines appear below; presumably the former are the lines this
# change removes -- confirm against the actual file.
hackage2nix = name: version: let component = all-cabal-hashes-component name; in self.haskellSrc2nix {
name = "${name}-${version}";
sha256 = ''$(sed -e 's/.*"SHA256":"//' -e 's/".*$//' "${all-cabal-hashes}/${name}/${version}/${name}.json")'';
src = "${all-cabal-hashes}/${name}/${version}/${name}.cabal";
sha256 = ''$(sed -e 's/.*"SHA256":"//' -e 's/".*$//' "${component}/${name}/${version}/${name}.json")'';
src = "${component}/${name}/${version}/${name}.cabal";
};
in package-set { inherit pkgs stdenv callPackage; } self // {