From ca626ea96ddda8e028c1bf13865ccdb4c90c4348 Mon Sep 17 00:00:00 2001 From: Michael Raskin <7c6f434c@mail.ru> Date: Wed, 24 Sep 2014 19:54:36 +0400 Subject: [PATCH] A small vanity counter for finding top-grossing committers --- .../scripts/vanity-manual-equalities.txt | 3 + maintainers/scripts/vanity.sh | 65 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 maintainers/scripts/vanity-manual-equalities.txt create mode 100755 maintainers/scripts/vanity.sh diff --git a/maintainers/scripts/vanity-manual-equalities.txt b/maintainers/scripts/vanity-manual-equalities.txt new file mode 100644 index 000000000000..392ff266e1ae --- /dev/null +++ b/maintainers/scripts/vanity-manual-equalities.txt @@ -0,0 +1,3 @@ +viric viriketo@gmail.com +Pjotr Prins pjotr.public01@thebird.nl +Pjotr Prins pjotr.public05@thebird.nl diff --git a/maintainers/scripts/vanity.sh b/maintainers/scripts/vanity.sh new file mode 100755 index 000000000000..0f09f66e435c --- /dev/null +++ b/maintainers/scripts/vanity.sh @@ -0,0 +1,65 @@ +#! 
#! /bin/sh
#
# vanity.sh - count commits per contributor, merging the different spellings
# (full name, nick, e-mail address) that belong to the same person.
#
# Usage:    run from inside the git checkout whose history should be counted.
# Inputs:   `git log` of the current directory,
#           ../../lib/maintainers.nix and vanity-manual-equalities.txt,
#           both resolved relative to this script.
# Output:   the merged name table, an empty line, then `uniq -c` style
#           commit counts sorted ascending.
# Requires: git, GNU sed (for \t), awk, mktemp, and nix-build (to obtain the
#           Apache Jena `sparql` tool). Without sparql the counts are still
#           printed, just without merging spellings.
#
# NOTE(review): in the text this was reviewed from, every <...> token had been
# stripped (RDF IRIs, SPARQL predicates, the Nix path expression). The IRIs
# below are private to this script - they are written to a temp file and read
# back by the query a few lines later - so any consistent naming works.
# '<nixpkgs>' is assumed for the Nix path; confirm against the upstream file.

export LANG=C LC_ALL=C LC_COLLATE=C

script_dir="$(dirname "$0")"
tab="$(printf '\t')"

# IRIs cannot contain spaces or double quotes, so names are encoded before
# they are embedded in RDF: space -> _, ' -> *, " -> **.
# Reads stdin, writes stdout.
normalize_name () {
  sed -e 's/ /_/g; s/'\''/*/g; s/"/**/g;'
}

# Inverse of normalize_name (** must be decoded before *).
# Reads stdin, writes stdout.
denormalize_name () {
  sed -e 's/_/ /g; s/[*][*]/"/g; s/[*]/'\''/g;'
}

# Replace every stdin line that exactly equals a known spelling with its
# canonical spelling; other lines pass through unchanged.
# Arguments: $1 - map file, one "canonical<TAB>spelling" pair per line.
# Matching is by string equality, so names containing regex or sed
# metacharacters (#, ., parentheses, &) are handled safely.
apply_canonical_names () {
  awk '
    BEGIN {
      # Consume the map file here; blanking ARGV[1] leaves no file operand,
      # so the main loop reads standard input.
      map_file = ARGV[1]
      ARGV[1] = ""
      while ((getline row < map_file) > 0) {
        if (split(row, pair, "\t") == 2) {
          canonical[pair[2]] = pair[1]
        }
      }
      close(map_file)
    }
    { print (($0 in canonical) ? canonical[$0] : $0) }
  ' "$1"
}

# Private scratch directory, removed on every exit path.
workdir="$(mktemp -d)" || {
  printf '%s\n' "vanity.sh: cannot create a temporary directory" >&2
  exit 1
}
trap 'rm -rf -- "$workdir"' EXIT
n3="$workdir/names.n3"          # the .n3 suffix is kept from the original temp file name
name_map="$workdir/canonical.tsv"

# Load git log as "name<TAB>email" lines. Only real header lines are used:
# commit-message text is indented by git, so anchoring on ^Author: skips it.
git_data="$(git log | grep '^Author:' |
  sed -e 's/^ *Author://; s/\\//g; s/^ *//; s/ *$//;
          s/ @ .*//; s/ *[<]/\t/; s/[>]//')"

# Name - nick - email correspondence from log and from maintainer list.
# maintainers.nix lines look like:  nick = "Full Name <email>";
# and become "nick<TAB>Full Name<TAB>email".
maintainers="$(grep '=' "$script_dir/../../lib/maintainers.nix" |
  sed -E -e 's/\\"//g;
    s/ *([^ =]*) *= *" *(.*[^ ]) *[<](.*)[>] *".*/\1\t\2\t\3/')"

# Also there are a few manual entries.
git_lines="$( { printf '%s\n' "$git_data";
    cat "$script_dir/vanity-manual-equalities.txt"; } | sort -u)"

# "The same person" relation and a sorting hint.
# can-be is emitted in both directions for git pairs and as a 3-cycle for
# maintainers, so its transitive closure links every spelling of a person.
# is-name carries the hint: 0 for a full name (something with a space),
# 1 otherwise, so that full names sort first and become canonical.
{
  printf '%s\n' "$git_lines" |
    sed -E -e 's@(.*)\t(.*)@<my://name#\1>\t<my://can-be>\t<my://name#\2>.@'
  printf '%s\n' "$git_lines" |
    sed -E -e 's@(.*)\t(.*)@<my://name#\2>\t<my://can-be>\t<my://name#\1>.@'
  printf '%s\n' "$maintainers" |
    sed -E -e 's@(.*)\t(.*)\t(.*)@<my://name#\1>\t<my://can-be>\t<my://name#\2>.@'
  printf '%s\n' "$maintainers" |
    sed -E -e 's@(.*)\t(.*)\t(.*)@<my://name#\2>\t<my://can-be>\t<my://name#\3>.@'
  printf '%s\n' "$maintainers" |
    sed -E -e 's@(.*)\t(.*)\t(.*)@<my://name#\3>\t<my://can-be>\t<my://name#\1>.@'
  printf '%s\n' "$git_lines" | grep ' ' | cut -f 1 |
    sed -e '/^$/d; s@.*@<my://name#&>\t<my://is-name>\t<my://0>.@'
  printf '%s\n' "$git_lines" | grep -v ' ' | cut -f 1 |
    sed -e '/^$/d; s@.*@<my://name#&>\t<my://is-name>\t<my://1>.@'
  printf '%s\n' "$maintainers" | cut -f 2 |
    sed -e '/^$/d; s@.*@<my://name#&>\t<my://is-name>\t<my://0>.@'
} | normalize_name | grep -E '<my://[-a-z]+>' | sort -u > "$n3"
# The grep keeps only lines that were actually turned into triples; input
# lines that matched none of the patterns above are dropped.

# Get transitive closure.
sparql="$(nix-build '<nixpkgs>' -A apache-jena --no-out-link)/bin/sparql"
if [ -x "$sparql" ]; then
  # Rows are "spelling-with-hint<TAB>reachable-spelling<TAB>hint"; sorting on
  # fields 2-3 groups rows by reachable spelling with the best hint first.
  name_list="$(
    "$sparql" --results=TSV --data="$n3" "
    select ?x ?y ?g where {
      ?x <my://can-be>+ ?y.
      ?x <my://is-name> ?g.
    }
    " | tail -n +2 |
    sed -E -e 's@<my://name#@@g; s@<my://@@g; s@>@@g;' |
    sort -t "$tab" -k 2,3
  )"
else
  # Best effort, as before: fall through and print unmerged counts.
  printf '%s\n' "vanity.sh: cannot run '$sparql'; spellings will not be merged" >&2
  name_list=
fi

# Take first spelling option for every person.
name_list_canonical="$(printf '%s\n' "$name_list" | cut -f 1,2 | uniq -f1)"
printf '%s\n' "$name_list_canonical" | denormalize_name > "$name_map"

printf '%s\n' "$name_list" | denormalize_name

echo

printf '%s\n' "$git_data" | cut -f 1 | apply_canonical_names "$name_map" |
  sort | uniq -c | sort -k1n