about summary refs log tree commit diff
path: root/maintainers/scripts/vanity.sh
diff options
context:
space:
mode:
author Michael Raskin <7c6f434c@mail.ru> 2014-09-24 19:54:36 +0400
committer Michael Raskin <7c6f434c@mail.ru> 2014-09-30 23:11:19 +0400
commit ca626ea96ddda8e028c1bf13865ccdb4c90c4348 (patch)
tree aa42b15abf5b999cafe2c8006c8e3a96a067de51 /maintainers/scripts/vanity.sh
parent 4dabba4d1e24aad154db2fc340427990d46634f5 (diff)
A small vanity counter for finding top-grossing committers
Diffstat (limited to 'maintainers/scripts/vanity.sh')
-rwxr-xr-x maintainers/scripts/vanity.sh 65
1 files changed, 65 insertions, 0 deletions
diff --git a/maintainers/scripts/vanity.sh b/maintainers/scripts/vanity.sh
new file mode 100755
index 0000000000000..0f09f66e435c7
--- /dev/null
+++ b/maintainers/scripts/vanity.sh
@@ -0,0 +1,65 @@
+#! /bin/sh
+
+export LANG=C LC_ALL=C LC_COLLATE=C
+
+# Load git log
+git_data="$(git log | grep 'Author:' | 
+  sed -e 's/^ *Author://; s/\\//g; s/^ *//; s/ *$//; 
+  s/ @ .*//; s/ *[<]/\t/; s/[>]//')"
+
+# Name - nick - email correspondence from log and from maintainer list
+# Also there are a few manual entries
+maintainers="$(cat "$(dirname "$0")/../../lib/maintainers.nix" | 
+  grep '=' | sed -re 's/\\"/''/g;
+  s/ *([^ =]*) *= *" *(.*[^ ]) *[<](.*)[>] *".*/\1\t\2\t\3/')"
+git_lines="$( ( echo "$git_data"; 
+  cat vanity-manual-equalities.txt) | sort |uniq)"
+
+# For RDF
+normalize_name () {
+	sed -e 's/ /_/g; s/'\''/*/g; s/"/**/g;'
+}
+
+denormalize_name () {
+	sed -e 's/_/ /g; s/[*][*]/"/g; s/[*]/'\''/g;'
+}
+
+n3="$(mktemp --suffix .n3)"
+
+# «The same person» relation and a sorting hint
+# Full name is something with a space
+(
+echo "$git_lines" | sed -re 's@(.*)\t(.*)@<my://name/\1>	<my://can-be>	<my://name/\2>.@'
+echo "$git_lines" | sed -re 's@(.*)\t(.*)@<my://name/\2>	<my://can-be>	<my://name/\1>.@'
+echo "$maintainers" | sed -re 's@(.*)\t(.*)\t(.*)@<my://name/\1>	<my://can-be>	<my://name/\2>.@'
+echo "$maintainers" | sed -re 's@(.*)\t(.*)\t(.*)@<my://name/\2>	<my://can-be>	<my://name/\3>.@'
+echo "$maintainers" | sed -re 's@(.*)\t(.*)\t(.*)@<my://name/\3>	<my://can-be>	<my://name/\1>.@'
+echo "$git_lines" | grep ' ' | cut -f 1 | sed -e 's@.*@<my://name/&>	<my://is-name>	<my://0>.@'
+echo "$git_lines" | grep -v ' ' | cut -f 1 | sed -e 's@.*@<my://name/&>	<my://is-name>	<my://1>.@'
+echo "$maintainers" | cut -f 2 | sed -e 's@.*@<my://name/&>	<my://is-name>	<my://0>.@'
+) | normalize_name | grep -E '<my://[-a-z]+>' | sort | uniq > "$n3"
+
+# Get transitive closure
+sparql="$(nix-build '<nixpkgs>' -A apache-jena --no-out-link)/bin/sparql"
+name_list="$(
+	"$sparql" --results=TSV --data="$n3" "
+	select ?x ?y ?g where {
+	  ?x <my://can-be>+ ?y.
+	  ?x <my://is-name> ?g.
+        }
+	" | tail -n +2 | 
+	sed -re 's@<my://name/@@g; s@<my://@@g; s@>@@g;' | 
+	sort -k 2,3 -t '	'
+)"
+
+# Take first spelling option for every person
+name_list_canonical="$(echo "$name_list" | cut -f 1,2 | uniq -f1)"
+
+cleaner_script="$(echo "$name_list_canonical" | denormalize_name | 
+  sed -re 's/(.*)\t(.*)/s#^\2$#\1#g/g')"
+
+echo "$name_list" | denormalize_name
+
+echo
+
+echo "$git_data" | cut -f 1 | sed -re "$cleaner_script" | sort | uniq -c | sort -k1n