nixpkgs/pkgs/development/tools/parsing/tree-sitter/update.nix
2021-01-16 13:15:46 +01:00

195 lines
6.2 KiB
Nix

# Evaluates to the `update-all-grammars.sh` script, which re-pins every
# tree-sitter grammar listed below and writes the results to ./grammars.
#
# NOTE(review): `xe` and `src` are accepted but not referenced anywhere in
# this file; presumably they are kept so existing callers keep working —
# confirm before removing them.
{ writeShellScript, nix-prefetch-git, formats, lib
, curl, jq, xe
, src }:
let
# Grammar repositories we want to fetch from the tree-sitter GitHub
# organization. Each entry becomes an `allGrammars` attribute of the same name.
# `checkTreeSitterRepos` aborts the update when the organization contains a
# repository that is listed neither here nor in `ignoredTreeSitterOrgRepos`.
knownTreeSitterOrgGrammarRepos = [
"tree-sitter-javascript"
"tree-sitter-c"
"tree-sitter-swift"
"tree-sitter-json"
"tree-sitter-cpp"
"tree-sitter-ruby"
"tree-sitter-razor"
"tree-sitter-go"
"tree-sitter-c-sharp"
"tree-sitter-python"
"tree-sitter-typescript"
"tree-sitter-rust"
"tree-sitter-bash"
"tree-sitter-php"
"tree-sitter-java"
"tree-sitter-scala"
"tree-sitter-ocaml"
"tree-sitter-julia"
"tree-sitter-agda"
"tree-sitter-fluent"
"tree-sitter-html"
"tree-sitter-haskell"
"tree-sitter-regex"
"tree-sitter-css"
"tree-sitter-verilog"
"tree-sitter-jsdoc"
"tree-sitter-ql"
"tree-sitter-embedded-template"
];
# The list above as a JSON file, read by `checkTreeSitterRepos` through
# `jq --slurpfile`.
knownTreeSitterOrgGrammarReposJson = jsonFile "known-tree-sitter-org-grammar-repos" knownTreeSitterOrgGrammarRepos;
# Repositories of the tree-sitter GitHub organization that we deliberately
# ignore because they are not grammars (core library, language bindings,
# tooling, website, ...).
ignoredTreeSitterOrgRepos = [
"tree-sitter"
"tree-sitter-cli"
# these are the Haskell language bindings; tree-sitter-haskell is the grammar
"haskell-tree-sitter"
# these are the Ruby language bindings; tree-sitter-ruby is the grammar
"ruby-tree-sitter"
# these are the (unmaintained) Rust language bindings; tree-sitter-rust is the grammar
"rust-tree-sitter"
# these are the Node.js language bindings; tree-sitter-javascript is the grammar
"node-tree-sitter"
# these are the Python language bindings; tree-sitter-python is the grammar
"py-tree-sitter"
# AFL fuzzing for tree-sitter
"afl-tree-sitter"
# archived
"highlight-schema"
# website
"tree-sitter.github.io"
];
# The list above as a JSON file, read by `checkTreeSitterRepos` through
# `jq --slurpfile`.
ignoredTreeSitterOrgReposJson = jsonFile "ignored-tree-sitter-org-repos" ignoredTreeSitterOrgRepos;
# Additional grammars that do not live in the official tree-sitter GitHub
# organization. The attribute name is the grammar name used for the generated
# `<name>.json` file; `orga`/`repo` identify the GitHub repository to fetch.
# If you need a grammar whose name already exists in the official
# organization, give it a different attribute name: `allGrammars` merges both
# sets with `mergeAttrsUnique`, which fails evaluation on overlapping names.
otherGrammars = {
"tree-sitter-nix" = {
orga = "cstrahan";
repo = "tree-sitter-nix";
};
"tree-sitter-lua" = {
orga = "nvim-treesitter";
repo = "tree-sitter-lua";
};
};
# Every grammar to update, keyed by grammar name; each value is an
# `{ orga, repo }` attrset. Combines the grammars of the official tree-sitter
# organization with `otherGrammars`; a name present in both fails evaluation.
allGrammars =
  let
    # one `{ orga = "tree-sitter"; repo = <name>; }` entry per known repo
    officialGrammars =
      lib.genAttrs knownTreeSitterOrgGrammarRepos (repo: {
        orga = "tree-sitter";
        inherit repo;
      });
  in
    mergeAttrsUnique otherGrammars officialGrammars;
# TODO: move to lib
# Merge two attrsets like `left // right`, but fail evaluation (with a message
# showing the clashing entries of both sides) if they share any attribute name.
mergeAttrsUnique = left: right:
  let
    sharedKeys = lib.intersectLists (lib.attrNames left) (lib.attrNames right);
    # pretty-print only the clashing attributes of one side
    showShared = attrs: lib.generators.toPretty {} (lib.getAttrs sharedKeys attrs);
    # only evaluated when the assertion below fails
    overlapMessage =
      "mergeAttrsUnique: keys in attrset overlapping:\nleft: ${showShared left}\nright: ${showShared right}";
  in
    assert lib.assertMsg (sharedKeys == []) overlapMessage;
    left // right;
# Serialize `val` to a JSON file named `name` using the nixpkgs JSON format
# generator.
jsonFile = name: val:
  let
    jsonFormat = formats.json {};
  in
    jsonFormat.generate name val;
# Script that reads a JSON array of repository names on stdin and verifies that
# each one is either a known grammar or an explicitly ignored repository.
# Prints the unaccounted-for names to stderr and exits with status 1 otherwise.
checkTreeSitterRepos = writeShellScript "get-grammars.sh" ''
  set -euo pipefail
  # subtract the known and the ignored names from the list given on stdin
  unknown=$(${jq}/bin/jq \
    --slurpfile known "${knownTreeSitterOrgGrammarReposJson}" \
    --slurpfile ignore "${ignoredTreeSitterOrgReposJson}" \
    '. - ($known[0] + $ignore[0])')
  if [[ "$unknown" != "[]" ]]; then
    {
      echo "These repositories are neither known nor ignored:"
      echo "$unknown"
    } >&2
    exit 1
  fi
'';
# TODO: actually percent-encode the argument. For now this is the identity
# function, so callers must only pass strings that are already URL-safe.
urlEscape = component: component;
# Generic bash script that prints, on stdout, the tag name of the latest GitHub
# release of `orga/repo`, or "HEAD" when the API response carries no tag name.
# Exits with a non-zero status when the GitHub API reports a rate limit.
latestGithubRelease = { orga, repo }: writeShellScript "latest-github-release" ''
  set -euo pipefail
  res=$(${curl}/bin/curl \
    --silent \
    "https://api.github.com/repos/${urlEscape orga}/${urlEscape repo}/releases/latest")
  if [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "rate limit" ]]; then
    echo "rate limited" >&2
    # Abort instead of continuing: a rate-limited response has no tag name and
    # would otherwise be silently treated as "no release" and pinned to HEAD.
    exit 1
  fi
  # --raw-output prints the bare tag (v1.2.3) instead of a JSON string
  # ("v1.2.3"), so the value can be used directly as a git revision.
  release=$(printf "%s" "$res" | ${jq}/bin/jq --raw-output '.tag_name')
  # github sometimes returns an empty list even though there are releases
  if [ "$release" = "null" ]; then
    echo "uh-oh, latest for ${orga + "/" + repo} is not there, using HEAD" >&2
    release="HEAD"
  fi
  echo "$release"
'';
# Script that prints, on stdout, a JSON array with the names of the
# repositories of the GitHub organization `orga`.
# Only the first page (at most 100 repositories) is requested.
# Exits with a non-zero status when the API reports a rate limit or when the
# response cannot be turned into a list of names.
latestGithubRepos = { orga }: writeShellScript "latest-github-repos" ''
  set -euo pipefail
  res=$(${curl}/bin/curl \
    --silent \
    'https://api.github.com/orgs/${urlEscape orga}/repos?per_page=100')
  if [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "rate limit" ]]; then
    echo "rate limited" >&2
    exit 1
  fi
  # Report failures on stderr and with a failing exit status; stdout is
  # captured by the caller as the repository list and must stay valid JSON.
  if ! printf "%s" "$res" | ${jq}/bin/jq 'map(.name)'; then
    echo "failed $res" >&2
    exit 1
  fi
'';
# Script that looks up the latest release of the grammar repository
# `orga/repo` and prints the nix-prefetch-git output for that revision on
# stdout. Progress information goes to stderr.
updateGrammar = { orga, repo }:
  let
    findLatestRelease = latestGithubRelease { inherit orga repo; };
    repoUrl = "https://github.com/${urlEscape orga}/${urlEscape repo}";
  in
    writeShellScript "update-grammar.sh" ''
      set -euo pipefail
      latest="$(${findLatestRelease})"
      echo "Fetching latest release ($latest) of ${repo} " >&2
      ${nix-prefetch-git}/bin/nix-prefetch-git \
        --quiet \
        --no-deepClone \
        --url "${repoUrl}" \
        --rev "$latest"
    '';
# Generate one shell snippet per attribute of `attrs` and join the snippets
# with newlines. `f` receives each attribute value extended with a `name`
# field holding the attribute name (a `name` inside the value takes precedence).
foreachSh = attrs: f:
  let
    withName = name: value: { inherit name; } // value;
    entries = lib.mapAttrsToList withName attrs;
  in
    lib.concatStringsSep "\n" (map f entries);
# Entry point of this file. The script
#   1. fetches the repository list of the tree-sitter GitHub organization and
#      checks it against the known/ignored lists,
#   2. writes the nix-prefetch-git output of every grammar in `allGrammars` to
#      `grammars/<name>.json` next to this file, and
#   3. regenerates `grammars/default.nix`, an attrset that reads those files.
update-all-grammars = writeShellScript "update-all-grammars.sh" ''
  set -euo pipefail
  echo "fetching list of grammars" 1>&2
  treeSitterRepos=$(${latestGithubRepos { orga = "tree-sitter"; }})
  echo "checking the tree-sitter repo list against the grammars we know" 1>&2
  printf '%s' "$treeSitterRepos" | ${checkTreeSitterRepos}
  outputDir="${toString ./.}/grammars"
  echo "writing files to $outputDir" 1>&2
  mkdir -p "$outputDir"
  ${foreachSh allGrammars
    # the redirect target is quoted so a path containing whitespace still works
    ({name, orga, repo}: ''${updateGrammar { inherit orga repo; }} > "$outputDir/${name}.json"'')}
  ( echo "{"
    ${foreachSh allGrammars
      ({name, ...}: ''
        # indentation hack
        printf " %s = (builtins.fromJSON (builtins.readFile ./%s.json));\n" "${name}" "${name}"'')}
    echo "}" ) \
    > "$outputDir/default.nix"
'';
in update-all-grammars