diff options
author | illustris <rharikrishnan95@gmail.com> | 2021-09-17 20:49:21 +0530 |
---|---|---|
committer | illustris <rharikrishnan95@gmail.com> | 2021-09-17 20:52:54 +0530 |
commit | 682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e (patch) | |
tree | abde07e0b8c4dff9797c6168cf24c1cf24917f1e /pkgs/applications/networking/cluster/spark | |
parent | f023c4710137586b1b443b0100c8d8bce74f5437 (diff) |
spark: 2.4.4 -> 2.4.8, init 3.1.2
The "spark" package now points to 3.1.2, and "spark2" points to spark 2.4.8
Diffstat (limited to 'pkgs/applications/networking/cluster/spark')
-rw-r--r-- | pkgs/applications/networking/cluster/spark/default.nix | 109 |
1 files changed, 64 insertions, 45 deletions
diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix index 76230b8e10033..af194afafa9a8 100644 --- a/pkgs/applications/networking/cluster/spark/default.nix +++ b/pkgs/applications/networking/cluster/spark/default.nix @@ -1,56 +1,75 @@ -{ lib, stdenv, fetchzip, makeWrapper, jre, pythonPackages, coreutils, hadoop +{ lib, stdenv, fetchzip, makeWrapper, jdk8, python3Packages, extraPythonPackages ? [], coreutils, hadoop , RSupport? true, R }: with lib; -stdenv.mkDerivation rec { +let + spark = { pname, version, src }: + stdenv.mkDerivation rec { + inherit pname version src; + nativeBuildInputs = [ makeWrapper ]; + buildInputs = [ jdk8 python3Packages.python ] + ++ extraPythonPackages + ++ optional RSupport R; - pname = "spark"; - version = "2.4.4"; + untarDir = "${pname}-${version}"; + installPhase = '' + mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java} + mv * $out/lib/${untarDir} - src = fetchzip { - url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz"; - sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g"; + cp $out/lib/${untarDir}/conf/log4j.properties{.template,} + + cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF + export JAVA_HOME="${jdk8}" + export SPARK_HOME="$out/lib/${untarDir}" + export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath) + export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}" + export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" + ${optionalString RSupport '' + export SPARKR_R_SHELL="${R}/bin/R" + export PATH="\$PATH:${R}/bin"''} + EOF + + for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do + makeWrapper "$n" "$out/bin/$(basename $n)" + substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname + done + for n in $(find $out/lib/${untarDir}/sbin -type f); do + # Spark deprecated scripts with "slave" in the name. + # This line adds forward compatibility with the nixos spark module for + # older versions of spark that don't have the new "worker" scripts. + ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true + done + ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java + ''; + + meta = { + description = "Apache Spark is a fast and general engine for large-scale data processing"; + homepage = "http://spark.apache.org"; + license = lib.licenses.asl20; + platforms = lib.platforms.all; + maintainers = with maintainers; [ thoughtpolice offline kamilchm illustris ]; + repositories.git = "git://git.apache.org/spark.git"; + }; + }; +in { + spark3 = spark rec { + pname = "spark"; + version = "3.1.2"; + + src = fetchzip { + url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz"; + sha256 = "1bgh2y6jm7wqy6yc40rx68xkki31i3jiri2yixb1bm0i9pvsj9yf"; + }; }; + spark2 = spark rec { + pname = "spark"; + version = "2.4.8"; - nativeBuildInputs = [ makeWrapper ]; - buildInputs = [ jre pythonPackages.python pythonPackages.numpy ] - ++ optional RSupport R; - - untarDir = "${pname}-${version}-bin-without-hadoop"; - installPhase = '' - mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java} - mv * $out/lib/${untarDir} - - sed -e 's/INFO, console/WARN, console/' < \ - $out/lib/${untarDir}/conf/log4j.properties.template > \ - $out/lib/${untarDir}/conf/log4j.properties - - cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF - export JAVA_HOME="${jre}" - export SPARK_HOME="$out/lib/${untarDir}" - export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath) - export PYSPARK_PYTHON="${pythonPackages.python}/bin/${pythonPackages.python.executable}" - export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" - ${optionalString RSupport - ''export SPARKR_R_SHELL="${R}/bin/R" - export PATH=$PATH:"${R}/bin/R"''} - EOF - - for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do - makeWrapper "$n" "$out/bin/$(basename $n)" - substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname - done - ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java - ''; - - meta = { - description = "Apache Spark is a fast and general engine for large-scale data processing"; - homepage = "http://spark.apache.org"; - license = lib.licenses.asl20; - platforms = lib.platforms.all; - maintainers = with maintainers; [ thoughtpolice offline kamilchm ]; - repositories.git = "git://git.apache.org/spark.git"; + src = fetchzip { + url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz"; + sha256 = "1mkyq0gz9fiav25vr0dba5ivp0wh0mh7kswwnx8pvsmb6wbwyfxv"; + }; }; } |