author     illustris <rharikrishnan95@gmail.com>  2021-10-21 02:03:56 +0530
committer  Raphael Megzari <raphael@megzari.com>  2021-10-25 16:30:19 +0900
commit     6688c522542b25cc4a1ee106eddb18dc3d7a4133 (patch)
tree       6fc74e3546ee40ca71024627a78c2e7454b441b3 /nixos/tests/hadoop
parent     91bb2b7016de43dfc08fde834d135954369737dc (diff)
nixos/hadoop: add better test
The existing tests for HDFS and YARN only check if the services come up and expose their web interfaces.
The new combined hadoop test will also test whether the services and roles work together as intended.
It spins up an HDFS+YARN cluster and submits a demo YARN application that uses the HDFS
cluster for storage and the YARN cluster for compute.
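
Condensed from the test script added below, the steps that exercise storage and compute together look like this (the machine names master and worker come from the node set in hadoop.nix; the path of the examples jar is elided here as <examples-jar>):

    start_all()

    # HDFS leaves safe mode only once the datanode has registered with the namenode.
    worker.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")

    # The nodemanager must likewise have registered with the resourcemanager.
    assert "Total Nodes:1" in worker.succeed("yarn node -list")

    # Submit the bundled MapReduce "pi" example: YARN schedules the containers,
    # HDFS holds the job's staging and output data.
    assert "Estimated value of Pi is" in worker.succeed(
        "HADOOP_USER_NAME=hdfs yarn jar <examples-jar> pi 2 10"
    )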
Diffstat (limited to 'nixos/tests/hadoop')
-rw-r--r--  nixos/tests/hadoop/hadoop.nix  70
-rw-r--r--  nixos/tests/hadoop/hdfs.nix     4
-rw-r--r--  nixos/tests/hadoop/yarn.nix     4
3 files changed, 74 insertions(+), 4 deletions(-)
diff --git a/nixos/tests/hadoop/hadoop.nix b/nixos/tests/hadoop/hadoop.nix
new file mode 100644
index 0000000000000..46dfac26e065b
--- /dev/null
+++ b/nixos/tests/hadoop/hadoop.nix
@@ -0,0 +1,70 @@
+import ../make-test-python.nix ({pkgs, ...}: {
+
+  nodes = let
+    package = pkgs.hadoop;
+    coreSite = {
+      "fs.defaultFS" = "hdfs://master";
+    };
+  in {
+    master = {pkgs, options, ...}: {
+      services.hadoop = {
+        inherit package coreSite;
+        hdfs.namenode.enabled = true;
+        yarn.resourcemanager.enabled = true;
+      };
+      virtualisation.memorySize = 1024;
+    };
+
+    worker = {pkgs, options, ...}: {
+      services.hadoop = {
+        inherit package coreSite;
+        hdfs.datanode.enabled = true;
+        yarn.nodemanager.enabled = true;
+        yarnSite = options.services.hadoop.yarnSite.default // {
+          "yarn.resourcemanager.hostname" = "master";
+        };
+      };
+      virtualisation.memorySize = 2048;
+    };
+  };
+
+  testScript = ''
+    start_all()
+
+    master.wait_for_unit("network.target")
+    master.wait_for_unit("hdfs-namenode")
+
+    master.wait_for_open_port(8020)
+    master.wait_for_open_port(9870)
+
+    worker.wait_for_unit("network.target")
+    worker.wait_for_unit("hdfs-datanode")
+    worker.wait_for_open_port(9864)
+    worker.wait_for_open_port(9866)
+    worker.wait_for_open_port(9867)
+
+    master.succeed("curl -f http://worker:9864")
+    worker.succeed("curl -f http://master:9870")
+
+    worker.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
+
+    master.wait_for_unit("yarn-resourcemanager")
+
+    master.wait_for_open_port(8030)
+    master.wait_for_open_port(8031)
+    master.wait_for_open_port(8032)
+    master.wait_for_open_port(8088)
+    worker.succeed("curl -f http://master:8088")
+
+    worker.wait_for_unit("yarn-nodemanager")
+    worker.wait_for_open_port(8042)
+    worker.wait_for_open_port(8040)
+    master.succeed("curl -f http://worker:8042")
+
+    assert "Total Nodes:1" in worker.succeed("yarn node -list")
+
+    assert "Estimated value of Pi is" in worker.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
+    assert "SUCCEEDED" in worker.succeed("yarn application -list -appStates FINISHED")
+    worker.succeed("sudo -u hdfs hdfs dfs -ls / | systemd-cat")
+  '';
+})
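
A note on the jar-locating one-liner in the test script above: which yarn finds the wrapper script on PATH, readlink resolves it into the Hadoop store path, and sed rewrites the bin/yarn suffix into a glob matching the bundled MapReduce examples jar. The same rewrite, sketched in Python with a hypothetical store path for illustration:

    import re

    # Hypothetical result of `readlink $(which yarn)` inside the test VM.
    yarn_real = "/nix/store/example-hadoop/bin/yarn"

    # The same substitution the test performs with sed.
    examples_glob = re.sub(
        r"bin/yarn",
        "lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar",
        yarn_real,
    )
    # examples_glob is now
    # "/nix/store/example-hadoop/lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar",
    # which the shell expands before handing the jar to `yarn jar ... pi 2 10`.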
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
index f1f98ed42eb31..f5907185c039e 100644
--- a/nixos/tests/hadoop/hdfs.nix
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -2,7 +2,7 @@ import ../make-test-python.nix ({...}: {
   nodes = {
     namenode = {pkgs, ...}: {
       services.hadoop = {
-        package = pkgs.hadoop_3_1;
+        package = pkgs.hadoop;
         hdfs.namenode.enabled = true;
         coreSite = {
           "fs.defaultFS" = "hdfs://namenode:8020";
@@ -20,7 +20,7 @@ import ../make-test-python.nix ({...}: {
     };
     datanode = {pkgs, ...}: {
       services.hadoop = {
-        package = pkgs.hadoop_3_1;
+        package = pkgs.hadoop;
         hdfs.datanode.enabled = true;
         coreSite = {
           "fs.defaultFS" = "hdfs://namenode:8020";
diff --git a/nixos/tests/hadoop/yarn.nix b/nixos/tests/hadoop/yarn.nix
index 01077245d3973..fbbb293eecd6b 100644
--- a/nixos/tests/hadoop/yarn.nix
+++ b/nixos/tests/hadoop/yarn.nix
@@ -1,7 +1,7 @@
 import ../make-test-python.nix ({...}: {
   nodes = {
     resourcemanager = {pkgs, ...}: {
-      services.hadoop.package = pkgs.hadoop_3_1;
+      services.hadoop.package = pkgs.hadoop;
       services.hadoop.yarn.resourcemanager.enabled = true;
       services.hadoop.yarnSite = {
         "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
@@ -12,7 +12,7 @@ import ../make-test-python.nix ({...}: {
       ];
     };
     nodemanager = {pkgs, ...}: {
-      services.hadoop.package = pkgs.hadoop_3_1;
+      services.hadoop.package = pkgs.hadoop;
       services.hadoop.yarn.nodemanager.enabled = true;
       services.hadoop.yarnSite = {
         "yarn.resourcemanager.hostname" = "resourcemanager";