Implement machine enumeration

author: Boris Kolpackov <boris@codesynthesis.com> 2017-04-08 14:14:26 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2017-04-08 14:14:26 +0200
commit: 36e0c88e7a3912c8a2e6594841172adb9c14525b (patch)
tree: 909a269ded721a0201a01d3493af6fc11dd75292 /bbot/agent.cxx
parent: cfd31379be5eefb22a72b5ee90ce8fd17a0802b7 (diff)
1 files changed, 263 insertions, 223 deletions
diff --git a/bbot/agent.cxx b/bbot/agent.cxx
index 3e4f8dc..76c3a86 100644
--- a/bbot/agent.cxx
+++ b/bbot/agent.cxx
@@ -9,12 +9,8 @@
 #include <iostream>
 
 #include <butl/pager>
-#include <butl/fdstream>
 #include <butl/filesystem> // dir_iterator
 
-#include <butl/manifest-parser>
-#include <butl/manifest-serializer>
-
 #include <bbot/manifest>
 
 #include <bbot/types>
@@ -29,50 +25,64 @@ using namespace std;
 using namespace butl;
 using namespace bbot;
 
+// The btrfs tool likes to print informational messages, like "Created
+// snapshot such and such". Luckily, it writes them to stdout while proper
+// diagnostics to stderr.
+//
+template <typename... A>
+inline void
+btrfs (tracer& t, A&&... a)
+{
+  if (verb >= 3)
+    run (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...);
+  else
+    run (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+template <typename... A>
+inline butl::process_exit::code_type
+btrfs_exit (tracer& t, A&&... a)
+{
+  return verb >= 3
+    ? run_exit (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...)
+    : run_exit (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+agent_options ops;
+
 const string bs_prot ("1"); // Bootstrap protocol version.
 
 string tc_name; // Toolchain name.
 string tc_num;  // Toolchain number.
 string tc_id;   // Toolchain id.
 
-template <typename T>
-static T
-parse_manifest (const path& f, const char* what, bool ignore_unknown = true)
+static bootstrapped_machine_manifest
+bootstrap_machine (const dir_path& md, const machine_manifest& mm)
 {
-  try
-  {
-    if (!file_exists (f))
-      fail << what << " manifest file " << f << " does not exist";
+  bootstrapped_machine_manifest r {
+    mm,
+    toolchain_manifest {tc_id},
+    bootstrap_manifest {
+      bootstrap_manifest::versions_type {
+        {"bbot",    BBOT_VERSION},
+        {"libbbot", LIBBBOT_VERSION},
+        {"libbpkg", LIBBPKG_VERSION},
+        {"libbutl", LIBBUTL_VERSION}
+      }
+    }
+  };
 
-    ifdstream ifs (f);
-    manifest_parser mp (ifs, f.string ());
-    return T (mp, ignore_unknown);
-  }
-  catch (const manifest_parsing& e)
-  {
-    fail << "invalid " << what << " manifest: "
-         << f << ':' << e.line << ':' << e.column << ": " << e.description
-         << endf;
-  }
-  catch (const io_error& e)
-  {
-    fail << "unable to read " << what << " manifest " << f << ": " << e
-         << endf;
-  }
-  catch (const system_error& e) // EACCES, etc.
+  if (!ops.fake_bootstrap ())
   {
-    fail << "unable to access " << what << " manifest " << f << ": " << e
-         << endf;
   }
-}
-
-/*
 
-static bootstrapped_machine_manifest
-bootstrap_machine (const dir_path&);
+  serialize_manifest (r, md / "manifest", "bootstrapped machine");
+  return r;
+}
 
 static machine_manifests
 enumerate_machines (const dir_path& rd)
+try
 {
   tracer trace ("enumerate_machines");
 
@@ -93,231 +103,242 @@ enumerate_machines (const dir_path& rd)
 
     // Inside we have machines.
     //
-    for (const dir_entry& me: dir_iterator (vd))
+    try
     {
-      const string mn (me.path ().string ());
-
-      if (me.type () != entry_type::directory || mn[0] == '.')
-        continue;
-
-      const dir_path md (dir_path (vd) /= mn);
-
-      // Our endgoal here is to obtain a bootstrapped snapshot of this machine
-      // while watching out for potential race conditions (machines being
-      // added/upgraded/removed; see the manual for details).
-      //
-      // So here is our overall plan:
-      //
-      // 1. Resolve current subvolume link for our bootstrap protocol.
-      //
-      // 2. If there is no link, cleanup and ignore this machine.
-      //
-      // 3. Try to create a snapshot of current subvolume (this operation is
-      //    atomic). If failed (e.g., someone changed the link and removed the
-      //    subvolume in the meantime), retry from #1.
-      //
-      // 4. Compare the snapshot to the already bootstrapped version (if any)
-      //    and see if we need to re-bootstrap. If so, use the snapshot as a
-      //    starting point. Rename to bootstrapped at the end (atomic).
-      //
-      const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
-      const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolc...>
-      bool te (dir_exists (tp));
-
-      auto delete_t = [&tp] ()
+      for (const dir_entry& me: dir_iterator (vd))
       {
-        // btrfs property set -ts $tp ro false
-        // btrfs subvolume delete $tp
-      };
+        const string mn (me.path ().string ());
 
-      for (size_t retry (0);; ++retry)
-      {
-        if (retry != 0)
-          sleep (1);
+        if (me.type () != entry_type::directory || mn[0] == '.')
+          continue;
+
+        const dir_path md (dir_path (vd) /= mn);
 
-        // Resolve the link to subvolume path.
+        // Our end goal here is to obtain a bootstrapped snapshot of this
+        // machine while watching out for potential race conditions (machines
+        // being added/upgraded/removed; see the manual for details).
         //
-        dir_path sp; // <name>-<P>.<R>
-        try
+        // So here is our overall plan:
+        //
+        // 1. Resolve current subvolume link for our bootstrap protocol.
+        //
+        // 2. If there is no link, cleanup and ignore this machine.
+        //
+        // 3. Try to create a snapshot of current subvolume (this operation is
+        //    atomic). If failed (e.g., someone changed the link and removed
+        //    the subvolume in the meantime), retry from #1.
+        //
+        // 4. Compare the snapshot to the already bootstrapped version (if
+        //    any) and see if we need to re-bootstrap. If so, use the snapshot
+        //    as a starting point. Rename to bootstrapped at the end (atomic).
+        //
+        const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
+        const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<too...>
+        bool te (dir_exists (tp));
+
+        auto delete_t = [&tp, &trace] ()
+        {
+          btrfs (trace, "property", "set", "-ts", tp, "ro", "false");
+          btrfs (trace, "subvolume", "delete", tp);
+        };
+
+        for (size_t retry (0);; ++retry)
         {
-          char b [PATH_MAX + 1];
-          ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+          if (retry != 0)
+            sleep (1);
 
-          if (r == -1)
+          // Resolve the link to subvolume path.
+          //
+          dir_path sp; // <name>-<P>.<R>
+          try
           {
-            if (errno != ENOENT)
-              throw_generic_error (errno);
+            char b [PATH_MAX + 1];
+            ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+
+            if (r == -1)
+            {
+              if (errno != ENOENT)
+                throw_generic_error (errno);
+            }
+            else if (static_cast<size_t> (r) >= sizeof (b))
+              throw_generic_error (EINVAL);
+            else
+            {
+              b[r] = '\0';
+              sp = dir_path (b);
+              if (sp.relative ())
+                sp = md / sp;
+            }
           }
-          else if (static_cast<size_t> (r) >= sizeof (b))
-            throw_generic_error (EINVAL);
-          else
+          catch (const system_error& e)
           {
-            b[r] = '\0';
-            sp = dir_path (b);
-            if (sp.relative ())
-              sp = md / sp;
+            fail << "unable to read subvolume link " << lp << ": " << e;
           }
-        }
-        catch (const system_error& e)
-        {
-          fail << "unable to read subvolume link " << lp << ": " << e;
-        }
 
-        // If the resolution fails, then this means there is no current
-        // machine subvolume (for this bootstrap protocol). In this case we
-        // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
-        // this machine.
-        //
-        if (sp.empty ())
-        {
-          if (te)
-            delete_t ();
+          // If the resolution fails, then this means there is no current
+          // machine subvolume (for this bootstrap protocol). In this case we
+          // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
+          // this machine.
+          //
+          if (sp.empty ())
+          {
+            if (te)
+              delete_t ();
 
-          break;
-        }
+            l2 ([&]{trace << "skipping " << md << ": no subvolume link";});
+            break;
+          }
 
-        // <name>-<toolchain>-<xxx>
-        //
-        const dir_path xp (dir_path (md) /=
-                           path::traits::temp_name (mn + '-' + tc_name));
+          // <name>-<toolchain>-<xxx>
+          //
+          const dir_path xp (dir_path (md) /=
+                             path::traits::temp_name (mn + '-' + tc_name));
 
-        // btrfs subvolume snapshot $sp $xp
-        if (false)
-        {
-          if (retry >= 10)
-            fail << "unable to snapshot subvolume " << sp;
+          if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0)
+          {
+            if (retry >= 10)
+              fail << "unable to snapshot subvolume " << sp;
 
-          continue;
-        }
+            continue;
+          }
 
-        // Load the (original) machine manifest.
-        //
-        auto mm (
-          parse_manifest<machine_manifest> (sp / "manifest", "machine"));
+          // Load the (original) machine manifest.
+          //
+          auto mm (
+            parse_manifest<machine_manifest> (sp / "manifest", "machine"));
 
-        // If we already have <name>-<toolchain>, see if it needs to be re-
-        // bootstrapped. Things that render it obsolete:
-        //
-        // 1. New machine revision  (compare machine ids).
-        // 2. New toolchain         (compare toolchain ids).
-        // 3. New bbot/libbbot      (compare versions).
-        //
-        // The last case has a complication: what should we do if we have
-        // bootstrapped a newer version of bbot? This would mean that we are
-        // about to be stopped and upgraded (and the upgraded version will
-        // probably be able to use the result). So we simply ignore this
-        // machine for this run.
+          // If we already have <name>-<toolchain>, see if it needs to be re-
+          // bootstrapped. Things that render it obsolete:
+          //
+          // 1. New machine revision  (compare machine ids).
+          // 2. New toolchain         (compare toolchain ids).
+          // 3. New bbot/libbbot      (compare versions).
+          //
+          // The last case has a complication: what should we do if we have
+          // bootstrapped a newer version of bbot? This would mean that we are
+          // about to be stopped and upgraded (and the upgraded version will
+          // probably be able to use the result). So we simply ignore this
+          // machine for this run.
 
-        // Return -1 if older, 0 if the same, and +1 if newer.
-        //
-        auto compare_bbot = [] (const bootstrap_manifest& m) -> int
-        {
-          auto cmp = [&m] (const string& n, uint64_t v) -> int
+          // Return -1 if older, 0 if the same, and +1 if newer.
+          //
+          auto compare_bbot = [] (const bootstrap_manifest& m) -> int
           {
-            auto i = m.versions.find (n);
+            auto cmp = [&m] (const string& n, uint64_t v) -> int
+            {
+              auto i = m.versions.find (n);
+              return
+                i == m.versions.end () || i->second < v
+                ? -1
+                : i->second > v ? 1 : 0;
+            };
+
+            // Start from the top assuming a new dependency cannot be added
+            // without changing the dependent's version.
+            //
+            int r;
             return
-              i == m.versions.end () || i->second < v
-              ? -1
-              : i->second > v ? 1 : 0;
+              (r = cmp ("bbot",       BBOT_VERSION)) != 0 ? r :
+              (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r :
+              (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r :
+              (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0;
           };
 
-          // Start from the top assuming a new dependency cannot be added
-          // without changing the dependent's version.
-          //
-          int r;
-          return
-            (r = cmp ("bbot",       BBOT_VERSION)) != 0 ? r :
-            (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r :
-            (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r :
-            (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0;
-        };
-
-        if (te)
-        {
-          auto bmm (
-            parse_manifest<bootstrapped_machine_manifest> (
-              tp / "manifest",
-              "bootstrapped machine"));
-
-          if (bmm.machine.id != mm.id)
+          if (te)
           {
-            trace << "re-bootstrapping " << tp << ": new machine";
-            te = false;
-          }
+            auto bmm (
+              parse_manifest<bootstrapped_machine_manifest> (
+                tp / "manifest",
+                "bootstrapped machine"));
 
-          if (bmm.toolchain.id != tc_id)
-          {
-            trace << "re-bootstrapping " << tp << ": new toolchain";
-            te = false;
-          }
+            if (bmm.machine.id != mm.id)
+            {
+              l2 ([&]{trace << "re-bootstrapping " << tp << ": new machine";});
+              te = false;
+            }
 
-          if (int i = compare_bbot (bmm.bootstrap))
-          {
-            if (i < 0)
+            if (bmm.toolchain.id != tc_id)
             {
-              trace << "re-bootstrapping " << tp << ": new bbot";
+              l2 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";});
               te = false;
             }
-            else
+
+            if (int i = compare_bbot (bmm.bootstrap))
             {
-              trace << "ignoring " << tp << ": newer bbot";
-              // btrfs subvolume snapshot $xp
-              break;
+              if (i < 0)
+              {
+                l2 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";});
+                te = false;
+              }
+              else
+              {
+                l2 ([&]{trace << "ignoring " << tp << ": old bbot";});
+                btrfs (trace, "subvolume", "delete", xp);
+                break;
+              }
             }
+
+            if (!te)
+              delete_t ();
           }
+          else
+            l2 ([&]{trace << "bootstrapping " << tp;});
 
           if (!te)
-            delete_t ();
-        }
-
-        if (!te)
-        {
-          // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
-          // bootstrap the new machine. Then atomically rename it to
-          // <name>-<toolchain>.
-          //
-          bootstrapped_machine_manifest bmm (bootstrap_machine (xp));
-
-          try
-          {
-            mvdir (xp, tp);
-          }
-          catch (const system_error& e)
           {
-            fail << "unable to rename " << xp << " to " << tp;
-          }
+            // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
+            // bootstrap the new machine. Then atomically rename it to
+            // <name>-<toolchain>.
+            //
+            bootstrapped_machine_manifest bmm (bootstrap_machine (xp, mm));
 
-          te = true;
+            try
+            {
+              mvdir (xp, tp);
+            }
+            catch (const system_error& e)
+            {
+              fail << "unable to rename " << xp << " to " << tp;
+            }
 
-          // Check the boostrapped bbot version as above and ignore this
-          // machine if it's newer than us.
-          //
-          if (int i = compare_bbot (bmm.bootstrap))
-          {
-            assert (i > 0);
-            trace << "ignoring " << tp << ": newer bbot";
-            break;
+            te = true;
+
+            // Check the bootstrapped bbot version as above and ignore this
+            // machine if it's newer than us.
+            //
+            if (int i = compare_bbot (bmm.bootstrap))
+            {
+              assert (i > 0);
+              l2 ([&]{trace << "ignoring " << tp << ": old bbot";});
+              break;
+            }
           }
-        }
-        else
-          ;// btrfs subvolume snapshot $xp
+          else
+            btrfs (trace, "subvolume", "delete", xp);
 
-        // Add the machine to the list.
-        //
-        // In order not to forget to clear new fields, we are instead going to
-        // create a new instance with just the required fields.
-        //
-        r.push_back (machine_manifest (mm.id, mm.name, mm.summary));
+          // Add the machine to the list.
+          //
+          // In order not to forget to clear new fields, we are instead going
+          // to create a new instance with just the required fields.
+          //
+          r.push_back (machine_manifest (mm.id, mm.name, mm.summary));
 
-        break;
+          break;
+        }
       }
     }
+    catch (const system_error& e)
+    {
+      fail << "unable to iterate over " << vd << ": " << e << endf;
+    }
   }
 
   return r;
 }
-
-*/
+catch (const system_error& e)
+{
+  fail << "unable to iterate over " << rd << ": " << e << endf;
+}
 
 extern "C" void
 handle_signal (int sig)
@@ -339,7 +360,9 @@ main (int argc, char* argv[])
 try
 {
   cli::argv_scanner scan (argc, argv, true);
-  agent_options ops (scan);
+  ops.parse (scan);
+
+  verb = ops.verbose ();
 
   if (ops.systemd_daemon ())
   {
@@ -359,6 +382,11 @@ try
     warn.type_  = "<4>";
     info.type_  = "<6>";
     trace_type  = "<7>";
+
+    info << "bbot agent for " << tc_name << '/' << tc_num <<
+      info << "toolchain id " << tc_id <<
+      info << "CPU(s)       " << ops.cpu () <<
+      info << "RAM(kB)      " << ops.ram ();
   }
 
   tracer trace ("main");
@@ -412,16 +440,28 @@ try
     fail << "unable to set signal handler: "
          << system_error (errno, generic_category ()); // Sanitize.
 
-  info << "bbot agent for " << tc_name << '/' << tc_num <<
-    info << "toolchain id " << tc_id <<
-    info << "CPU(s)       " << ops.cpu () <<
-    info << "RAM(kB)      " << ops.ram ();
-
-  for (;;)
+  // The work loop. The steps we go through are:
+  //
+  // 1. Enumerate the available machines, (re-)bootstrapping any if necessary.
+  //
+  // 2. Poll controller(s) for build tasks.
+  //
+  // 3. If no build tasks are available, go to #1 after sleeping a bit.
+  //
+  // 4. If a build task is returned, do it, upload the result, and go to #1
+  //    immediately.
+  //
+  for (unsigned int s; (s = 60); sleep (s))
   {
-    error << "sleeping" <<
-      warn << "lightly";
-    sleep (10);
+    machine_manifests mms (enumerate_machines (ops.machines ()));
+
+    if (ops.dump_machines ())
+    {
+      for (const machine_manifest& mm: mms)
+        serialize_manifest (mm, cout, "stdout", "machine manifest");
+
+      return 0;
+    }
   }
 }
 catch (const failed&)
author	Boris Kolpackov <boris@codesynthesis.com>	2017-04-08 14:14:26 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2017-04-08 14:14:26 +0200
commit	36e0c88e7a3912c8a2e6594841172adb9c14525b (patch)
tree	909a269ded721a0201a01d3493af6fc11dd75292 /bbot/agent.cxx
parent	cfd31379be5eefb22a72b5ee90ce8fd17a0802b7 (diff)