From 7c248362c13e4a4cae65bcae71e651f70e06fe60 Mon Sep 17 00:00:00 2001 From: jmmv Date: Wed, 30 Mar 2011 11:10:56 +0000 Subject: [PATCH] Pull up upstream revision 648ed6360b2b7cda81a6079b00dc436d09c745b8: Retry calls that raise file system errors during cleanup If a test case mounts user-space (puffs/fuse) file systems or spawns server processes that create pid files, the termination of the corresponding processes does not guarantee that the file system is left in a consistent state immediately. The cleanup routines of both components (file systems and daemons) may still be running. This situation causes a race condition between the termination of the auxiliary processes and our own file system cleanup: the file system calls performed from within the cleanup routine may raise errors because the file system is still changing underneath. (E.g. we first enumerate the contents of a directory and get file X, but when we attempt to delete file X, it may be gone.) Deal with this by retrying failing file system calls a few times and ignoring "expected" errors before giving up. --- external/bsd/atf/dist/atf-run/fs.cpp | 92 ++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 20 deletions(-) diff --git a/external/bsd/atf/dist/atf-run/fs.cpp b/external/bsd/atf/dist/atf-run/fs.cpp index 69465e015138..fcc3acdefc62 100644 --- a/external/bsd/atf/dist/atf-run/fs.cpp +++ b/external/bsd/atf/dist/atf-run/fs.cpp @@ -1,7 +1,7 @@ // // Automated Testing Framework (atf) // -// Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. +// Copyright (c) 2007, 2008, 2009, 2011 The NetBSD Foundation, Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,6 +45,7 @@ extern "C" { #include #include "atf-c++/detail/process.hpp" +#include "atf-c++/detail/sanity.hpp" #include "fs.hpp" #include "user.hpp" @@ -61,6 +62,19 @@ static void cleanup_aux_dir(const atf::fs::path&, const atf::fs::file_info&, bool); static void do_unmount(const atf::fs::path&); +// The cleanup routines below are tricky: they are executed immediately after +// a test case's death, and after we have forcibly killed any stale processes. +// However, even if the processes are dead, this does not mean that the file +// system we are scanning is stable. In particular, if the test case has +// mounted file systems through fuse/puffs, the fact that the processes died +// does not mean that the file system is truly unmounted. +// +// The code below attempts to cope with this by catching errors and either +// ignoring them or retrying the actions on the same file/directory a few times +// before giving up. +static const int max_retries = 5; +static const int retry_delay_in_seconds = 1; + // The erase parameter in this routine is to control nested mount points. // We want to descend into a mount point to unmount anything that is // mounted under it, but we do not want to delete any files while doing @@ -70,19 +84,24 @@ static void cleanup_aux(const atf::fs::path& p, dev_t parent_device, bool erase) { - atf::fs::file_info fi(p); + try { + atf::fs::file_info fi(p); - if (fi.get_type() == atf::fs::file_info::dir_type) - cleanup_aux_dir(p, fi, fi.get_device() == parent_device); - - if (fi.get_device() != parent_device) - do_unmount(p); - - if (erase) { if (fi.get_type() == atf::fs::file_info::dir_type) - atf::fs::rmdir(p); - else - atf::fs::remove(p); + cleanup_aux_dir(p, fi, fi.get_device() == parent_device); + + if (fi.get_device() != parent_device) + do_unmount(p); + + if (erase) { + if (fi.get_type() == atf::fs::file_info::dir_type) + atf::fs::rmdir(p); + else + atf::fs::remove(p); + } + } catch (const atf::system_error& e) { + if (e.code() != ENOENT && e.code() != ENOTDIR) + throw e; } } @@ -92,15 +111,39 @@ cleanup_aux_dir(const atf::fs::path& p, const atf::fs::file_info& fi, bool erase) { if (erase && ((fi.get_mode() & S_IRWXU) != S_IRWXU)) { - if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1) - throw atf::system_error(IMPL_NAME "::cleanup(" + - p.str() + ")", "chmod(2) failed", errno); + int retries = max_retries; +retry_chmod: + if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1) { + if (retries > 0) { + retries--; + ::sleep(retry_delay_in_seconds); + goto retry_chmod; + } else { + throw atf::system_error(IMPL_NAME "::cleanup(" + + p.str() + ")", "chmod(2) failed", + errno); + } + } } std::set< std::string > subdirs; { - const atf::fs::directory d(p); - subdirs = d.names(); + bool ok = false; + int retries = max_retries; + while (!ok) { + INV(retries > 0); + try { + const atf::fs::directory d(p); + subdirs = d.names(); + ok = true; + } catch (const atf::system_error& e) { + if (retries == 0) + throw e; + retries--; + ::sleep(retry_delay_in_seconds); + } + } + INV(ok); } for (std::set< std::string >::const_iterator iter = subdirs.begin(); @@ -122,9 +165,18 @@ do_unmount(const atf::fs::path& in_path) in_path : in_path.to_absolute(); #if defined(HAVE_UNMOUNT) - if (unmount(abs_path.c_str(), 0) == -1) - throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() + ")", - "unmount(2) failed", errno); + int retries = max_retries; +retry_unmount: + if (unmount(abs_path.c_str(), 0) == -1) { + if (errno == EBUSY && retries > 0) { + retries--; + ::sleep(retry_delay_in_seconds); + goto retry_unmount; + } else { + throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() + + ")", "unmount(2) failed", errno); + } + } #else // We could use umount(2) instead if it was available... but // trying to do so under, e.g. Linux, is a nightmare because we