Pull up upstream revision 648ed6360b2b7cda81a6079b00dc436d09c745b8:

Retry calls that raise file system errors during cleanup

If a test case mounts user-space (puffs/fuse) file systems or spawns
server processes that create pid files, the termination of the
corresponding processes does not guarantee that the file system is
left in a consistent state immediately.  The cleanup routines of both
components (file systems and daemons) may still be running.

This situation causes a race condition between the termination of the
auxiliary processes and our own file system cleanup: the file system
calls performed from within the cleanup routine may raise errors
because the file system is still changing underneath.  (E.g. we first
enumerate the contents of a directory and get file X, but when we
attempt to delete file X, it may be gone.)

Deal with this by retrying failing file system calls a few times and
ignoring "expected" errors before giving up.
This commit is contained in:
jmmv 2011-03-30 11:10:56 +00:00
parent 99ae53aa53
commit 7c248362c1

View File

@ -1,7 +1,7 @@
//
// Automated Testing Framework (atf)
//
// Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
// Copyright (c) 2007, 2008, 2009, 2011 The NetBSD Foundation, Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@ -45,6 +45,7 @@ extern "C" {
#include <cstring>
#include "atf-c++/detail/process.hpp"
#include "atf-c++/detail/sanity.hpp"
#include "fs.hpp"
#include "user.hpp"
@ -61,6 +62,19 @@ static void cleanup_aux_dir(const atf::fs::path&, const atf::fs::file_info&,
bool);
static void do_unmount(const atf::fs::path&);
// The cleanup routines below are tricky: they are executed immediately after
// a test case's death, and after we have forcibly killed any stale processes.
// However, even if the processes are dead, this does not mean that the file
// system we are scanning is stable. In particular, if the test case has
// mounted file systems through fuse/puffs, the fact that the processes died
// does not mean that the file system is truly unmounted.
//
// The code below attempts to cope with this by catching errors and either
// ignoring them or retrying the actions on the same file/directory a few times
// before giving up.
static const int max_retries = 5;
static const int retry_delay_in_seconds = 1;
// The erase parameter in this routine is to control nested mount points.
// We want to descend into a mount point to unmount anything that is
// mounted under it, but we do not want to delete any files while doing
@ -70,19 +84,24 @@ static
void
cleanup_aux(const atf::fs::path& p, dev_t parent_device, bool erase)
{
atf::fs::file_info fi(p);
try {
atf::fs::file_info fi(p);
if (fi.get_type() == atf::fs::file_info::dir_type)
cleanup_aux_dir(p, fi, fi.get_device() == parent_device);
if (fi.get_device() != parent_device)
do_unmount(p);
if (erase) {
if (fi.get_type() == atf::fs::file_info::dir_type)
atf::fs::rmdir(p);
else
atf::fs::remove(p);
cleanup_aux_dir(p, fi, fi.get_device() == parent_device);
if (fi.get_device() != parent_device)
do_unmount(p);
if (erase) {
if (fi.get_type() == atf::fs::file_info::dir_type)
atf::fs::rmdir(p);
else
atf::fs::remove(p);
}
} catch (const atf::system_error& e) {
if (e.code() != ENOENT && e.code() != ENOTDIR)
throw e;
}
}
@ -92,15 +111,39 @@ cleanup_aux_dir(const atf::fs::path& p, const atf::fs::file_info& fi,
bool erase)
{
if (erase && ((fi.get_mode() & S_IRWXU) != S_IRWXU)) {
if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1)
throw atf::system_error(IMPL_NAME "::cleanup(" +
p.str() + ")", "chmod(2) failed", errno);
int retries = max_retries;
retry_chmod:
if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1) {
if (retries > 0) {
retries--;
::sleep(retry_delay_in_seconds);
goto retry_chmod;
} else {
throw atf::system_error(IMPL_NAME "::cleanup(" +
p.str() + ")", "chmod(2) failed",
errno);
}
}
}
std::set< std::string > subdirs;
{
const atf::fs::directory d(p);
subdirs = d.names();
bool ok = false;
int retries = max_retries;
while (!ok) {
INV(retries > 0);
try {
const atf::fs::directory d(p);
subdirs = d.names();
ok = true;
} catch (const atf::system_error& e) {
if (retries == 0)
throw e;
retries--;
::sleep(retry_delay_in_seconds);
}
}
INV(ok);
}
for (std::set< std::string >::const_iterator iter = subdirs.begin();
@ -122,9 +165,18 @@ do_unmount(const atf::fs::path& in_path)
in_path : in_path.to_absolute();
#if defined(HAVE_UNMOUNT)
if (unmount(abs_path.c_str(), 0) == -1)
throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() + ")",
"unmount(2) failed", errno);
int retries = max_retries;
retry_unmount:
if (unmount(abs_path.c_str(), 0) == -1) {
if (errno == EBUSY && retries > 0) {
retries--;
::sleep(retry_delay_in_seconds);
goto retry_unmount;
} else {
throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() +
")", "unmount(2) failed", errno);
}
}
#else
// We could use umount(2) instead if it was available... but
// trying to do so under, e.g. Linux, is a nightmare because we