libroot_build: Rework path normalization to not rely on system calls.

This path normalization was functionally a recursive lstat(), which
should theoretically be identical to rooting the path in the CWD
and then normalizing the rest of the components after that.

Well, a recursive lstat() is much slower than simple string manipulation.
How much slower? On my system, the existing lstat() version
took up a combined total of 63,284,607 us for building haiku.hpkg
(only the package itself, no other components rebuilt), while
this new version uses just 47,901 us -- and that is just for a @minimum build!

I performed a full @nightly build with both versions in use at once,
with an abort() in place if paths ever did not match, and it
did not fire once. (I even sabotaged the new function just to
ensure that it would actually find differing paths.)

This code was merged in 338b8dc301 (2005),
and has remained largely unchanged since then. I don't know what the
rationale was at the time for using this method instead of this much
simpler version. Perhaps the 3-argument normalize_dir_path was written
first and used more, while this 2-argument version was added later
as a simple shim? But the original commit has no uses of the 3-argument
version aside from the 2-argument one...

Either way, this is an absolutely unbelievable speedup to Haiku builds.
These functions are hit in every I/O operation of all libroot_build
users, and their usages really do add up, as the example above shows.

Fixes #16288.

Change-Id: Ia11f64b0d4106ee62f22741a32ccc0c75c184442
Reviewed-on: https://review.haiku-os.org/c/haiku/+/4427
Reviewed-by: Alex von Gluck IV <kallisti5@unixzen.com>
This commit is contained in:
Augustin Cavalier 2021-09-05 22:19:51 -04:00 committed by waddlesplash
parent 1ea3818ff5
commit 906fe09778

View File

@ -235,6 +235,7 @@ find_dir_entry(DIR *dir, const char *path, NodeRef ref, string &name,
return B_OK;
}
// find_dir_entry
static status_t
find_dir_entry(const char *path, NodeRef ref, string &name, bool skipDot)
@ -253,97 +254,51 @@ find_dir_entry(const char *path, NodeRef ref, string &name, bool skipDot)
}
// guess_normalized_dir_path
// Fallback that tries to find an absolute path for the directory identified
// by `ref` without reading any directory: it walks from the current working
// directory towards the root and compares each ancestor's device/inode pair
// with `ref`.
// On a match, the matching path is stored in `_normalizedPath` and true is
// returned. false is returned if getcwd() fails or no visited path matches.
// NOTE(review): the `path` parameter is not used by this body.
static bool
guess_normalized_dir_path(string path, NodeRef ref, string& _normalizedPath)
{
// We assume the CWD is normalized and hope that the directory is an
// ancestor of it. We just chop off path components until we find a match or
// hit root.
char cwd[B_PATH_NAME_LENGTH];
if (getcwd(cwd, sizeof(cwd)) == NULL)
return false;
// The loop runs as long as the buffer still holds an absolute path.
while (cwd[0] == '/') {
struct stat st;
// A failing stat() is not an error here; we simply move on to the parent.
if (stat(cwd, &st) == 0) {
if (st.st_dev == ref.device && st.st_ino == ref.node) {
_normalizedPath = cwd;
return true;
}
}
// Drop the last component. strrchr() cannot return NULL here, because the
// loop condition guarantees the buffer starts with '/'. Cutting a
// single-component path such as "/a" leaves an empty string, which ends the
// loop -- so "/" itself is only stat()ed when it is the CWD.
*strrchr(cwd, '/') = '\0';
}
// TODO: If path is absolute, we could also try to work with that, though
// the other way around -- trying prefixes until we hit a "." or ".."
// component.
return false;
}
// normalize_dir_path
// Builds the absolute path of the directory `path` (whose node ref is `ref`)
// by recursing upwards through "..": each level lstat()s the parent, looks up
// the name under which `ref` appears in it, and appends that name to the
// parent's normalized path.
// Returns 0/B_OK on success with the result in `normalizedPath`; otherwise
// errno from lstat() or the error reported by find_dir_entry().
static status_t
normalize_dir_path(string path, NodeRef ref, string &normalizedPath)
{
// get parent path
// (`path` is taken by value, so the caller's string is not modified)
path += "/..";
// stat the parent dir
struct stat st;
if (lstat(path.c_str(), &st) < 0)
return errno;
// root dir?
// (a directory whose ".." has the same node ref as itself is treated as "/";
// this is what ends the recursion)
NodeRef parentRef(st);
if (parentRef == ref) {
normalizedPath = "/";
return 0;
}
// find the entry
// NOTE(review): find_dir_entry() is defined outside this view; presumably it
// scans the parent directory for the entry whose node ref equals `ref` and
// stores that entry's name in `name` -- confirm against its definition.
string name;
status_t error = find_dir_entry(path.c_str(), ref, name, true) ;
if (error != B_OK) {
if (error != B_ENTRY_NOT_FOUND) {
// We couldn't open the directory. This might be because we don't
// have read permission. We're OK with not fully normalizing the
// path and try to guess the path in this case. Note: We don't check
// error for B_PERMISSION_DENIED, since opendir() may clobber the
// actual kernel error code with something not helpful.
// (`path` already has "/.." appended at this point, but
// guess_normalized_dir_path() does not read it.)
if (guess_normalized_dir_path(path, ref, normalizedPath))
return B_OK;
}
return error;
}
// recurse to get the parent dir path, if found
error = normalize_dir_path(path, parentRef, normalizedPath);
if (error != 0)
return error;
// construct the normalizedPath
if (normalizedPath.length() > 1) // don't append "/", if parent is root
normalizedPath += '/';
normalizedPath += name;
return 0;
}
// normalize_dir_path
// normalize_dir_path: Make path absolute and remove redundant entries.
//
// NOTE(review): this listing appears to be a diff whose +/- markers were
// stripped, so statements of two different bodies are interleaved below: a
// stat()-based one that forwards to the 3-argument normalize_dir_path(), and
// a purely string-based one. Read literally, control always leaves through
// the stat() failure path or the forwarding `return`, which makes every
// statement after that `return` unreachable. Confirm against the real file
// before relying on either reading.
//
// String-based part: a relative `path` is rooted in the current working
// directory; the path is then split at '/' and each component is processed:
// ".." removes the last component collected so far, "" and "." are skipped,
// anything else is appended. An empty result becomes "/".
// Returns B_OK on success, B_ERROR if getcwd() fails.
static status_t
normalize_dir_path(const char *path, string &normalizedPath)
{
// stat() the dir
struct stat st;
if (stat(path, &st) < 0)
return errno;
const size_t pathLen = strlen(path);
// Forward to the 3-argument overload (`path` converts to a string temporary).
return normalize_dir_path(path, NodeRef(st), normalizedPath);
// Add CWD to relative paths.
// (an empty `path` is treated as relative, too)
if (pathLen == 0 || path[0] != '/') {
char pwd[PATH_MAX];
if (getcwd(pwd, sizeof(pwd)) == NULL)
return B_ERROR;
// NOTE(review): this appends rather than assigns, so it presumably relies
// on the caller passing in an empty string -- verify against callers.
normalizedPath += pwd;
}
const char *end = &path[pathLen];
const char *next;
// Each iteration handles the component [ptr, next); `next` is the following
// '/' or `end` when no further separator exists.
for (const char *ptr = path; ptr < end; ptr = next + 1) {
next = (char *)memchr(ptr, '/', end - ptr);
if (next == NULL)
next = end;
size_t len = next - ptr;
if (len == 2 && ptr[0] == '.' && ptr[1] == '.') {
// "..": cut the result at its last '/', dropping the final component.
// If the result holds no '/', it is left unchanged.
string::size_type pos = normalizedPath.rfind('/');
if (pos != string::npos)
normalizedPath.resize(pos);
continue;
} else if (len == 0 || (len == 1 && ptr[0] == '.')) {
// Empty components (leading, doubled or trailing '/') and "." add nothing.
continue;
}
// Separate components with '/', except when the result is exactly one
// character long (presumably the bare root "/", e.g. when the CWD is "/").
if (normalizedPath.length() != 1)
normalizedPath += '/';
normalizedPath.append(ptr, len);
}
// Nothing collected (e.g. "/" or "/a/.."): the result is the root directory.
if (normalizedPath.length() == 0)
normalizedPath += '/';
return B_OK;
}
// normalize_entry_path
static status_t
normalize_entry_path(const char *path, string &normalizedPath)
@ -362,7 +317,6 @@ normalize_entry_path(const char *path, string &normalizedPath)
dirPathString = string(path, leafName - path);
dirPath = dirPathString.c_str();
}
} else {
// path contains no slash, so it is a path relative to the current dir
dirPath = ".";