From 8e0d457a78164e13768e314d707e28823e65f8fc Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 1 Nov 2017 15:05:44 -0400 Subject: [PATCH 1/3] dir.c: make add_excludes aware of fscache during status Teach read_directory_recursive() and add_excludes() to be aware of optional fscache and avoid trying to open() and fstat() non-existant ".gitignore" files in every directory in the worktree. The current code in add_excludes() calls open() and then fstat() for a ".gitignore" file in each directory present in the worktree. Change that when fscache is enabled to call lstat() first and if present, call open(). This seems backwards because both lstat needs to do more work than fstat. But when fscache is enabled, fscache will already know if the .gitignore file exists and can completely avoid the IO calls. This works because of the lstat diversion to mingw_lstat when fscache is enabled. This reduced status times on a 350K file enlistment of the Windows repo on a NVMe SSD by 0.25 seconds. Signed-off-by: Jeff Hostetler --- compat/win32/fscache.c | 5 +++++ compat/win32/fscache.h | 3 +++ dir.c | 33 ++++++++++++++++++++++++--------- git-compat-util.h | 4 ++++ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 403441f6a9..330e3d9a11 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -10,6 +10,11 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); +int fscache_is_enabled(void) +{ + return enabled; +} + /* * An entry in the file system cache. Used for both entire directory listings * and file entries. diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index ed518b422d..9a21fd5709 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,6 +4,9 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) +int fscache_is_enabled(void); +#define is_fscache_enabled() (fscache_is_enabled()) + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/dir.c b/dir.c index 6ca2ef5f04..2362df010b 100644 --- a/dir.c +++ b/dir.c @@ -1063,16 +1063,31 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; - if (flags & PATTERN_NOFOLLOW) - fd = open_nofollow(fname, O_RDONLY); - else - fd = open(fname, O_RDONLY); - - if (fd < 0 || fstat(fd, &st) < 0) { - if (fd < 0) - warn_on_fopen_errors(fname); + if (is_fscache_enabled()) { + if (lstat(fname, &st) < 0) { + fd = -1; + } else { + fd = open(fname, O_RDONLY); + if (fd < 0) + warn_on_fopen_errors(fname); + } + } else { + if (flags & PATTERN_NOFOLLOW) + fd = open_nofollow(fname, O_RDONLY); else - close(fd); + fd = open(fname, O_RDONLY); + + if (fd < 0 || fstat(fd, &st) < 0) { + if (fd < 0) + warn_on_fopen_errors(fname); + else { + close(fd); + fd = -1; + } + } + } + + if (fd < 0) { if (!istate) return -1; r = read_skip_worktree_file_from_index(istate, fname, diff --git a/git-compat-util.h b/git-compat-util.h index 18e3cf61d8..7d33f08b53 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1493,6 +1493,10 @@ static inline int is_missing_file_error(int errno_) #define enable_fscache(x) /* noop */ #endif +#ifndef is_fscache_enabled +#define is_fscache_enabled() (0) +#endif + int cmd_main(int, const char **); /* From 13f74123a87cf75439a8747aa94550ffdfcb7363 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Dec 2017 10:43:41 -0500 Subject: [PATCH 2/3] fscache: make fscache_enabled() public Make fscache_enabled() function public rather than static. Remove unneeded fscache_is_enabled() function. Change is_fscache_enabled() macro to call fscache_enabled(). is_fscache_enabled() now takes a pathname so that the answer is more precise and mean "is fscache enabled for this pathname", since fscache only stores repo-relative paths and not absolute paths, we can avoid attempting lookups for absolute paths. Signed-off-by: Jeff Hostetler --- compat/win32/fscache.c | 7 +------ compat/win32/fscache.h | 4 ++-- dir.c | 2 +- git-compat-util.h | 2 +- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 330e3d9a11..303a5d6274 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -10,11 +10,6 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); -int fscache_is_enabled(void) -{ - return enabled; -} - /* * An entry in the file system cache. Used for both entire directory listings * and file entries. @@ -271,7 +266,7 @@ static void fscache_clear(void) /* * Checks if the cache is enabled for the given path. */ -static inline int fscache_enabled(const char *path) +int fscache_enabled(const char *path) { return enabled > 0 && !is_absolute_path(path); } diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 9a21fd5709..660ada053b 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,8 +4,8 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) -int fscache_is_enabled(void); -#define is_fscache_enabled() (fscache_is_enabled()) +int fscache_enabled(const char *path); +#define is_fscache_enabled(path) fscache_enabled(path) DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/dir.c b/dir.c index 2362df010b..31c7e01440 100644 --- a/dir.c +++ b/dir.c @@ -1063,7 +1063,7 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; - if (is_fscache_enabled()) { + if (is_fscache_enabled(fname)) { if (lstat(fname, &st) < 0) { fd = -1; } else { diff --git a/git-compat-util.h b/git-compat-util.h index 7d33f08b53..16b789c5d1 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1494,7 +1494,7 @@ static inline int is_missing_file_error(int errno_) #endif #ifndef is_fscache_enabled -#define is_fscache_enabled() (0) +#define is_fscache_enabled(path) (0) #endif int cmd_main(int, const char **); From 680ab10a7acc515e286f8025fbb461ca52ad2a1c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Dec 2017 11:19:27 -0500 Subject: [PATCH 3/3] dir.c: regression fix for add_excludes with fscache Fix regression described in: https://github.com/git-for-windows/git/issues/1392 which was introduced in: https://github.com/git-for-windows/git/commit/b2353379bba414e6c00dde913497cc9c827366f2 Problem Symptoms ================ When the user has a .gitignore file that is a symlink, the fscache optimization introduced above caused the stat-data from the symlink, rather that of the target file, to be returned. Later when the ignore file was read, the buffer length did not match the stat.st_size field and we called die("cannot use as an exclude file") Optimization Rationale ====================== The above optimization calls lstat() before open() primarily to ask fscache if the file exists. It gets the current stat-data as a side effect essentially for free (since we already have it in memory). If the file does not exist, it does not need to call open(). And since very few directories have .gitignore files, we can greatly reduce time spent in the filesystem. Discussion of Fix ================= The above optimization calls lstat() rather than stat() because the fscache only intercepts lstat() calls. Calls to stat() stay directed to the mingw_stat() completly bypassing fscache. Furthermore, calls to mingw_stat() always call {open, fstat, close} so that symlinks are properly dereferenced, which adds *additional* open/close calls on top of what the original code in dir.c is doing. Since the problem only manifests for symlinks, we add code to overwrite the stat-data when the path is a symlink. This preserves the effect of the performance gains provided by the fscache in the normal case. Signed-off-by: Jeff Hostetler --- dir.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/dir.c b/dir.c index 31c7e01440..a7d2cc5b5b 100644 --- a/dir.c +++ b/dir.c @@ -1063,6 +1063,29 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; + /* + * A performance optimization for status. + * + * During a status scan, git looks in each directory for a .gitignore + * file before scanning the directory. Since .gitignore files are not + * that common, we can waste a lot of time looking for files that are + * not there. Fortunately, the fscache already knows if the directory + * contains a .gitignore file, since it has already read the directory + * and it already has the stat-data. + * + * If the fscache is enabled, use the fscache-lstat() interlude to see + * if the file exists (in the fscache hash maps) before trying to open() + * it. + * + * This causes problem when the .gitignore file is a symlink, because + * we call lstat() rather than stat() on the symlnk and the resulting + * stat-data is for the symlink itself rather than the target file. + * We CANNOT use stat() here because the fscache DOES NOT install an + * interlude for stat() and mingw_stat() always calls "open-fstat-close" + * on the file and defeats the purpose of the optimization here. Since + * symlinks are even more rare than .gitignore files, we force a fstat() + * after our open() to get stat-data for the target file. + */ if (is_fscache_enabled(fname)) { if (lstat(fname, &st) < 0) { fd = -1; @@ -1070,6 +1093,11 @@ static int add_patterns(const char *fname, const char *base, int baselen, fd = open(fname, O_RDONLY); if (fd < 0) warn_on_fopen_errors(fname); + else if (S_ISLNK(st.st_mode) && fstat(fd, &st) < 0) { + warn_on_fopen_errors(fname); + close(fd); + fd = -1; + } } } else { if (flags & PATTERN_NOFOLLOW)