From 5f44a60db4ea394a587e211519e03b678baf430a Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 01/21] Win32: make FILETIME conversion functions public We will use them in the upcoming "FSCache" patches (to accelerate sequential lstat() calls). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw-posix.h | 18 ++++++++++++++++++ compat/mingw.c | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h index c9fae1cf1e..88d8d0e8f8 100644 --- a/compat/mingw-posix.h +++ b/compat/mingw-posix.h @@ -342,6 +342,17 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. + */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows, * including our own struct stat with 64 bit st_size and nanosecond-precision @@ -358,6 +369,13 @@ struct timespec { #endif #endif +static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) +{ + long long hnsec = filetime_to_hnsec(ft); + ts->tv_sec = (time_t)(hnsec / 10000000); + ts->tv_nsec = (hnsec % 10000000) * 100; +} + struct mingw_stat { _dev_t st_dev; _ino_t st_ino; diff --git a/compat/mingw.c b/compat/mingw.c index 5bffcd4938..5dd59e2c91 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -883,24 +883,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) -{ - long long hnsec = filetime_to_hnsec(ft); - ts->tv_sec = (time_t)(hnsec / 10000000); - ts->tv_nsec = (hnsec % 10000000) * 100; -} - /** * Verifies that safe_create_leading_directories() would succeed. */ From 4e349eb83fc3b5b5d95c0ca52405808fcc45cbae Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 02/21] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 52420ec7d4..2603a0fa39 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,40 +18,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -90,3 +56,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From 9451c6b76b26a11ee3d9c09a0b856ee80fc10b4a Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 03/21] mingw: make the dirent implementation pluggable Emulating the POSIX `dirent` API on Windows via `FindFirstFile()`/`FindNextFile()` is pretty staightforward, however, most of the information provided in the `WIN32_FIND_DATA` structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to `lstat()`. Make the `dirent` implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Define a base DIR structure with pointers to `readdir()`/`closedir()` that match the `opendir()` implementation (similar to vtable pointers in Object-Oriented Programming). Define `readdir()`/`closedir()` so that they call the function pointers in the `DIR` structure. This allows to choose the `opendir()` implementation on a call-by-call basis. Make the fixed-size `dirent.d_name` buffer a flex array, as `d_name` may be implementation specific (e.g. a caching implementation may allocate a `struct dirent` with _just_ the size needed to hold the `d_name` in question). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/win32/dirent.c | 30 +++++++++++++++++++----------- compat/win32/dirent.h | 28 +++++++++++++++++++++------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2603a0fa39..139d2ba3c4 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,21 @@ #include "../../git-compat-util.h" -struct DIR { - struct dirent dd_dir; /* includes d_type */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + struct dirent dd_dir; /* includes d_type */ +} dirent_DIR; +#pragma GCC diagnostic pop + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -18,7 +24,7 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -45,7 +51,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -57,13 +63,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -84,9 +90,11 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR) + MAX_PATH); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bf..a58a8075fd 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,34 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char d_name[/* FLEX_ARRAY */]; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +#define opendir git_opendir + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From d1f5e7c1f3115ad3931c1bdce961833ed5654891 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 04/21] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw-posix.h | 2 +- compat/mingw.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h index 88d8d0e8f8..d868451ccc 100644 --- a/compat/mingw-posix.h +++ b/compat/mingw-posix.h @@ -408,7 +408,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); int mingw_utime(const char *file_name, const struct utimbuf *times); diff --git a/compat/mingw.c b/compat/mingw.c index 5dd59e2c91..2e085d3dc5 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1022,6 +1022,8 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return do_lstat(follow, alt_name, buf); } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) { BY_HANDLE_FILE_INFORMATION fdata; From 63df1b22f572e15b722c03dd867c40d8d642a9b6 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 05/21] mingw: add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. Signed-off-by: Karsten Blees --- Documentation/config/core.adoc | 6 ++++++ builtin/commit.c | 1 + compat/mingw.c | 6 ++++++ compat/mingw.h | 2 ++ git-compat-util.h | 15 +++++++++++++++ preload-index.c | 3 +++ 6 files changed, 33 insertions(+) diff --git a/Documentation/config/core.adoc b/Documentation/config/core.adoc index 3fbe83eef1..c49f13a0d5 100644 --- a/Documentation/config/core.adoc +++ b/Documentation/config/core.adoc @@ -690,6 +690,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.unsetenvvars:: Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other process. diff --git a/builtin/commit.c b/builtin/commit.c index fba0dded64..3666f4c822 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1602,6 +1602,7 @@ struct repository *repo UNUSED) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; diff --git a/compat/mingw.c b/compat/mingw.c index 2e085d3dc5..89bf071462 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -248,6 +248,7 @@ enum hide_dotfiles_type { static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; +int core_fscache; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx UNUSED, @@ -261,6 +262,11 @@ int mingw_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { if (!value) return config_error_nonbool(var); diff --git a/compat/mingw.h b/compat/mingw.h index 6ea53ee0d2..65df57d2a7 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -1,5 +1,7 @@ #include "mingw-posix.h" +extern int core_fscache; + struct config_context; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx, void *cb); diff --git a/git-compat-util.h b/git-compat-util.h index 285d4b8aaa..04aad2acd7 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1046,6 +1046,21 @@ static inline int is_missing_file_error(int errno_) return (errno_ == ENOENT || errno_ == ENOTDIR); } +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. + */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/preload-index.c b/preload-index.c index 40ab2abafb..f0d0d8413c 100644 --- a/preload-index.c +++ b/preload-index.c @@ -138,6 +138,7 @@ void preload_index(struct index_state *index, pthread_mutex_init(&pd.mutex, NULL); } + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; int err; @@ -173,6 +174,8 @@ void preload_index(struct index_state *index, trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat); trace2_region_leave("index", "preload", NULL); + + enable_fscache(0); } int repo_read_index_preload(struct repository *repo, From 1238254c5248c54840b5d268e5659cbc78f7979f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 06/21] mingw: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow `lstat()` emulation (git calls `lstat()` once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an `lstat()` implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent `lstat()` calls for the same directory are served directly from the cache. Also implement `opendir()`/`readdir()`/`closedir()` so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 473 ++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- contrib/buildsystems/CMakeLists.txt | 3 +- git-compat-util.h | 2 + meson.build | 1 + 6 files changed, 490 insertions(+), 3 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 0000000000..dc765ddd57 --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,473 @@ +#include "../../git-compat-util.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" +#include "../../dir.h" +#include "../../abspath.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + struct timespec st_atim; + struct timespec st_mtim; + struct timespec st_ctim; + } s; + } u; + + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). + */ + struct dirent dirent; +}; +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wflexible-array-extensions" +#endif +struct heap_fsentry { + union { + struct fsentry ent; + char dummy[sizeof(struct fsentry) + MAX_PATH]; + } u; +}; +#pragma GCC diagnostic pop + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(void *cmp_data UNUSED, + const struct fsentry *fse1, const struct fsentry *fse2, + void *keydata UNUSED) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && + (res = fsentry_cmp(NULL, fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2, NULL))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return fspathncmp(fse1->dirent.d_name, fse2->dirent.d_name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->dirent.d_name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + if (len > MAX_PATH) + BUG("Trying to allocate fsentry for long path '%.*s'", + (int)len, name); + memcpy(fse->dirent.d_name, name, len); + fse->dirent.d_name[len] = 0; + fse->len = len; + hashmap_entry_init(&fse->ent, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + /* init the rest of the structure */ + fsentry_init(fse, list, name, len); + fse->next = NULL; + fse->u.refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. + */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->u.refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->u.refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->dirent.d_type = S_ISDIR(fse->st_mode) ? DT_DIR : DT_REG; + fse->u.s.st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + filetime_to_timespec(&(fdata->ftLastAccessTime), &(fse->u.s.st_atim)); + filetime_to_timespec(&(fdata->ftLastWriteTime), &(fse->u.s.st_mtim)); + filetime_to_timespec(&(fdata->ftCreationTime), &(fse->u.s.st_ctim)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). + */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, + dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, &fse->ent); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + hashmap_clear_and_free(&map, struct fsentry, ent); + hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); +} + +/* + * Checks if the cache is enabled for the given path. + */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get_entry(&map, key->list, ent, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get_entry(&map, key, ent, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + initialized = 1; + } + + result = enable ? InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; +#pragma GCC diagnostic push +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wflexible-array-extensions" +#endif + struct heap_fsentry key[2]; +#pragma GCC diagnostic pop + struct fsentry *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(&key[0].u.ent, NULL, filename, dirlen); + fsentry_init(&key[1].u.ent, &key[0].u.ent, filename + base, len - base); + fse = fscache_get(&key[1].u.ent); + if (!fse) { + errno = ENOENT; + return -1; + } + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->u.s.st_size; + st->st_atim = fse->u.s.st_atim; + st->st_mtim = fse->u.s.st_mtim; + st->st_ctim = fse->u.s.st_ctim; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent *dirent; +} fscache_DIR; + +/* + * Readdir replacement. + */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent = &next->dirent; + return dir->dirent; +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct heap_fsentry key; + struct fsentry *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key.u.ent, NULL, dirname, len); + list = fscache_get(&key.u.ent); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 0000000000..ed518b422d --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index fb536af8a6..d125a4a551 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -509,7 +509,7 @@ endif compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/trace2_win32_process_info.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY -DENSURE_MSYSTEM_IS_SET -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO # invalidcontinue.obj allows Git's source code to close the same file @@ -712,7 +712,7 @@ ifeq ($(uname_S),MINGW) compat/win32/flush.o \ compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DWIN32 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 826c1b4f3b..a67b41b3cc 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -294,7 +294,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") compat/win32/trace2_win32_process_info.c compat/win32/dirent.c compat/nedmalloc/nedmalloc.c - compat/strdup.c) + compat/strdup.c + compat/win32/fscache.c) set(NO_UNIX_SOCKETS 1) elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/git-compat-util.h b/git-compat-util.h index 04aad2acd7..ebcffb806e 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -191,9 +191,11 @@ static inline int is_xplatform_dir_sep(int c) /* pull in Windows compatibility stuff */ #include "compat/win32/path-utils.h" #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/win32/path-utils.h" #include "compat/msvc.h" +#include "compat/win32/fscache.h" #endif /* used on Mac OS X */ diff --git a/meson.build b/meson.build index d93524bb4f..79a6085821 100644 --- a/meson.build +++ b/meson.build @@ -1214,6 +1214,7 @@ elif host_machine.system() == 'windows' 'compat/winansi.c', 'compat/win32/dirent.c', 'compat/win32/flush.c', + 'compat/win32/fscache.c', 'compat/win32/path-utils.c', 'compat/win32/pthread.c', 'compat/win32/syslog.c', From 43199376c086dfefc12dbdfdb4e7aed57099ee99 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 07/21] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index dc765ddd57..ff2479c738 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -27,6 +27,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -266,16 +268,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. + */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->u.refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->u.hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->u.hwait, INFINITE); + CloseHandle(key->u.hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get_entry(&map, key, ent, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get_entry(&map, key, ent, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -283,7 +312,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get_entry(&map, key->list, ent, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -292,16 +321,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->u.refcnt = 0; + hashmap_add(&map, &future->ent); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? key->list : key); - if (!fse) - return NULL; - + fse = fsentry_create_list(future); EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get_entry(&map, key, ent, NULL)) - fscache_add(fse); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, &future->ent, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->u.hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); + return NULL; + } + + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list) From 666f0f36dbf7bc19d4604e97b11aaa40f4b0fc2a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 24 Jan 2017 15:12:13 -0500 Subject: [PATCH 08/21] fscache: add key for GIT_TRACE_FSCACHE Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index ff2479c738..d67dc918d6 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -4,11 +4,13 @@ #include "fscache.h" #include "../../dir.h" #include "../../abspath.h" +#include "../../trace.h" static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* * An entry in the file system cache. Used for both entire directory listings @@ -212,6 +214,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", + errno, dir->dirent.d_name); return NULL; } @@ -397,6 +401,7 @@ int fscache_enable(int enable) fscache_clear(); LeaveCriticalSection(&mutex); } + trace_printf_key(&trace_fscache, "fscache: enable(%d)\n", enable); return result; } From 25891d57d642bce2e5a54299826df5b89488c54e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 13 Dec 2016 14:05:32 -0500 Subject: [PATCH 09/21] fscache: remember not-found directories Teach FSCACHE to remember "not found" directories. This is a performance optimization. FSCACHE is a performance optimization available for Windows. It intercepts Posix-style lstat() calls into an in-memory directory using FindFirst/FindNext. It improves performance on Windows by catching the first lstat() call in a directory, using FindFirst/ FindNext to read the list of files (and attribute data) for the entire directory into the cache, and short-cut subsequent lstat() calls in the same directory. This gives a major performance boost on Windows. However, it does not remember "not found" directories. When STATUS runs and there are missing directories, the lstat() interception fails to find the parent directory and simply return ENOENT for the file -- it does not remember that the FindFirst on the directory failed. Thus subsequent lstat() calls in the same directory, each re-attempt the FindFirst. This completely defeats any performance gains. This can be seen by doing a sparse-checkout on a large repo and then doing a read-tree to reset the skip-worktree bits and then running status. This change reduced status times for my very large repo by 60%. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index d67dc918d6..7aa3450e7e 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -186,7 +186,8 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, * Dir should not contain trailing '/'. Use an empty string for the current * directory (not "."!). */ -static struct fsentry *fsentry_create_list(const struct fsentry *dir) +static struct fsentry *fsentry_create_list(const struct fsentry *dir, + int *dir_not_found) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; @@ -195,6 +196,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) struct fsentry *list, **phead; DWORD err; + *dir_not_found = 0; + /* convert name to UTF-16 and check length < MAX_PATH */ if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, dir->len)) < 0) { @@ -213,6 +216,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) h = FindFirstFileW(pattern, &fdata); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); + *dir_not_found = 1; /* or empty directory */ errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", errno, dir->dirent.d_name); @@ -221,6 +225,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) /* allocate object to hold directory listing */ list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + list->st_mode = S_IFDIR; + list->dirent.d_type = DT_DIR; /* walk directory and build linked list of fsentry structures */ phead = &list->next; @@ -305,12 +311,16 @@ static struct fsentry *fscache_get_wait(struct fsentry *key) static struct fsentry *fscache_get(struct fsentry *key) { struct fsentry *fse, *future, *waiter; + int dir_not_found; EnterCriticalSection(&mutex); /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { - fsentry_addref(fse); + if (fse->st_mode) + fsentry_addref(fse); + else + fse = NULL; /* non-existing directory */ LeaveCriticalSection(&mutex); return fse; } @@ -319,7 +329,10 @@ static struct fsentry *fscache_get(struct fsentry *key) fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); - /* dir entry without file entry -> file doesn't exist */ + /* + * dir entry without file entry, or dir does not + * exist -> file doesn't exist + */ errno = ENOENT; return NULL; } @@ -333,7 +346,7 @@ static struct fsentry *fscache_get(struct fsentry *key) /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(future); + fse = fsentry_create_list(future, &dir_not_found); EnterCriticalSection(&mutex); /* remove future entry and signal waiting threads */ @@ -347,6 +360,18 @@ static struct fsentry *fscache_get(struct fsentry *key) /* leave on error (errno set by fsentry_create_list) */ if (!fse) { + if (dir_not_found && key->list) { + /* + * Record that the directory does not exist (or is + * empty, which for all practical matters is the same + * thing as far as fscache is concerned). + */ + fse = fsentry_alloc(key->list->list, + key->list->dirent.d_name, + key->list->len); + fse->st_mode = 0; + hashmap_add(&map, &fse->ent); + } LeaveCriticalSection(&mutex); return NULL; } @@ -358,6 +383,9 @@ static struct fsentry *fscache_get(struct fsentry *key) if (key->list) fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse && !fse->st_mode) + fse = NULL; /* non-existing directory */ + /* return entry or ENOENT */ if (fse) fsentry_addref(fse); From 8b5acc4b1979b530e4bd1a75eeb365fbba7c72fb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 25 Jan 2017 18:39:16 +0100 Subject: [PATCH 10/21] fscache: add a test for the dir-not-found optimization Signed-off-by: Johannes Schindelin --- t/t1090-sparse-checkout-scope.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 3a14218b24..529844e286 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -106,4 +106,24 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success MINGW 'no unnecessary opendir() with fscache' ' + git clone . fscache-test && + ( + cd fscache-test && + git config core.fscache 1 && + echo "/excluded/*" >.git/info/sparse-checkout && + for f in $(test_seq 10) + do + sha1=$(echo $f | git hash-object -w --stdin) && + git update-index --add \ + --cacheinfo 100644,$sha1,excluded/$f || exit 1 + done && + test_tick && + git commit -m excluded && + GIT_TRACE_FSCACHE=1 git status >out 2>err && + grep excluded err >grep.out && + test_line_count = 1 grep.out + ) +' + test_done From 21ad54f9d82c54eb3c0d5f5fd837e468327ad32a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 22 Nov 2016 11:26:38 -0500 Subject: [PATCH 11/21] add: use preload-index and fscache for performance Teach "add" to use preload-index and fscache features to improve performance on very large repositories. During an "add", a call is made to run_diff_files() which calls check_remove() for each index-entry. This calls lstat(). On Windows, the fscache code intercepts the lstat() calls and builds a private cache using the FindFirst/FindNext routines, which are much faster. Somewhat independent of this, is the preload-index code which distributes some of the start-up costs across multiple threads. We need to keep the call to read_cache() before parsing the pathspecs (and hence cannot use the pathspecs to limit any preload) because parse_pathspec() is using the index to determine whether a pathspec is, in fact, in a submodule. If we would not read the index first, parse_pathspec() would not error out on a path that is inside a submodule, and t7400-submodule-basic.sh would fail with not ok 47 - do not add files from a submodule We still want the nice preload performance boost, though, so we simply call read_cache_preload(&pathspecs) after parsing the pathspecs. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- builtin/add.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin/add.c b/builtin/add.c index 7c292ffdc6..c04be23223 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -477,6 +477,10 @@ int cmd_add(int argc, die_in_unpopulated_submodule(repo->index, prefix); die_path_inside_submodule(repo->index, &pathspec); + enable_fscache(1); + /* We do not really re-read the index but update the up-to-date flags */ + preload_index(repo->index, &pathspec, 0); + if (add_new_files) { int baselen; @@ -589,5 +593,6 @@ finish: free(ps_matched); dir_clear(&dir); clear_pathspec(&pathspec); + enable_fscache(0); return exit_status; } From 1b7d89aa1ff040c4ccbcf1fcae17314066e45a87 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 1 Nov 2017 15:05:44 -0400 Subject: [PATCH 12/21] dir.c: make add_excludes aware of fscache during status Teach read_directory_recursive() and add_excludes() to be aware of optional fscache and avoid trying to open() and fstat() non-existant ".gitignore" files in every directory in the worktree. The current code in add_excludes() calls open() and then fstat() for a ".gitignore" file in each directory present in the worktree. Change that when fscache is enabled to call lstat() first and if present, call open(). This seems backwards because both lstat needs to do more work than fstat. But when fscache is enabled, fscache will already know if the .gitignore file exists and can completely avoid the IO calls. This works because of the lstat diversion to mingw_lstat when fscache is enabled. This reduced status times on a 350K file enlistment of the Windows repo on a NVMe SSD by 0.25 seconds. Signed-off-by: Jeff Hostetler --- compat/win32/fscache.c | 5 +++++ compat/win32/fscache.h | 3 +++ dir.c | 39 ++++++++++++++++++++++++++++++--------- git-compat-util.h | 4 ++++ 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 7aa3450e7e..edec8f5813 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -12,6 +12,11 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); +int fscache_is_enabled(void) +{ + return enabled; +} + /* * An entry in the file system cache. Used for both entire directory listings * and file entries. diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index ed518b422d..9a21fd5709 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,6 +4,9 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) +int fscache_is_enabled(void); +#define is_fscache_enabled() (fscache_is_enabled()) + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/dir.c b/dir.c index a374972b62..ea54dd1504 100644 --- a/dir.c +++ b/dir.c @@ -1115,16 +1115,37 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; - if (flags & PATTERN_NOFOLLOW) - fd = open_nofollow(fname, O_RDONLY); - else - fd = open(fname, O_RDONLY); - - if (fd < 0 || fstat(fd, &st) < 0) { - if (fd < 0) - warn_on_fopen_errors(fname); + /* + * Since `clang`'s `-Wunreachable-code` mode is clever, it would figure + * out that on non-Windows platforms, this `lstat()` is unreachable. + * We do want to keep the conditional block for the sake of Windows, + * though, so let's use the `NOT_CONSTANT()` trick to suppress that error. + */ + if (NOT_CONSTANT(is_fscache_enabled(fname))) { + if (lstat(fname, &st) < 0) { + fd = -1; + } else { + fd = open(fname, O_RDONLY); + if (fd < 0) + warn_on_fopen_errors(fname); + } + } else { + if (flags & PATTERN_NOFOLLOW) + fd = open_nofollow(fname, O_RDONLY); else - close(fd); + fd = open(fname, O_RDONLY); + + if (fd < 0 || fstat(fd, &st) < 0) { + if (fd < 0) + warn_on_fopen_errors(fname); + else { + close(fd); + fd = -1; + } + } + } + + if (fd < 0) { if (!istate) return -1; r = read_skip_worktree_file_from_index(istate, fname, diff --git a/git-compat-util.h b/git-compat-util.h index ebcffb806e..876a772516 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1063,6 +1063,10 @@ static inline int is_missing_file_error(int errno_) #define enable_fscache(x) /* noop */ #endif +#ifndef is_fscache_enabled +#define is_fscache_enabled() (0) +#endif + int cmd_main(int, const char **); /* From 382c1cae577108b6ba545d9e02c82cd227b332ec Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Dec 2017 10:43:41 -0500 Subject: [PATCH 13/21] fscache: make fscache_enabled() public Make fscache_enabled() function public rather than static. Remove unneeded fscache_is_enabled() function. Change is_fscache_enabled() macro to call fscache_enabled(). is_fscache_enabled() now takes a pathname so that the answer is more precise and mean "is fscache enabled for this pathname", since fscache only stores repo-relative paths and not absolute paths, we can avoid attempting lookups for absolute paths. Signed-off-by: Jeff Hostetler --- compat/win32/fscache.c | 7 +------ compat/win32/fscache.h | 4 ++-- git-compat-util.h | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index edec8f5813..6e44df0a2d 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -12,11 +12,6 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); -int fscache_is_enabled(void) -{ - return enabled; -} - /* * An entry in the file system cache. Used for both entire directory listings * and file entries. @@ -278,7 +273,7 @@ static void fscache_clear(void) /* * Checks if the cache is enabled for the given path. */ -static inline int fscache_enabled(const char *path) +int fscache_enabled(const char *path) { return enabled > 0 && !is_absolute_path(path); } diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 9a21fd5709..660ada053b 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,8 +4,8 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) -int fscache_is_enabled(void); -#define is_fscache_enabled() (fscache_is_enabled()) +int fscache_enabled(const char *path); +#define is_fscache_enabled(path) fscache_enabled(path) DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/git-compat-util.h b/git-compat-util.h index 876a772516..d6000d90a5 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1064,7 +1064,7 @@ static inline int is_missing_file_error(int errno_) #endif #ifndef is_fscache_enabled -#define is_fscache_enabled() (0) +#define is_fscache_enabled(path) (0) #endif int cmd_main(int, const char **); From b6201999f811ac6051eaa30b4e7c5d2a1ef256e8 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Dec 2017 11:19:27 -0500 Subject: [PATCH 14/21] dir.c: regression fix for add_excludes with fscache Fix regression described in: https://github.com/git-for-windows/git/issues/1392 which was introduced in: https://github.com/git-for-windows/git/commit/b2353379bba414e6c00dde913497cc9c827366f2 Problem Symptoms ================ When the user has a .gitignore file that is a symlink, the fscache optimization introduced above caused the stat-data from the symlink, rather that of the target file, to be returned. Later when the ignore file was read, the buffer length did not match the stat.st_size field and we called die("cannot use as an exclude file") Optimization Rationale ====================== The above optimization calls lstat() before open() primarily to ask fscache if the file exists. It gets the current stat-data as a side effect essentially for free (since we already have it in memory). If the file does not exist, it does not need to call open(). And since very few directories have .gitignore files, we can greatly reduce time spent in the filesystem. Discussion of Fix ================= The above optimization calls lstat() rather than stat() because the fscache only intercepts lstat() calls. Calls to stat() stay directed to the mingw_stat() completly bypassing fscache. Furthermore, calls to mingw_stat() always call {open, fstat, close} so that symlinks are properly dereferenced, which adds *additional* open/close calls on top of what the original code in dir.c is doing. Since the problem only manifests for symlinks, we add code to overwrite the stat-data when the path is a symlink. This preserves the effect of the performance gains provided by the fscache in the normal case. Signed-off-by: Jeff Hostetler --- dir.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/dir.c b/dir.c index ea54dd1504..6a0e11de68 100644 --- a/dir.c +++ b/dir.c @@ -1116,6 +1116,28 @@ static int add_patterns(const char *fname, const char *base, int baselen, char *buf; /* + * A performance optimization for status. + * + * During a status scan, git looks in each directory for a .gitignore + * file before scanning the directory. Since .gitignore files are not + * that common, we can waste a lot of time looking for files that are + * not there. Fortunately, the fscache already knows if the directory + * contains a .gitignore file, since it has already read the directory + * and it already has the stat-data. + * + * If the fscache is enabled, use the fscache-lstat() interlude to see + * if the file exists (in the fscache hash maps) before trying to open() + * it. + * + * This causes problem when the .gitignore file is a symlink, because + * we call lstat() rather than stat() on the symlnk and the resulting + * stat-data is for the symlink itself rather than the target file. + * We CANNOT use stat() here because the fscache DOES NOT install an + * interlude for stat() and mingw_stat() always calls "open-fstat-close" + * on the file and defeats the purpose of the optimization here. Since + * symlinks are even more rare than .gitignore files, we force a fstat() + * after our open() to get stat-data for the target file. + * * Since `clang`'s `-Wunreachable-code` mode is clever, it would figure * out that on non-Windows platforms, this `lstat()` is unreachable. * We do want to keep the conditional block for the sake of Windows, @@ -1128,6 +1150,11 @@ static int add_patterns(const char *fname, const char *base, int baselen, fd = open(fname, O_RDONLY); if (fd < 0) warn_on_fopen_errors(fname); + else if (S_ISLNK(st.st_mode) && fstat(fd, &st) < 0) { + warn_on_fopen_errors(fname); + close(fd); + fd = -1; + } } } else { if (flags & PATTERN_NOFOLLOW) From 20c069d066b4453df18b0e2e9e30339320f43d13 Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Wed, 22 Nov 2017 20:39:38 +0900 Subject: [PATCH 15/21] fetch-pack.c: enable fscache for stats under .git/objects When I do git fetch, git call file stats under .git/objects for each refs. This takes time when there are many refs. By enabling fscache, git takes file stats by directory traversing and that improved the speed of fetch-pack for repository having large number of refs. In my windows workstation, this improves the time of `git fetch` for chromium repository like below. I took stats 3 times. * With this patch TotalSeconds: 9.9825165 TotalSeconds: 9.1862075 TotalSeconds: 10.1956256 Avg: 9.78811653333333 * Without this patch TotalSeconds: 15.8406702 TotalSeconds: 15.6248053 TotalSeconds: 15.2085938 Avg: 15.5580231 Signed-off-by: Takuto Ikuta Signed-off-by: Johannes Schindelin --- fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index fa4231fee7..449c9bb4ff 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -762,6 +762,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, save_commit_buffer = 0; trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); + enable_fscache(1); for (ref = *refs; ref; ref = ref->next) { struct commit *commit; @@ -786,6 +787,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, if (!cutoff || cutoff < commit->date) cutoff = commit->date; } + enable_fscache(0); trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); /* From bbc8a771d1524a3775b08c7c69662fecfa2c4c4c Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Tue, 30 Jan 2018 22:42:58 +0900 Subject: [PATCH 16/21] checkout.c: enable fscache for checkout again This is retry of #1419. I added flush_fscache macro to flush cached stats after disk writing with tests for regression reported in #1438 and #1442. git checkout checks each file path in sorted order, so cache flushing does not make performance worse unless we have large number of modified files in a directory containing many files. Using chromium repository, I tested `git checkout .` performance when I delete 10 files in different directories. With this patch: TotalSeconds: 4.307272 TotalSeconds: 4.4863595 TotalSeconds: 4.2975562 Avg: 4.36372923333333 Without this patch: TotalSeconds: 20.9705431 TotalSeconds: 22.4867685 TotalSeconds: 18.8968292 Avg: 20.7847136 I confirmed this patch passed all tests in t/ with core_fscache=1. Signed-off-by: Takuto Ikuta --- builtin/checkout.c | 2 ++ compat/win32/fscache.c | 12 ++++++++++++ compat/win32/fscache.h | 3 +++ entry.c | 3 +++ git-compat-util.h | 4 ++++ parallel-checkout.c | 1 + t/t7201-co.sh | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 61 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index d185982f3a..f779c3e01d 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -408,6 +408,7 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); + enable_fscache(1); for (pos = 0; pos < the_repository->index->cache_nr; pos++) { struct cache_entry *ce = the_repository->index->cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -433,6 +434,7 @@ static int checkout_worktree(const struct checkout_opts *opts, errs |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL); mem_pool_discard(&ce_mem_pool, should_validate_cache_entries()); + enable_fscache(0); remove_marked_cache_entries(the_repository->index, 1); remove_scheduled_dirs(); errs |= finish_delayed_checkout(&state, opts->show_progress); diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 6e44df0a2d..b6de459c4d 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -433,6 +433,18 @@ int fscache_enable(int enable) return result; } +/* + * Flush cached stats result when fscache is enabled. + */ +void fscache_flush(void) +{ + if (enabled) { + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } +} + /* * Lstat replacement, uses the cache if enabled, otherwise redirects to * mingw_lstat. diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 660ada053b..2f06f8df97 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -7,6 +7,9 @@ int fscache_enable(int enable); int fscache_enabled(const char *path); #define is_fscache_enabled(path) fscache_enabled(path) +void fscache_flush(void); +#define flush_fscache() fscache_flush() + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/entry.c b/entry.c index f36ec5ad24..640d5de5d2 100644 --- a/entry.c +++ b/entry.c @@ -411,6 +411,9 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca } finish: + /* Flush cached lstat in fscache after writing to disk. */ + flush_fscache(); + if (state->refresh_cache) { if (!fstat_done && lstat(ce->name, &st) < 0) return error_errno("unable to stat just-written file %s", diff --git a/git-compat-util.h b/git-compat-util.h index d6000d90a5..76be6a4ac8 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1067,6 +1067,10 @@ static inline int is_missing_file_error(int errno_) #define is_fscache_enabled(path) (0) #endif +#ifndef flush_fscache +#define flush_fscache() /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/parallel-checkout.c b/parallel-checkout.c index 57c2dcaa8f..5e17f30e89 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -639,6 +639,7 @@ static void write_items_sequentially(struct checkout *state) { size_t i; + flush_fscache(); for (i = 0; i < parallel_checkout.nr; i++) { struct parallel_checkout_item *pc_item = ¶llel_checkout.items[i]; write_pc_item(pc_item, state); diff --git a/t/t7201-co.sh b/t/t7201-co.sh index 9bcf7c0b40..545f388c44 100755 --- a/t/t7201-co.sh +++ b/t/t7201-co.sh @@ -35,6 +35,42 @@ fill () { } +test_expect_success MINGW 'fscache flush cache' ' + + git init fscache-test && + cd fscache-test && + git config core.fscache 1 && + echo A > test.txt && + git add test.txt && + git commit -m A && + echo B >> test.txt && + git checkout . && + test -z "$(git status -s)" && + echo A > expect.txt && + test_cmp expect.txt test.txt && + cd .. && + rm -rf fscache-test +' + +test_expect_success MINGW 'fscache flush cache dir' ' + + git init fscache-test && + cd fscache-test && + git config core.fscache 1 && + echo A > test.txt && + git add test.txt && + git commit -m A && + rm test.txt && + mkdir test.txt && + touch test.txt/test.txt && + git checkout . && + test -z "$(git status -s)" && + echo A > expect.txt && + test_cmp expect.txt test.txt && + cd .. && + rm -rf fscache-test +' + test_expect_success setup ' fill x y z >same && fill 1 2 3 4 5 6 7 8 >one && From 683541b1ddaafddb9c95f56dd6e88b6008300883 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Fri, 7 Sep 2018 11:39:57 -0400 Subject: [PATCH 17/21] Enable the filesystem cache (fscache) in refresh_index(). On file systems that support it, this can dramatically speed up operations like add, commit, describe, rebase, reset, rm that would otherwise have to lstat() every file to "re-match" the stat information in the index to that of the file system. On a synthetic repo with 1M files, "git reset" dropped from 52.02 seconds to 14.42 seconds for a savings of 72%. Signed-off-by: Ben Peart --- read-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/read-cache.c b/read-cache.c index c0bb760ad4..d161035d8b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1504,6 +1504,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, typechange_fmt = in_porcelain ? "T\t%s\n" : "%s: needs update\n"; added_fmt = in_porcelain ? "A\t%s\n" : "%s: needs update\n"; unmerged_fmt = in_porcelain ? "U\t%s\n" : "%s: needs merge\n"; + enable_fscache(1); /* * Use the multi-threaded preload_index() to refresh most of the * cache entries quickly then in the single threaded loop below, @@ -1598,6 +1599,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, display_progress(progress, istate->cache_nr); stop_progress(&progress); trace_performance_leave("refresh index"); + enable_fscache(0); return has_errors; } From 96870d1b25c6c1fcf608736f18b4c5ba2226b59e Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 23 Oct 2018 11:42:06 -0400 Subject: [PATCH 18/21] fscache: use FindFirstFileExW to avoid retrieving the short name Use FindFirstFileExW with FindExInfoBasic to avoid forcing NTFS to look up the short name. Also switch to a larger (64K vs 4K) buffer using FIND_FIRST_EX_LARGE_FETCH to minimize round trips to the kernel. In a repo with ~200K files, this drops warm cache status times from 3.19 seconds to 2.67 seconds for a 16% savings. Signed-off-by: Ben Peart --- compat/win32/fscache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index b6de459c4d..c6ab9f1a2c 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -213,7 +213,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir, pattern[wlen] = 0; /* open find handle */ - h = FindFirstFileW(pattern, &fdata); + h = FindFirstFileExW(pattern, FindExInfoBasic, &fdata, FindExSearchNameMatch, + NULL, FIND_FIRST_EX_LARGE_FETCH); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); *dir_not_found = 1; /* or empty directory */ From 0404729430fc51fc2ceb45b7c85ec11c5c3a730d Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Thu, 4 Oct 2018 18:10:21 -0400 Subject: [PATCH 19/21] fscache: add GIT_TEST_FSCACHE support Add support to fscache to enable running the entire test suite with the fscache enabled. Signed-off-by: Ben Peart --- compat/win32/fscache.c | 5 +++++ t/README | 3 +++ 2 files changed, 8 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index c6ab9f1a2c..13b3810473 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -5,6 +5,7 @@ #include "../../dir.h" #include "../../abspath.h" #include "../../trace.h" +#include "config.h" static int initialized; static volatile long enabled; @@ -406,7 +407,11 @@ int fscache_enable(int enable) int result; if (!initialized) { + int fscache = git_env_bool("GIT_TEST_FSCACHE", -1); + /* allow the cache to be disabled entirely */ + if (fscache != -1) + core_fscache = fscache; if (!core_fscache) return 0; diff --git a/t/README b/t/README index adbbd9acf4..f194681514 100644 --- a/t/README +++ b/t/README @@ -479,6 +479,9 @@ GIT_TEST_NAME_HASH_VERSION=, when set, causes 'git pack-objects' to assume '--name-hash-version='. +GIT_TEST_FSCACHE= exercises the uncommon fscache code path +which adds a cache below mingw's lstat and dirent implementations. + Naming Tests ------------ From 8202d91d79c944bd027f37402dff6c3fc357db29 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 25 Sep 2018 16:28:16 -0400 Subject: [PATCH 20/21] fscache: add fscache hit statistics Track fscache hits and misses for lstat and opendir requests. Reporting of statistics is done when the cache is disabled for the last time and freed and is only reported if GIT_TRACE_FSCACHE is set. Sample output is: 11:33:11.836428 compat/win32/fscache.c:433 fscache: lstat 3775, opendir 263, total requests/misses 4052/269 Signed-off-by: Ben Peart --- compat/win32/fscache.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 13b3810473..cf8ed5c635 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -11,6 +11,10 @@ static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static unsigned int lstat_requests; +static unsigned int opendir_requests; +static unsigned int fscache_requests; +static unsigned int fscache_misses; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* @@ -270,6 +274,8 @@ static void fscache_clear(void) { hashmap_clear_and_free(&map, struct fsentry, ent); hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); + lstat_requests = opendir_requests = 0; + fscache_misses = fscache_requests = 0; } /* @@ -316,6 +322,7 @@ static struct fsentry *fscache_get(struct fsentry *key) int dir_not_found; EnterCriticalSection(&mutex); + fscache_requests++; /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { @@ -379,6 +386,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* add directory listing to the cache */ + fscache_misses++; fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ @@ -416,6 +424,8 @@ int fscache_enable(int enable) return 0; InitializeCriticalSection(&mutex); + lstat_requests = opendir_requests = 0; + fscache_misses = fscache_requests = 0; hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); initialized = 1; } @@ -432,6 +442,10 @@ int fscache_enable(int enable) opendir = dirent_opendir; lstat = mingw_lstat; EnterCriticalSection(&mutex); + trace_printf_key(&trace_fscache, "fscache: lstat %u, opendir %u, " + "total requests/misses %u/%u\n", + lstat_requests, opendir_requests, + fscache_requests, fscache_misses); fscache_clear(); LeaveCriticalSection(&mutex); } @@ -469,6 +483,7 @@ int fscache_lstat(const char *filename, struct stat *st) if (!fscache_enabled(filename)) return mingw_lstat(filename, st); + lstat_requests++; /* split filename into path + name */ len = strlen(filename); if (len && is_dir_sep(filename[len - 1])) @@ -550,6 +565,7 @@ DIR *fscache_opendir(const char *dirname) if (!fscache_enabled(dirname)) return dirent_opendir(dirname); + opendir_requests++; /* prepare name (strip trailing '/', replace '.') */ len = strlen(dirname); if ((len == 1 && dirname[0] == '.') || From 485f3689571e6077da5cd63f1ac45723bec15c45 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 00:58:49 +0000 Subject: [PATCH 21/21] unpack-trees: enable fscache for sparse-checkout When updating the skip-worktree bits in the index to align with new values in a sparse-checkout file, Git scans the entire working directory with lstat() calls. In a sparse-checkout, many of these lstat() calls are for paths that do not exist. Enable the fscache feature during this scan. Since enable_fscache() calls nest, the disable_fscache() method decrements a counter and would only clear the cache if that counter reaches zero. In a local test of a repo with ~2.2 million paths, updating the index with git read-tree -m -u HEAD with a sparse-checkout file containing only /.gitattributes improved from 2-3 minutes to ~6 seconds. Signed-off-by: Derrick Stolee Signed-off-by: Johannes Schindelin --- unpack-trees.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index 471837f032..8c474636bf 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1823,7 +1823,9 @@ static void mark_new_skip_worktree(struct pattern_list *pl, * 2. Widen worktree according to sparse-checkout file. * Matched entries will have skip_wt_flag cleared (i.e. "in") */ + enable_fscache(istate->cache_nr); clear_ce_flags(istate, select_flag, skip_wt_flag, pl, show_progress); + disable_fscache(); } static void populate_from_existing_patterns(struct unpack_trees_options *o,