From 86c0c24ad4304ffbcd1d9276ec62a576b397a6de Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 01/21] Win32: make FILETIME conversion functions public We will use them in the upcoming "FSCache" patches (to accelerate sequential lstat() calls). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw-posix.h | 18 ++++++++++++++++++ compat/mingw.c | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h index da934834a1..2362ce6897 100644 --- a/compat/mingw-posix.h +++ b/compat/mingw-posix.h @@ -338,6 +338,17 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. + */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows, * including our own struct stat with 64 bit st_size and nanosecond-precision @@ -354,6 +365,13 @@ struct timespec { #endif #endif +static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) +{ + long long hnsec = filetime_to_hnsec(ft); + ts->tv_sec = (time_t)(hnsec / 10000000); + ts->tv_nsec = (hnsec % 10000000) * 100; +} + struct mingw_stat { _dev_t st_dev; _ino_t st_ino; diff --git a/compat/mingw.c b/compat/mingw.c index 98fc8ff074..2718d5fa5d 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1058,24 +1058,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) -{ - long long hnsec = filetime_to_hnsec(ft); - ts->tv_sec = (time_t)(hnsec / 10000000); - ts->tv_nsec = (hnsec % 10000000) * 100; -} - /** * Verifies that safe_create_leading_directories() would succeed. */ From 9ebeafd91b09502203c2ba684de91d6fed76b02b Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 02/21] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 24ee9b814d..2f7d64369f 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -21,40 +21,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -93,3 +59,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From adb55cab7ef456b000d34928e9e0881cd4264e58 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 03/21] mingw: make the dirent implementation pluggable Emulating the POSIX `dirent` API on Windows via `FindFirstFile()`/`FindNextFile()` is pretty staightforward, however, most of the information provided in the `WIN32_FIND_DATA` structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to `lstat()`. Make the `dirent` implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Define a base DIR structure with pointers to `readdir()`/`closedir()` that match the `opendir()` implementation (similar to vtable pointers in Object-Oriented Programming). Define `readdir()`/`closedir()` so that they call the function pointers in the `DIR` structure. This allows to choose the `opendir()` implementation on a call-by-call basis. Make the fixed-size `dirent.d_name` buffer a flex array, as `d_name` may be implementation specific (e.g. a caching implementation may allocate a `struct dirent` with _just_ the size needed to hold the `d_name` in question). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/win32/dirent.c | 30 +++++++++++++++++++----------- compat/win32/dirent.h | 28 +++++++++++++++++++++------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2f7d64369f..f17e159546 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,21 @@ #include "../../git-compat-util.h" -struct DIR { - struct dirent dd_dir; /* includes d_type */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + struct dirent dd_dir; /* includes d_type */ +} dirent_DIR; +#pragma GCC diagnostic pop + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if ((fdata->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) @@ -21,7 +27,7 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -48,7 +54,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -60,13 +66,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -87,9 +93,11 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR) + MAX_PATH); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bf..a58a8075fd 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,34 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char d_name[/* FLEX_ARRAY */]; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +#define opendir git_opendir + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From 3af31fe4ff2eb4c9fc6fb442fa7bae9df8fa1300 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 04/21] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw-posix.h | 2 +- compat/mingw.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h index 2362ce6897..1a917890b1 100644 --- a/compat/mingw-posix.h +++ b/compat/mingw-posix.h @@ -404,7 +404,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); int mingw_utime(const char *file_name, const struct utimbuf *times); diff --git a/compat/mingw.c b/compat/mingw.c index 2718d5fa5d..5353a6e15e 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1250,6 +1250,8 @@ int mingw_lstat(const char *file_name, struct stat *buf) return -1; } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) { BY_HANDLE_FILE_INFORMATION fdata; From 2eae55f50e78236b1e9a6fd6c40f94aac6b25974 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 05/21] mingw: add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. Signed-off-by: Karsten Blees --- Documentation/config/core.adoc | 6 ++++++ builtin/commit.c | 1 + compat/mingw.c | 6 ++++++ compat/mingw.h | 2 ++ git-compat-util.h | 15 +++++++++++++++ preload-index.c | 3 +++ 6 files changed, 33 insertions(+) diff --git a/Documentation/config/core.adoc b/Documentation/config/core.adoc index 9bc9de29d9..80dcc4b004 100644 --- a/Documentation/config/core.adoc +++ b/Documentation/config/core.adoc @@ -710,6 +710,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.unsetenvvars:: Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other process. diff --git a/builtin/commit.c b/builtin/commit.c index 8e901fe8db..24eb8e5a21 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1623,6 +1623,7 @@ struct repository *repo UNUSED) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; diff --git a/compat/mingw.c b/compat/mingw.c index 5353a6e15e..a2c8afe671 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -274,6 +274,7 @@ enum hide_dotfiles_type { static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; +int core_fscache; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx UNUSED, @@ -287,6 +288,11 @@ int mingw_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { if (!value) return config_error_nonbool(var); diff --git a/compat/mingw.h b/compat/mingw.h index 6ea53ee0d2..65df57d2a7 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -1,5 +1,7 @@ #include "mingw-posix.h" +extern int core_fscache; + struct config_context; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx, void *cb); diff --git a/git-compat-util.h b/git-compat-util.h index ded8edb0e5..7779c8825b 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1050,6 +1050,21 @@ static inline int is_missing_file_error(int errno_) return (errno_ == ENOENT || errno_ == ENOTDIR); } +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. + */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/preload-index.c b/preload-index.c index b222821b44..61e8f3a1f6 100644 --- a/preload-index.c +++ b/preload-index.c @@ -141,6 +141,7 @@ void preload_index(struct index_state *index, pthread_mutex_init(&pd.mutex, NULL); } + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; int err; @@ -176,6 +177,8 @@ void preload_index(struct index_state *index, trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat); trace2_region_leave("index", "preload", NULL); + + enable_fscache(0); } int repo_read_index_preload(struct repository *repo, From efb2db143b884a45eea980a9dbe27a4724972db8 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 06/21] mingw: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow `lstat()` emulation (git calls `lstat()` once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an `lstat()` implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent `lstat()` calls for the same directory are served directly from the cache. Also implement `opendir()`/`readdir()`/`closedir()` so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 487 ++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- contrib/buildsystems/CMakeLists.txt | 3 +- git-compat-util.h | 2 + meson.build | 1 + 6 files changed, 504 insertions(+), 3 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 0000000000..c780881486 --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,487 @@ +#include "../../git-compat-util.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" +#include "../../dir.h" +#include "../../abspath.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + struct timespec st_atim; + struct timespec st_mtim; + struct timespec st_ctim; + } s; + } u; + + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). + */ + struct dirent dirent; +}; +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wflexible-array-extensions" +#endif +struct heap_fsentry { + union { + struct fsentry ent; + char dummy[sizeof(struct fsentry) + MAX_PATH]; + } u; +}; +#pragma GCC diagnostic pop + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(void *cmp_data UNUSED, + const struct fsentry *fse1, const struct fsentry *fse2, + void *keydata UNUSED) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && + (res = fsentry_cmp(NULL, fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2, NULL))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return fspathncmp(fse1->dirent.d_name, fse2->dirent.d_name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->dirent.d_name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + if (len > MAX_PATH) + BUG("Trying to allocate fsentry for long path '%.*s'", + (int)len, name); + memcpy(fse->dirent.d_name, name, len); + fse->dirent.d_name[len] = 0; + fse->len = len; + hashmap_entry_init(&fse->ent, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + /* init the rest of the structure */ + fsentry_init(fse, list, name, len); + fse->next = NULL; + fse->u.refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. + */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->u.refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->u.refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes, + fdata->EaSize); + fse->dirent.d_type = S_ISREG(fse->st_mode) ? DT_REG : + S_ISDIR(fse->st_mode) ? DT_DIR : DT_LNK; + fse->u.s.st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + filetime_to_timespec(&(fdata->ftLastAccessTime), &(fse->u.s.st_atim)); + filetime_to_timespec(&(fdata->ftLastWriteTime), &(fse->u.s.st_mtim)); + filetime_to_timespec(&(fdata->ftCreationTime), &(fse->u.s.st_ctim)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). + */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, + dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, &fse->ent); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + hashmap_clear_and_free(&map, struct fsentry, ent); + hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); +} + +/* + * Checks if the cache is enabled for the given path. + */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get_entry(&map, key->list, ent, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get_entry(&map, key, ent, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + initialized = 1; + } + + result = enable ? InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; +#pragma GCC diagnostic push +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wflexible-array-extensions" +#endif + struct heap_fsentry key[2]; +#pragma GCC diagnostic pop + struct fsentry *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(&key[0].u.ent, NULL, filename, dirlen); + fsentry_init(&key[1].u.ent, &key[0].u.ent, filename + base, len - base); + fse = fscache_get(&key[1].u.ent); + if (!fse) { + errno = ENOENT; + return -1; + } + + /* + * Special case symbolic links: FindFirstFile()/FindNextFile() did not + * provide us with the length of the target path. + */ + if (fse->u.s.st_size == MAX_PATH && S_ISLNK(fse->st_mode)) { + char buf[MAX_LONG_PATH]; + int len = readlink(filename, buf, sizeof(buf) - 1); + + if (len > 0) + fse->u.s.st_size = len; + } + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->u.s.st_size; + st->st_atim = fse->u.s.st_atim; + st->st_mtim = fse->u.s.st_mtim; + st->st_ctim = fse->u.s.st_ctim; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent *dirent; +} fscache_DIR; + +/* + * Readdir replacement. + */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent = &next->dirent; + return dir->dirent; +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct heap_fsentry key; + struct fsentry *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key.u.ent, NULL, dirname, len); + list = fscache_get(&key.u.ent); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 0000000000..ed518b422d --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index a429da8797..94b12f3121 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -509,7 +509,7 @@ endif compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/trace2_win32_process_info.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY \ -DENSURE_MSYSTEM_IS_SET="\"$(MSYSTEM)\"" -DMINGW_PREFIX="\"$(patsubst /%,%,$(MINGW_PREFIX))\"" \ -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" @@ -714,7 +714,7 @@ ifeq ($(uname_S),MINGW) compat/win32/flush.o \ compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DWIN32 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index fe26cebdab..7be6a0a7fc 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -301,7 +301,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") compat/win32/trace2_win32_process_info.c compat/win32/dirent.c compat/nedmalloc/nedmalloc.c - compat/strdup.c) + compat/strdup.c + compat/win32/fscache.c) set(NO_UNIX_SOCKETS 1) elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/git-compat-util.h b/git-compat-util.h index 7779c8825b..8badf93ca9 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -162,9 +162,11 @@ static inline int is_xplatform_dir_sep(int c) /* pull in Windows compatibility stuff */ #include "compat/win32/path-utils.h" #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/win32/path-utils.h" #include "compat/msvc.h" +#include "compat/win32/fscache.h" #endif /* used on Mac OS X */ diff --git a/meson.build b/meson.build index c7f62875af..e6c2e592b4 100644 --- a/meson.build +++ b/meson.build @@ -1260,6 +1260,7 @@ elif host_machine.system() == 'windows' 'compat/winansi.c', 'compat/win32/dirent.c', 'compat/win32/flush.c', + 'compat/win32/fscache.c', 'compat/win32/path-utils.c', 'compat/win32/pthread.c', 'compat/win32/syslog.c', From c7bec347d5f286ff6e6bb8ecaf1437f42339947d Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 07/21] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index c780881486..aaaa8360be 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -27,6 +27,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -268,16 +270,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. + */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->u.refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->u.hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->u.hwait, INFINITE); + CloseHandle(key->u.hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get_entry(&map, key, ent, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get_entry(&map, key, ent, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -285,7 +314,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get_entry(&map, key->list, ent, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -294,16 +323,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->u.refcnt = 0; + hashmap_add(&map, &future->ent); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? key->list : key); - if (!fse) - return NULL; - + fse = fsentry_create_list(future); EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get_entry(&map, key, ent, NULL)) - fscache_add(fse); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, &future->ent, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->u.hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); + return NULL; + } + + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list) From 1d27d42582e4509cf339215e0efea26814596023 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 24 Jan 2017 15:12:13 -0500 Subject: [PATCH 08/21] fscache: add key for GIT_TRACE_FSCACHE Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index aaaa8360be..405d1f3e9d 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -4,11 +4,13 @@ #include "fscache.h" #include "../../dir.h" #include "../../abspath.h" +#include "../../trace.h" static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* * An entry in the file system cache. Used for both entire directory listings @@ -214,6 +216,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", + errno, dir->dirent.d_name); return NULL; } @@ -399,6 +403,7 @@ int fscache_enable(int enable) fscache_clear(); LeaveCriticalSection(&mutex); } + trace_printf_key(&trace_fscache, "fscache: enable(%d)\n", enable); return result; } From 9de01b7f1643ceb9c6b0c5335d778532e3bab4a3 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 13 Dec 2016 14:05:32 -0500 Subject: [PATCH 09/21] fscache: remember not-found directories Teach FSCACHE to remember "not found" directories. This is a performance optimization. FSCACHE is a performance optimization available for Windows. It intercepts Posix-style lstat() calls into an in-memory directory using FindFirst/FindNext. It improves performance on Windows by catching the first lstat() call in a directory, using FindFirst/ FindNext to read the list of files (and attribute data) for the entire directory into the cache, and short-cut subsequent lstat() calls in the same directory. This gives a major performance boost on Windows. However, it does not remember "not found" directories. When STATUS runs and there are missing directories, the lstat() interception fails to find the parent directory and simply return ENOENT for the file -- it does not remember that the FindFirst on the directory failed. Thus subsequent lstat() calls in the same directory, each re-attempt the FindFirst. This completely defeats any performance gains. This can be seen by doing a sparse-checkout on a large repo and then doing a read-tree to reset the skip-worktree bits and then running status. This change reduced status times for my very large repo by 60%. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 405d1f3e9d..62ff84d0e4 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -188,7 +188,8 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, * Dir should not contain trailing '/'. Use an empty string for the current * directory (not "."!). */ -static struct fsentry *fsentry_create_list(const struct fsentry *dir) +static struct fsentry *fsentry_create_list(const struct fsentry *dir, + int *dir_not_found) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; @@ -197,6 +198,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) struct fsentry *list, **phead; DWORD err; + *dir_not_found = 0; + /* convert name to UTF-16 and check length < MAX_PATH */ if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, dir->len)) < 0) { @@ -215,6 +218,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) h = FindFirstFileW(pattern, &fdata); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); + *dir_not_found = 1; /* or empty directory */ errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", errno, dir->dirent.d_name); @@ -223,6 +227,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) /* allocate object to hold directory listing */ list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + list->st_mode = S_IFDIR; + list->dirent.d_type = DT_DIR; /* walk directory and build linked list of fsentry structures */ phead = &list->next; @@ -307,12 +313,16 @@ static struct fsentry *fscache_get_wait(struct fsentry *key) static struct fsentry *fscache_get(struct fsentry *key) { struct fsentry *fse, *future, *waiter; + int dir_not_found; EnterCriticalSection(&mutex); /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { - fsentry_addref(fse); + if (fse->st_mode) + fsentry_addref(fse); + else + fse = NULL; /* non-existing directory */ LeaveCriticalSection(&mutex); return fse; } @@ -321,7 +331,10 @@ static struct fsentry *fscache_get(struct fsentry *key) fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); - /* dir entry without file entry -> file doesn't exist */ + /* + * dir entry without file entry, or dir does not + * exist -> file doesn't exist + */ errno = ENOENT; return NULL; } @@ -335,7 +348,7 @@ static struct fsentry *fscache_get(struct fsentry *key) /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(future); + fse = fsentry_create_list(future, &dir_not_found); EnterCriticalSection(&mutex); /* remove future entry and signal waiting threads */ @@ -349,6 +362,18 @@ static struct fsentry *fscache_get(struct fsentry *key) /* leave on error (errno set by fsentry_create_list) */ if (!fse) { + if (dir_not_found && key->list) { + /* + * Record that the directory does not exist (or is + * empty, which for all practical matters is the same + * thing as far as fscache is concerned). + */ + fse = fsentry_alloc(key->list->list, + key->list->dirent.d_name, + key->list->len); + fse->st_mode = 0; + hashmap_add(&map, &fse->ent); + } LeaveCriticalSection(&mutex); return NULL; } @@ -360,6 +385,9 @@ static struct fsentry *fscache_get(struct fsentry *key) if (key->list) fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse && !fse->st_mode) + fse = NULL; /* non-existing directory */ + /* return entry or ENOENT */ if (fse) fsentry_addref(fse); From e5da8f7490e3189c183ec200ff8ec704abc41348 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 25 Jan 2017 18:39:16 +0100 Subject: [PATCH 10/21] fscache: add a test for the dir-not-found optimization Signed-off-by: Johannes Schindelin --- t/t1090-sparse-checkout-scope.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 3a14218b24..529844e286 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -106,4 +106,24 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success MINGW 'no unnecessary opendir() with fscache' ' + git clone . fscache-test && + ( + cd fscache-test && + git config core.fscache 1 && + echo "/excluded/*" >.git/info/sparse-checkout && + for f in $(test_seq 10) + do + sha1=$(echo $f | git hash-object -w --stdin) && + git update-index --add \ + --cacheinfo 100644,$sha1,excluded/$f || exit 1 + done && + test_tick && + git commit -m excluded && + GIT_TRACE_FSCACHE=1 git status >out 2>err && + grep excluded err >grep.out && + test_line_count = 1 grep.out + ) +' + test_done From e56081b94c99b2d6433b8942c3cca9c60d626804 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 22 Nov 2016 11:26:38 -0500 Subject: [PATCH 11/21] add: use preload-index and fscache for performance Teach "add" to use preload-index and fscache features to improve performance on very large repositories. During an "add", a call is made to run_diff_files() which calls check_remove() for each index-entry. This calls lstat(). On Windows, the fscache code intercepts the lstat() calls and builds a private cache using the FindFirst/FindNext routines, which are much faster. Somewhat independent of this, is the preload-index code which distributes some of the start-up costs across multiple threads. We need to keep the call to read_cache() before parsing the pathspecs (and hence cannot use the pathspecs to limit any preload) because parse_pathspec() is using the index to determine whether a pathspec is, in fact, in a submodule. If we would not read the index first, parse_pathspec() would not error out on a path that is inside a submodule, and t7400-submodule-basic.sh would fail with not ok 47 - do not add files from a submodule We still want the nice preload performance boost, though, so we simply call read_cache_preload(&pathspecs) after parsing the pathspecs. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- builtin/add.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin/add.c b/builtin/add.c index 32709794b3..25add8da96 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -493,6 +493,10 @@ int cmd_add(int argc, die_in_unpopulated_submodule(repo->index, prefix); die_path_inside_submodule(repo->index, &pathspec); + enable_fscache(1); + /* We do not really re-read the index but update the up-to-date flags */ + preload_index(repo->index, &pathspec, 0); + if (add_new_files) { int baselen; @@ -605,5 +609,6 @@ finish: free(ps_matched); dir_clear(&dir); clear_pathspec(&pathspec); + enable_fscache(0); return exit_status; } From 957859f62684c4e6dd6ea2fd0a38f52548ad3fe7 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 1 Nov 2017 15:05:44 -0400 Subject: [PATCH 12/21] dir.c: make add_excludes aware of fscache during status Teach read_directory_recursive() and add_excludes() to be aware of optional fscache and avoid trying to open() and fstat() non-existant ".gitignore" files in every directory in the worktree. The current code in add_excludes() calls open() and then fstat() for a ".gitignore" file in each directory present in the worktree. Change that when fscache is enabled to call lstat() first and if present, call open(). This seems backwards because both lstat needs to do more work than fstat. But when fscache is enabled, fscache will already know if the .gitignore file exists and can completely avoid the IO calls. This works because of the lstat diversion to mingw_lstat when fscache is enabled. This reduced status times on a 350K file enlistment of the Windows repo on a NVMe SSD by 0.25 seconds. Signed-off-by: Jeff Hostetler --- compat/win32/fscache.c | 5 +++++ compat/win32/fscache.h | 3 +++ dir.c | 39 ++++++++++++++++++++++++++++++--------- git-compat-util.h | 4 ++++ 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 62ff84d0e4..2006b61a9a 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -12,6 +12,11 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); +int fscache_is_enabled(void) +{ + return enabled; +} + /* * An entry in the file system cache. Used for both entire directory listings * and file entries. diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index ed518b422d..9a21fd5709 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,6 +4,9 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) +int fscache_is_enabled(void); +#define is_fscache_enabled() (fscache_is_enabled()) + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/dir.c b/dir.c index b00821f294..154eab4b40 100644 --- a/dir.c +++ b/dir.c @@ -1156,16 +1156,37 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; - if (flags & PATTERN_NOFOLLOW) - fd = open_nofollow(fname, O_RDONLY); - else - fd = open(fname, O_RDONLY); - - if (fd < 0 || fstat(fd, &st) < 0) { - if (fd < 0) - warn_on_fopen_errors(fname); + /* + * Since `clang`'s `-Wunreachable-code` mode is clever, it would figure + * out that on non-Windows platforms, this `lstat()` is unreachable. + * We do want to keep the conditional block for the sake of Windows, + * though, so let's use the `NOT_CONSTANT()` trick to suppress that error. + */ + if (NOT_CONSTANT(is_fscache_enabled(fname))) { + if (lstat(fname, &st) < 0) { + fd = -1; + } else { + fd = open(fname, O_RDONLY); + if (fd < 0) + warn_on_fopen_errors(fname); + } + } else { + if (flags & PATTERN_NOFOLLOW) + fd = open_nofollow(fname, O_RDONLY); else - close(fd); + fd = open(fname, O_RDONLY); + + if (fd < 0 || fstat(fd, &st) < 0) { + if (fd < 0) + warn_on_fopen_errors(fname); + else { + close(fd); + fd = -1; + } + } + } + + if (fd < 0) { if (!istate) return -1; r = read_skip_worktree_file_from_index(istate, fname, diff --git a/git-compat-util.h b/git-compat-util.h index 8badf93ca9..2a0de29dd3 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1067,6 +1067,10 @@ static inline int is_missing_file_error(int errno_) #define enable_fscache(x) /* noop */ #endif +#ifndef is_fscache_enabled +#define is_fscache_enabled() (0) +#endif + int cmd_main(int, const char **); /* From 4cc591257901b1d89d4ff581f15b3d1b823aa0b6 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Dec 2017 10:43:41 -0500 Subject: [PATCH 13/21] fscache: make fscache_enabled() public Make fscache_enabled() function public rather than static. Remove unneeded fscache_is_enabled() function. Change is_fscache_enabled() macro to call fscache_enabled(). is_fscache_enabled() now takes a pathname so that the answer is more precise and mean "is fscache enabled for this pathname", since fscache only stores repo-relative paths and not absolute paths, we can avoid attempting lookups for absolute paths. Signed-off-by: Jeff Hostetler --- compat/win32/fscache.c | 7 +------ compat/win32/fscache.h | 4 ++-- git-compat-util.h | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 2006b61a9a..4add9fb973 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -12,11 +12,6 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); -int fscache_is_enabled(void) -{ - return enabled; -} - /* * An entry in the file system cache. Used for both entire directory listings * and file entries. @@ -280,7 +275,7 @@ static void fscache_clear(void) /* * Checks if the cache is enabled for the given path. */ -static inline int fscache_enabled(const char *path) +int fscache_enabled(const char *path) { return enabled > 0 && !is_absolute_path(path); } diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 9a21fd5709..660ada053b 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,8 +4,8 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) -int fscache_is_enabled(void); -#define is_fscache_enabled() (fscache_is_enabled()) +int fscache_enabled(const char *path); +#define is_fscache_enabled(path) fscache_enabled(path) DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/git-compat-util.h b/git-compat-util.h index 2a0de29dd3..de650d74c7 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1068,7 +1068,7 @@ static inline int is_missing_file_error(int errno_) #endif #ifndef is_fscache_enabled -#define is_fscache_enabled() (0) +#define is_fscache_enabled(path) (0) #endif int cmd_main(int, const char **); From 2fe9937172eea2d039ddcdbf7f7cbd0a46667a57 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Dec 2017 11:19:27 -0500 Subject: [PATCH 14/21] dir.c: regression fix for add_excludes with fscache Fix regression described in: https://github.com/git-for-windows/git/issues/1392 which was introduced in: https://github.com/git-for-windows/git/commit/b2353379bba414e6c00dde913497cc9c827366f2 Problem Symptoms ================ When the user has a .gitignore file that is a symlink, the fscache optimization introduced above caused the stat-data from the symlink, rather that of the target file, to be returned. Later when the ignore file was read, the buffer length did not match the stat.st_size field and we called die("cannot use as an exclude file") Optimization Rationale ====================== The above optimization calls lstat() before open() primarily to ask fscache if the file exists. It gets the current stat-data as a side effect essentially for free (since we already have it in memory). If the file does not exist, it does not need to call open(). And since very few directories have .gitignore files, we can greatly reduce time spent in the filesystem. Discussion of Fix ================= The above optimization calls lstat() rather than stat() because the fscache only intercepts lstat() calls. Calls to stat() stay directed to the mingw_stat() completly bypassing fscache. Furthermore, calls to mingw_stat() always call {open, fstat, close} so that symlinks are properly dereferenced, which adds *additional* open/close calls on top of what the original code in dir.c is doing. Since the problem only manifests for symlinks, we add code to overwrite the stat-data when the path is a symlink. This preserves the effect of the performance gains provided by the fscache in the normal case. Signed-off-by: Jeff Hostetler --- dir.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/dir.c b/dir.c index 154eab4b40..186b8832db 100644 --- a/dir.c +++ b/dir.c @@ -1157,6 +1157,28 @@ static int add_patterns(const char *fname, const char *base, int baselen, char *buf; /* + * A performance optimization for status. + * + * During a status scan, git looks in each directory for a .gitignore + * file before scanning the directory. Since .gitignore files are not + * that common, we can waste a lot of time looking for files that are + * not there. Fortunately, the fscache already knows if the directory + * contains a .gitignore file, since it has already read the directory + * and it already has the stat-data. + * + * If the fscache is enabled, use the fscache-lstat() interlude to see + * if the file exists (in the fscache hash maps) before trying to open() + * it. + * + * This causes problem when the .gitignore file is a symlink, because + * we call lstat() rather than stat() on the symlnk and the resulting + * stat-data is for the symlink itself rather than the target file. + * We CANNOT use stat() here because the fscache DOES NOT install an + * interlude for stat() and mingw_stat() always calls "open-fstat-close" + * on the file and defeats the purpose of the optimization here. Since + * symlinks are even more rare than .gitignore files, we force a fstat() + * after our open() to get stat-data for the target file. + * * Since `clang`'s `-Wunreachable-code` mode is clever, it would figure * out that on non-Windows platforms, this `lstat()` is unreachable. * We do want to keep the conditional block for the sake of Windows, @@ -1169,6 +1191,11 @@ static int add_patterns(const char *fname, const char *base, int baselen, fd = open(fname, O_RDONLY); if (fd < 0) warn_on_fopen_errors(fname); + else if (S_ISLNK(st.st_mode) && fstat(fd, &st) < 0) { + warn_on_fopen_errors(fname); + close(fd); + fd = -1; + } } } else { if (flags & PATTERN_NOFOLLOW) From deaa5a51adb7c9ca24d853927fb1175d8e1908e2 Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Wed, 22 Nov 2017 20:39:38 +0900 Subject: [PATCH 15/21] fetch-pack.c: enable fscache for stats under .git/objects When I do git fetch, git call file stats under .git/objects for each refs. This takes time when there are many refs. By enabling fscache, git takes file stats by directory traversing and that improved the speed of fetch-pack for repository having large number of refs. In my windows workstation, this improves the time of `git fetch` for chromium repository like below. I took stats 3 times. * With this patch TotalSeconds: 9.9825165 TotalSeconds: 9.1862075 TotalSeconds: 10.1956256 Avg: 9.78811653333333 * Without this patch TotalSeconds: 15.8406702 TotalSeconds: 15.6248053 TotalSeconds: 15.2085938 Avg: 15.5580231 Signed-off-by: Takuto Ikuta Signed-off-by: Johannes Schindelin --- fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index 40316c9a34..9cf2b6967c 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -760,6 +760,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, save_commit_buffer = 0; trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); + enable_fscache(1); for (ref = *refs; ref; ref = ref->next) { struct commit *commit; @@ -784,6 +785,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, if (!cutoff || cutoff < commit->date) cutoff = commit->date; } + enable_fscache(0); trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); /* From f04f9325254b0941efc4ab39e919b8307aa413c7 Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Tue, 30 Jan 2018 22:42:58 +0900 Subject: [PATCH 16/21] checkout.c: enable fscache for checkout again This is retry of #1419. I added flush_fscache macro to flush cached stats after disk writing with tests for regression reported in #1438 and #1442. git checkout checks each file path in sorted order, so cache flushing does not make performance worse unless we have large number of modified files in a directory containing many files. Using chromium repository, I tested `git checkout .` performance when I delete 10 files in different directories. With this patch: TotalSeconds: 4.307272 TotalSeconds: 4.4863595 TotalSeconds: 4.2975562 Avg: 4.36372923333333 Without this patch: TotalSeconds: 20.9705431 TotalSeconds: 22.4867685 TotalSeconds: 18.8968292 Avg: 20.7847136 I confirmed this patch passed all tests in t/ with core_fscache=1. Signed-off-by: Takuto Ikuta --- builtin/checkout.c | 2 ++ compat/win32/fscache.c | 12 ++++++++++++ compat/win32/fscache.h | 3 +++ entry.c | 3 +++ git-compat-util.h | 4 ++++ parallel-checkout.c | 1 + t/t7201-co.sh | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 61 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index 0ba4f03f2e..a3da5a736a 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -415,6 +415,7 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); + enable_fscache(1); for (pos = 0; pos < the_repository->index->cache_nr; pos++) { struct cache_entry *ce = the_repository->index->cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -440,6 +441,7 @@ static int checkout_worktree(const struct checkout_opts *opts, errs |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL); mem_pool_discard(&ce_mem_pool, should_validate_cache_entries()); + enable_fscache(0); remove_marked_cache_entries(the_repository->index, 1); remove_scheduled_dirs(); errs |= finish_delayed_checkout(&state, opts->show_progress); diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 4add9fb973..1463f8da34 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -435,6 +435,18 @@ int fscache_enable(int enable) return result; } +/* + * Flush cached stats result when fscache is enabled. + */ +void fscache_flush(void) +{ + if (enabled) { + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } +} + /* * Lstat replacement, uses the cache if enabled, otherwise redirects to * mingw_lstat. diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 660ada053b..2f06f8df97 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -7,6 +7,9 @@ int fscache_enable(int enable); int fscache_enabled(const char *path); #define is_fscache_enabled(path) fscache_enabled(path) +void fscache_flush(void); +#define flush_fscache() fscache_flush() + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/entry.c b/entry.c index 7817aee362..5ab78ca884 100644 --- a/entry.c +++ b/entry.c @@ -411,6 +411,9 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca } finish: + /* Flush cached lstat in fscache after writing to disk. */ + flush_fscache(); + if (state->refresh_cache) { if (!fstat_done && lstat(ce->name, &st) < 0) return error_errno("unable to stat just-written file %s", diff --git a/git-compat-util.h b/git-compat-util.h index de650d74c7..e3a2841289 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1071,6 +1071,10 @@ static inline int is_missing_file_error(int errno_) #define is_fscache_enabled(path) (0) #endif +#ifndef flush_fscache +#define flush_fscache() /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/parallel-checkout.c b/parallel-checkout.c index 0bf4bd6d4a..8fadb7c804 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -640,6 +640,7 @@ static void write_items_sequentially(struct checkout *state) { size_t i; + flush_fscache(); for (i = 0; i < parallel_checkout.nr; i++) { struct parallel_checkout_item *pc_item = ¶llel_checkout.items[i]; write_pc_item(pc_item, state); diff --git a/t/t7201-co.sh b/t/t7201-co.sh index 9bcf7c0b40..545f388c44 100755 --- a/t/t7201-co.sh +++ b/t/t7201-co.sh @@ -35,6 +35,42 @@ fill () { } +test_expect_success MINGW 'fscache flush cache' ' + + git init fscache-test && + cd fscache-test && + git config core.fscache 1 && + echo A > test.txt && + git add test.txt && + git commit -m A && + echo B >> test.txt && + git checkout . && + test -z "$(git status -s)" && + echo A > expect.txt && + test_cmp expect.txt test.txt && + cd .. && + rm -rf fscache-test +' + +test_expect_success MINGW 'fscache flush cache dir' ' + + git init fscache-test && + cd fscache-test && + git config core.fscache 1 && + echo A > test.txt && + git add test.txt && + git commit -m A && + rm test.txt && + mkdir test.txt && + touch test.txt/test.txt && + git checkout . && + test -z "$(git status -s)" && + echo A > expect.txt && + test_cmp expect.txt test.txt && + cd .. && + rm -rf fscache-test +' + test_expect_success setup ' fill x y z >same && fill 1 2 3 4 5 6 7 8 >one && From e0773980fe1b54ed610882471536782a74de7ae8 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Fri, 7 Sep 2018 11:39:57 -0400 Subject: [PATCH 17/21] Enable the filesystem cache (fscache) in refresh_index(). On file systems that support it, this can dramatically speed up operations like add, commit, describe, rebase, reset, rm that would otherwise have to lstat() every file to "re-match" the stat information in the index to that of the file system. On a synthetic repo with 1M files, "git reset" dropped from 52.02 seconds to 14.42 seconds for a savings of 72%. Signed-off-by: Ben Peart --- read-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/read-cache.c b/read-cache.c index 0c07c3aef7..09566361b9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1512,6 +1512,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, typechange_fmt = in_porcelain ? "T\t%s\n" : "%s: needs update\n"; added_fmt = in_porcelain ? "A\t%s\n" : "%s: needs update\n"; unmerged_fmt = in_porcelain ? "U\t%s\n" : "%s: needs merge\n"; + enable_fscache(1); /* * Use the multi-threaded preload_index() to refresh most of the * cache entries quickly then in the single threaded loop below, @@ -1606,6 +1607,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, display_progress(progress, istate->cache_nr); stop_progress(&progress); trace_performance_leave("refresh index"); + enable_fscache(0); return has_errors; } From 12f3aea29de878f7ad71d6b3d21641a170dfaf9b Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 23 Oct 2018 11:42:06 -0400 Subject: [PATCH 18/21] fscache: use FindFirstFileExW to avoid retrieving the short name Use FindFirstFileExW with FindExInfoBasic to avoid forcing NTFS to look up the short name. Also switch to a larger (64K vs 4K) buffer using FIND_FIRST_EX_LARGE_FETCH to minimize round trips to the kernel. In a repo with ~200K files, this drops warm cache status times from 3.19 seconds to 2.67 seconds for a 16% savings. Signed-off-by: Ben Peart --- compat/win32/fscache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 1463f8da34..e05ed5feea 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -215,7 +215,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir, pattern[wlen] = 0; /* open find handle */ - h = FindFirstFileW(pattern, &fdata); + h = FindFirstFileExW(pattern, FindExInfoBasic, &fdata, FindExSearchNameMatch, + NULL, FIND_FIRST_EX_LARGE_FETCH); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); *dir_not_found = 1; /* or empty directory */ From 5b6d26df4043805e2247dcdf18c6088eea11f96e Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Thu, 4 Oct 2018 18:10:21 -0400 Subject: [PATCH 19/21] fscache: add GIT_TEST_FSCACHE support Add support to fscache to enable running the entire test suite with the fscache enabled. Signed-off-by: Ben Peart --- compat/win32/fscache.c | 5 +++++ t/README | 3 +++ 2 files changed, 8 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index e05ed5feea..0b84c6f03d 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -5,6 +5,7 @@ #include "../../dir.h" #include "../../abspath.h" #include "../../trace.h" +#include "config.h" static int initialized; static volatile long enabled; @@ -408,7 +409,11 @@ int fscache_enable(int enable) int result; if (!initialized) { + int fscache = git_env_bool("GIT_TEST_FSCACHE", -1); + /* allow the cache to be disabled entirely */ + if (fscache != -1) + core_fscache = fscache; if (!core_fscache) return 0; diff --git a/t/README b/t/README index adbbd9acf4..f194681514 100644 --- a/t/README +++ b/t/README @@ -479,6 +479,9 @@ GIT_TEST_NAME_HASH_VERSION=, when set, causes 'git pack-objects' to assume '--name-hash-version='. +GIT_TEST_FSCACHE= exercises the uncommon fscache code path +which adds a cache below mingw's lstat and dirent implementations. + Naming Tests ------------ From 7fd352ff64890b0e4f83e452ff3acb6f98e9adfe Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 25 Sep 2018 16:28:16 -0400 Subject: [PATCH 20/21] fscache: add fscache hit statistics Track fscache hits and misses for lstat and opendir requests. Reporting of statistics is done when the cache is disabled for the last time and freed and is only reported if GIT_TRACE_FSCACHE is set. Sample output is: 11:33:11.836428 compat/win32/fscache.c:433 fscache: lstat 3775, opendir 263, total requests/misses 4052/269 Signed-off-by: Ben Peart --- compat/win32/fscache.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 0b84c6f03d..2f489ced89 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -11,6 +11,10 @@ static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static unsigned int lstat_requests; +static unsigned int opendir_requests; +static unsigned int fscache_requests; +static unsigned int fscache_misses; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* @@ -272,6 +276,8 @@ static void fscache_clear(void) { hashmap_clear_and_free(&map, struct fsentry, ent); hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); + lstat_requests = opendir_requests = 0; + fscache_misses = fscache_requests = 0; } /* @@ -318,6 +324,7 @@ static struct fsentry *fscache_get(struct fsentry *key) int dir_not_found; EnterCriticalSection(&mutex); + fscache_requests++; /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { @@ -381,6 +388,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* add directory listing to the cache */ + fscache_misses++; fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ @@ -418,6 +426,8 @@ int fscache_enable(int enable) return 0; InitializeCriticalSection(&mutex); + lstat_requests = opendir_requests = 0; + fscache_misses = fscache_requests = 0; hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); initialized = 1; } @@ -434,6 +444,10 @@ int fscache_enable(int enable) opendir = dirent_opendir; lstat = mingw_lstat; EnterCriticalSection(&mutex); + trace_printf_key(&trace_fscache, "fscache: lstat %u, opendir %u, " + "total requests/misses %u/%u\n", + lstat_requests, opendir_requests, + fscache_requests, fscache_misses); fscache_clear(); LeaveCriticalSection(&mutex); } @@ -471,6 +485,7 @@ int fscache_lstat(const char *filename, struct stat *st) if (!fscache_enabled(filename)) return mingw_lstat(filename, st); + lstat_requests++; /* split filename into path + name */ len = strlen(filename); if (len && is_dir_sep(filename[len - 1])) @@ -564,6 +579,7 @@ DIR *fscache_opendir(const char *dirname) if (!fscache_enabled(dirname)) return dirent_opendir(dirname); + opendir_requests++; /* prepare name (strip trailing '/', replace '.') */ len = strlen(dirname); if ((len == 1 && dirname[0] == '.') || From 35dc411ae2107f6ffd7b44ff006fe6cd0815c803 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 00:58:49 +0000 Subject: [PATCH 21/21] unpack-trees: enable fscache for sparse-checkout When updating the skip-worktree bits in the index to align with new values in a sparse-checkout file, Git scans the entire working directory with lstat() calls. In a sparse-checkout, many of these lstat() calls are for paths that do not exist. Enable the fscache feature during this scan. Since enable_fscache() calls nest, the disable_fscache() method decrements a counter and would only clear the cache if that counter reaches zero. In a local test of a repo with ~2.2 million paths, updating the index with git read-tree -m -u HEAD with a sparse-checkout file containing only /.gitattributes improved from 2-3 minutes to ~6 seconds. Signed-off-by: Derrick Stolee Signed-off-by: Johannes Schindelin --- unpack-trees.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index f38c761ab9..450dbdf7c1 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1823,7 +1823,9 @@ static void mark_new_skip_worktree(struct pattern_list *pl, * 2. Widen worktree according to sparse-checkout file. * Matched entries will have skip_wt_flag cleared (i.e. "in") */ + enable_fscache(istate->cache_nr); clear_ce_flags(istate, select_flag, skip_wt_flag, pl, show_progress); + disable_fscache(); } static void populate_from_existing_patterns(struct unpack_trees_options *o,