From 6d4ec2478db4d87f2dae1fc968a2070c3bac8877 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 1/7] Win32: make FILETIME conversion functions public We will use them in the upcoming "FSCache" patches (to accelerate sequential lstat() calls). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw.c | 18 ------------------ compat/mingw.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index c63f6b3a9b..1345e3ea2c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -788,24 +788,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) -{ - long long hnsec = filetime_to_hnsec(ft); - ts->tv_sec = (time_t)(hnsec / 10000000); - ts->tv_nsec = (hnsec % 10000000) * 100; -} - /** * Verifies that safe_create_leading_directories() would succeed. */ diff --git a/compat/mingw.h b/compat/mingw.h index 61f98ec39c..f59f752469 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -356,6 +356,17 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. + */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows, * including our own struct stat with 64 bit st_size and nanosecond-precision @@ -372,6 +383,13 @@ struct timespec { #endif #endif +static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) +{ + long long hnsec = filetime_to_hnsec(ft); + ts->tv_sec = (time_t)(hnsec / 10000000); + ts->tv_nsec = (hnsec % 10000000) * 100; +} + struct mingw_stat { _dev_t st_dev; _ino_t st_ino; From 4e7b63ea7fe24b290a541c1d6dae6ff5f6e77b19 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 2/7] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 52420ec7d4..2603a0fa39 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,40 +18,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -90,3 +56,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From b08f49f5ec00e980175fad7a24d471bf77556580 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 3/7] mingw: make the dirent implementation pluggable Emulating the POSIX `dirent` API on Windows via `FindFirstFile()`/`FindNextFile()` is pretty staightforward, however, most of the information provided in the `WIN32_FIND_DATA` structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to `lstat()`. Make the `dirent` implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Define a base DIR structure with pointers to `readdir()`/`closedir()` that match the `opendir()` implementation (similar to vtable pointers in Object-Oriented Programming). Define `readdir()`/`closedir()` so that they call the function pointers in the `DIR` structure. This allows to choose the `opendir()` implementation on a call-by-call basis. Make the fixed-size `dirent.d_name` buffer a flex array, as `d_name` may be implementation specific (e.g. a caching implementation may allocate a `struct dirent` with _just_ the size needed to hold the `d_name` in question). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/win32/dirent.c | 30 +++++++++++++++++++----------- compat/win32/dirent.h | 26 +++++++++++++++++++------- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2603a0fa39..139d2ba3c4 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,21 @@ #include "../../git-compat-util.h" -struct DIR { - struct dirent dd_dir; /* includes d_type */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + struct dirent dd_dir; /* includes d_type */ +} dirent_DIR; +#pragma GCC diagnostic pop + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -18,7 +24,7 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -45,7 +51,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -57,13 +63,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -84,9 +90,11 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR) + MAX_PATH); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bf..e0e0e1700f 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,32 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char d_name[FLEX_ARRAY]; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From 128ef21dd662e7af02fc4be33f6445606b71272e Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 4/7] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw.c | 2 ++ compat/mingw.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 1345e3ea2c..a68141b17c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -927,6 +927,8 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return do_lstat(follow, alt_name, buf); } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) { BY_HANDLE_FILE_INFORMATION fdata; diff --git a/compat/mingw.h b/compat/mingw.h index f59f752469..f982cb98bf 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -422,7 +422,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); int mingw_utime(const char *file_name, const struct utimbuf *times); From edc578fc330d10fd83f51463cec6338ec67663c8 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 5/7] mingw: add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. Signed-off-by: Karsten Blees --- Documentation/config/core.txt | 6 ++++++ builtin/commit.c | 1 + compat/mingw.c | 6 ++++++ compat/mingw.h | 2 ++ git-compat-util.h | 15 +++++++++++++++ preload-index.c | 3 +++ 6 files changed, 33 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 60ca9f2b68..b894c7e9cc 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -685,6 +685,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.unsetenvvars:: Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other process. diff --git a/builtin/commit.c b/builtin/commit.c index dec78dfb86..d9ae696bfd 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1568,6 +1568,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; diff --git a/compat/mingw.c b/compat/mingw.c index a68141b17c..ca8b116190 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -244,6 +244,7 @@ enum hide_dotfiles_type { static int core_restrict_inherited_handles = -1; static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; +int core_fscache; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx, void *cb) @@ -256,6 +257,11 @@ int mingw_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { if (!value) return config_error_nonbool(var); diff --git a/compat/mingw.h b/compat/mingw.h index f982cb98bf..946ff70732 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -11,6 +11,8 @@ typedef _sigset_t sigset_t; #undef _POSIX_THREAD_SAFE_FUNCTIONS #endif +extern int core_fscache; + struct config_context; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx, void *cb); diff --git a/git-compat-util.h b/git-compat-util.h index 710230d732..c8429efc22 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1520,6 +1520,21 @@ static inline int is_missing_file_error(int errno_) return (errno_ == ENOENT || errno_ == ENOTDIR); } +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. + */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/preload-index.c b/preload-index.c index 63fd35d64b..ec1d7b46d5 100644 --- a/preload-index.c +++ b/preload-index.c @@ -132,6 +132,7 @@ void preload_index(struct index_state *index, pthread_mutex_init(&pd.mutex, NULL); } + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; int err; @@ -167,6 +168,8 @@ void preload_index(struct index_state *index, trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat); trace2_region_leave("index", "preload", NULL); + + enable_fscache(0); } int repo_read_index_preload(struct repository *repo, From d50cdd788047a91f7fc71c30fdb8c22eecfaa2ff Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 6/7] mingw: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow `lstat()` emulation (git calls `lstat()` once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an `lstat()` implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent `lstat()` calls for the same directory are served directly from the cache. Also implement `opendir()`/`readdir()`/`closedir()` so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 463 ++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- contrib/buildsystems/CMakeLists.txt | 3 +- git-compat-util.h | 2 + 5 files changed, 479 insertions(+), 3 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 0000000000..16e5562746 --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,463 @@ +#include "../../git-compat-util.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" +#include "../../dir.h" +#include "../../abspath.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + struct timespec st_atim; + struct timespec st_mtim; + struct timespec st_ctim; + } s; + } u; + + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). + */ + struct dirent dirent; +}; +#pragma GCC diagnostic pop + +struct heap_fsentry { + union { + struct fsentry ent; + char dummy[sizeof(struct fsentry) + MAX_PATH]; + } u; +}; + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(void *unused_cmp_data, + const struct fsentry *fse1, const struct fsentry *fse2, + void *unused_keydata) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && + (res = fsentry_cmp(NULL, fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2, NULL))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return fspathncmp(fse1->dirent.d_name, fse2->dirent.d_name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->dirent.d_name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + if (len > MAX_PATH) + BUG("Trying to allocate fsentry for long path '%.*s'", + (int)len, name); + memcpy(fse->dirent.d_name, name, len); + fse->dirent.d_name[len] = 0; + fse->len = len; + hashmap_entry_init(&fse->ent, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + /* init the rest of the structure */ + fsentry_init(fse, list, name, len); + fse->next = NULL; + fse->u.refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. + */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->u.refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->u.refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->dirent.d_type = S_ISDIR(fse->st_mode) ? DT_DIR : DT_REG; + fse->u.s.st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + filetime_to_timespec(&(fdata->ftLastAccessTime), &(fse->u.s.st_atim)); + filetime_to_timespec(&(fdata->ftLastWriteTime), &(fse->u.s.st_mtim)); + filetime_to_timespec(&(fdata->ftCreationTime), &(fse->u.s.st_ctim)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). + */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, + dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, &fse->ent); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + hashmap_clear_and_free(&map, struct fsentry, ent); + hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); +} + +/* + * Checks if the cache is enabled for the given path. + */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get_entry(&map, key->list, ent, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get_entry(&map, key, ent, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + initialized = 1; + } + + result = enable ? InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; + struct heap_fsentry key[2]; + struct fsentry *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(&key[0].u.ent, NULL, filename, dirlen); + fsentry_init(&key[1].u.ent, &key[0].u.ent, filename + base, len - base); + fse = fscache_get(&key[1].u.ent); + if (!fse) { + errno = ENOENT; + return -1; + } + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->u.s.st_size; + st->st_atim = fse->u.s.st_atim; + st->st_mtim = fse->u.s.st_mtim; + st->st_ctim = fse->u.s.st_ctim; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent *dirent; +} fscache_DIR; + +/* + * Readdir replacement. + */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent = &next->dirent; + return dir->dirent; +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct heap_fsentry key; + struct fsentry *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key.u.ent, NULL, dirname, len); + list = fscache_get(&key.u.ent); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 0000000000..ed518b422d --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index 75adc5139c..7515d87270 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -501,7 +501,7 @@ endif compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/trace2_win32_process_info.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY -DENSURE_MSYSTEM_IS_SET -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO # invalidcontinue.obj allows Git's source code to close the same file @@ -703,7 +703,7 @@ ifeq ($(uname_S),MINGW) compat/win32/flush.o \ compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DWIN32 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 6e7624467f..9540631852 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -308,7 +308,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") compat/win32/trace2_win32_process_info.c compat/win32/dirent.c compat/nedmalloc/nedmalloc.c - compat/strdup.c) + compat/strdup.c + compat/win32/fscache.c) set(NO_UNIX_SOCKETS 1) elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/git-compat-util.h b/git-compat-util.h index c8429efc22..9b00e08d3e 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -284,9 +284,11 @@ static inline int is_xplatform_dir_sep(int c) /* pull in Windows compatibility stuff */ #include "compat/win32/path-utils.h" #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/win32/path-utils.h" #include "compat/msvc.h" +#include "compat/win32/fscache.h" #else #include #include From 0645781c4877b455e9aa6ae194aa38f77b395e4c Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 7/7] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 16e5562746..8c7a37c6c0 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -27,6 +27,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -261,16 +263,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. + */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->u.refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->u.hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->u.hwait, INFINITE); + CloseHandle(key->u.hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get_entry(&map, key, ent, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get_entry(&map, key, ent, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -278,7 +307,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get_entry(&map, key->list, ent, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -287,16 +316,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->u.refcnt = 0; + hashmap_add(&map, &future->ent); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? key->list : key); - if (!fse) - return NULL; - + fse = fsentry_create_list(future); EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get_entry(&map, key, ent, NULL)) - fscache_add(fse); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, &future->ent, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->u.hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); + return NULL; + } + + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list)