mirror of
https://github.com/git-for-windows/git.git
synced 2026-03-26 03:51:40 -05:00
When adding many objects to a repo with core.fsyncObjectFiles set to
true, the cost of fsync'ing each object file can become prohibitive.
One major source of the cost of fsync is the implied flush of the
hardware writeback cache within the disk drive. Fortunately, Windows
and macOS offer mechanisms to write data from the filesystem page cache
without initiating a hardware flush. Linux has the sync_file_range API,
which issues a pagecache writeback request reliably after version 5.2.
This patch introduces a new 'core.fsyncObjectFiles = batch' option that
batches up hardware flushes. It hooks into the bulk-checkin plugging and
unplugging functionality and takes advantage of tmp-objdir.
When the new mode is enabled, do the following for each new object:
1. Create the object in a tmp-objdir.
2. Issue a pagecache writeback request and wait for it to complete.
At the end of the entire transaction when unplugging bulk checkin:
1. Issue an fsync against a dummy file to flush the hardware writeback
cache, which should by now have processed the tmp-objdir writes.
2. Rename all of the tmp-objdir files to their final names.
3. When updating the index and/or refs, we assume that Git will issue
another fsync internal to that operation. This is not the case today,
but may be a good extension to those components.
On a filesystem with a singular journal that is updated during name
operations (e.g. create, link, rename, etc), such as NTFS, HFS+, or XFS
we would expect the fsync to trigger a journal writeout so that this
sequence is enough to ensure that the user's data is durable by the time
the git command returns.
This change also updates the macOS code to trigger a real hardware flush
via fcntl(fd, F_FULLFSYNC) when fsync_or_die is called. Previously, on
macOS there was no guarantee of durability since a simple fsync(2) call
does not flush any hardware caches.
_Performance numbers_:
Linux - Hyper-V VM running Kernel 5.11 (Ubuntu 20.04) on a fast SSD.
Mac - macOS 11.5.1 running on a Mac mini on a 1TB Apple SSD.
Windows - Same host as Linux, a preview version of Windows 11.
This number is from a patch later in the series.
Adding 500 files to the repo with 'git add'. Times reported in seconds.
core.fsyncObjectFiles | Linux | Mac | Windows
----------------------|-------|-------|--------
false | 0.06 | 0.35 | 0.61
true | 1.88 | 11.18 | 2.47
batch | 0.15 | 0.41 | 1.53
Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
111 lines
2.3 KiB
C
111 lines
2.3 KiB
C
#include "cache.h"
|
|
#include "config.h"
|
|
#include "run-command.h"
|
|
|
|
/*
 * Flush a stdio stream after writing to it, so that write errors are
 * noticed and interactive output is not buffered excessively.
 *
 * If the flush already happened inside the write itself, the error
 * code is gone and cannot be reported any more; that case is ignored
 * in the hope that the flush here yields the right error code.
 *
 * Special handling when 'f' is stdout (decided once and cached):
 *  - if GIT_FLUSH is set, the flush is skipped exactly when atoi()
 *    of its value is 0;
 *  - otherwise the flush is skipped when stdout is a regular file,
 *    since it is not needed there.
 * A skipped flush is still performed if the stream's error indicator
 * is set.
 *
 * On fflush() failure, errno is handed to check_pipe() and then
 * die_errno() is called with 'desc' naming what was being written.
 */
void maybe_flush_or_die(FILE *f, const char *desc)
{
	/* -1: not decided yet, 0: always flush stdout, 1: skip the flush */
	static int skip_stdout_flush = -1;

	if (f == stdout) {
		if (skip_stdout_flush < 0) {
			const char *env = getenv("GIT_FLUSH");
			struct stat st;

			if (env)
				skip_stdout_flush = !atoi(env);
			else
				skip_stdout_flush =
					!fstat(fileno(stdout), &st) &&
					S_ISREG(st.st_mode);
		}
		if (skip_stdout_flush && !ferror(f))
			return;
	}

	if (!fflush(f))
		return;

	check_pipe(errno);
	die_errno("write failure on '%s'", desc);
}
|
|
|
|
/*
 * printf-style formatted write to 'f'.
 *
 * If vfprintf() reports an error (negative return), errno is handed to
 * check_pipe() and then die_errno() is called.
 */
void fprintf_or_die(FILE *f, const char *fmt, ...)
{
	va_list args;
	int written;

	va_start(args, fmt);
	written = vfprintf(f, fmt, args);
	va_end(args);

	if (written >= 0)
		return;

	check_pipe(errno);
	die_errno("write error");
}
|
|
|
|
static int maybe_fsync(int fd)
|
|
{
|
|
if (use_fsync < 0)
|
|
use_fsync = git_env_bool("GIT_TEST_FSYNC", 1);
|
|
if (!use_fsync)
|
|
return 0;
|
|
|
|
if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
|
|
git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
|
|
return 0;
|
|
|
|
return git_fsync(fd, FSYNC_HARDWARE_FLUSH);
|
|
}
|
|
|
|
/*
 * Sync 'fd' via maybe_fsync(); on a negative result call die_errno()
 * with 'msg' identifying what was being synced.
 */
void fsync_or_die(int fd, const char *msg)
{
	int rc = maybe_fsync(fd);

	if (rc < 0)
		die_errno("fsync error on '%s'", msg);
}
|
|
|
|
int fsync_component(enum fsync_component component, int fd)
|
|
{
|
|
if (fsync_components & component)
|
|
return maybe_fsync(fd);
|
|
return 0;
|
|
}
|
|
|
|
void fsync_component_or_die(enum fsync_component component, int fd, const char *msg)
|
|
{
|
|
if (fsync_components & component)
|
|
fsync_or_die(fd, msg);
|
|
}
|
|
|
|
/*
 * Write 'count' bytes from 'buf' to 'fd' using write_in_full().
 *
 * If write_in_full() returns a negative value, errno is handed to
 * check_pipe() and then die_errno() is called.
 */
void write_or_die(int fd, const void *buf, size_t count)
{
	if (write_in_full(fd, buf, count) >= 0)
		return;

	check_pipe(errno);
	die_errno("write error");
}
|
|
|
|
/*
 * Write 'count' bytes from 'buf' to the stdio stream 'f'; call
 * die_errno() if fwrite() reports fewer bytes written than requested.
 */
void fwrite_or_die(FILE *f, const void *buf, size_t count)
{
	size_t written = fwrite(buf, 1, count, f);

	if (written != count)
		die_errno("fwrite error");
}
|
|
|
|
/*
 * Flush the stdio stream 'f'; call die_errno() if fflush() fails.
 */
void fflush_or_die(FILE *f)
{
	if (fflush(f) != 0)
		die_errno("fflush error");
}
|