Merge branch 'ns/batched-fsync'

This merges the topic branch (specifically backported onto v2.33.1 to
allow for integrating into Git for Windows' `main` branch) that strikes
a better balance between safety and speed: rather than `fsync()`ing each
and every loose object file, we now offer to do it in a batch.

This will become the new default in Git for Windows.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin
2021-10-26 17:40:46 +02:00
committed by Victoria Dye
33 changed files with 607 additions and 46 deletions

View File

@@ -576,12 +576,29 @@ core.whitespace::
errors. The default tab width is 8. Allowed values are 1 to 63.
core.fsyncObjectFiles::
This boolean will enable 'fsync()' when writing object files.
A value indicating the level of effort Git will expend in
trying to make objects added to the repo durable in the event
of an unclean system shutdown. This setting currently only
controls loose objects in the object store, so updates to any
refs or the index may not be equally durable.
+
This is a total waste of time and effort on a filesystem that orders
data writes properly, but can be useful for filesystems that do not use
journalling (traditional UNIX filesystems) or that only journal metadata
and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
* `false` allows data to remain in file system caches according to
operating system policy, whence it may be lost if the system loses power
or crashes.
* `true` triggers a data integrity flush for each loose object added to the
object store. This is the safest setting that is likely to ensure durability
across all operating systems and file systems that honor the 'fsync' system
call. However, this setting comes with a significant performance cost on
common hardware. Git does not currently fsync parent directories for
newly-added files, so some filesystems may still allow data to be lost on
system crash.
* `batch` enables an experimental mode that uses interfaces available in some
operating systems to write loose object data with a minimal set of FLUSH
CACHE (or equivalent) commands sent to the storage controller. If the
operating system interfaces are not available, this mode behaves the same as
`true`. This mode is expected to be as safe as `true` on macOS for repos
stored on HFS+ or APFS filesystems and on Windows for repos stored on NTFS or
ReFS.
core.preloadIndex::
Enable parallel index preload for operations like 'git diff'

View File

@@ -406,6 +406,8 @@ all::
#
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
#
# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
#
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
#
@@ -1901,6 +1903,10 @@ ifdef HAVE_CLOCK_MONOTONIC
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
endif
ifdef HAVE_SYNC_FILE_RANGE
BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
endif
ifdef NEEDS_LIBRT
EXTLIBS += -lrt
endif

View File

@@ -19,6 +19,7 @@ static int show_only;
static int verbose;
static timestamp_t expire;
static int show_progress = -1;
static struct strbuf remove_dir_buf = STRBUF_INIT;
static int prune_tmp_file(const char *fullpath)
{
@@ -27,10 +28,20 @@ static int prune_tmp_file(const char *fullpath)
return error("Could not stat '%s'", fullpath);
if (st.st_mtime > expire)
return 0;
if (show_only || verbose)
printf("Removing stale temporary file %s\n", fullpath);
if (!show_only)
unlink_or_warn(fullpath);
if (S_ISDIR(st.st_mode)) {
if (show_only || verbose)
printf("Removing stale temporary directory %s\n", fullpath);
if (!show_only) {
strbuf_reset(&remove_dir_buf);
strbuf_addstr(&remove_dir_buf, fullpath);
remove_dir_recursively(&remove_dir_buf, 0);
}
} else {
if (show_only || verbose)
printf("Removing stale temporary file %s\n", fullpath);
if (!show_only)
unlink_or_warn(fullpath);
}
return 0;
}
@@ -98,6 +109,9 @@ static int prune_cruft(const char *basename, const char *path, void *data)
static int prune_subdir(unsigned int nr, const char *path, void *data)
{
if (verbose)
printf("Removing directory %s\n", path);
if (!show_only)
rmdir(path);
return 0;
@@ -187,5 +201,6 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
}
strbuf_release(&remove_dir_buf);
return 0;
}

View File

@@ -2213,7 +2213,7 @@ static const char *unpack(int err_fd, struct shallow_info *si)
strvec_push(&child.args, alt_shallow_file);
}
tmp_objdir = tmp_objdir_create();
tmp_objdir = tmp_objdir_create("incoming");
if (!tmp_objdir) {
if (err_fd > 0)
close(err_fd);

View File

@@ -1,5 +1,6 @@
#include "builtin.h"
#include "cache.h"
#include "bulk-checkin.h"
#include "config.h"
#include "object-store.h"
#include "object.h"
@@ -503,10 +504,12 @@ static void unpack_all(void)
if (!quiet)
progress = start_progress(_("Unpacking objects"), nr_objects);
CALLOC_ARRAY(obj_list, nr_objects);
plug_bulk_checkin();
for (i = 0; i < nr_objects; i++) {
unpack_one(i);
display_progress(progress, i + 1);
}
unplug_bulk_checkin();
stop_progress(&progress);
if (delta_list)

View File

@@ -5,6 +5,7 @@
*/
#define USE_THE_INDEX_COMPATIBILITY_MACROS
#include "cache.h"
#include "bulk-checkin.h"
#include "config.h"
#include "lockfile.h"
#include "quote.h"
@@ -1088,6 +1089,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
the_index.updated_skipworktree = 1;
/* we might be adding many objects to the object database */
plug_bulk_checkin();
/*
* Custom copy of parse_options() because we want to handle
* filename arguments as they come.
@@ -1168,6 +1172,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
strbuf_release(&buf);
}
/* by now we must have added all of the new objects */
unplug_bulk_checkin();
if (split_index > 0) {
if (git_config_get_split_index() == 0)
warning(_("core.splitIndex is set to false; "

View File

@@ -3,16 +3,22 @@
*/
#include "cache.h"
#include "bulk-checkin.h"
#include "lockfile.h"
#include "repository.h"
#include "csum-file.h"
#include "pack.h"
#include "strbuf.h"
#include "string-list.h"
#include "tmp-objdir.h"
#include "packfile.h"
#include "object-store.h"
static struct bulk_checkin_state {
unsigned plugged:1;
static int bulk_checkin_plugged;
static int needs_batch_fsync;
static struct tmp_objdir *bulk_fsync_objdir;
static struct bulk_checkin_state {
char *pack_tmp_name;
struct hashfile *f;
off_t offset;
@@ -21,7 +27,7 @@ static struct bulk_checkin_state {
struct pack_idx_entry **written;
uint32_t alloc_written;
uint32_t nr_written;
} state;
} bulk_checkin_state;
static void finish_tmp_packfile(struct strbuf *basename,
const char *pack_tmp_name,
@@ -79,6 +85,34 @@ clear_exit:
reprepare_packed_git(the_repository);
}
/*
* Cleanup after batch-mode fsync_object_files.
*/
static void do_batch_fsync(void)
{
/*
* Issue a full hardware flush against a temporary file to ensure
* that all objects are durable before any renames occur. The code in
* fsync_loose_object_bulk_checkin has already issued a writeout
* request, but it has not flushed any writeback cache in the storage
* hardware.
*/
if (needs_batch_fsync) {
struct strbuf temp_path = STRBUF_INIT;
struct tempfile *temp;
strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory());
temp = xmks_tempfile(temp_path.buf);
fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
delete_tempfile(&temp);
strbuf_release(&temp_path);
}
if (bulk_fsync_objdir)
tmp_objdir_migrate(bulk_fsync_objdir);
}
static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
{
int i;
@@ -273,25 +307,61 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
return 0;
}
/*
 * Sync one freshly written loose object file in batch mode.
 *
 * Must only be called when core.fsyncObjectFiles is set to "batch".
 */
void fsync_loose_object_bulk_checkin(int fd)
{
	assert(fsync_object_files == FSYNC_OBJECT_FILES_BATCH);

	/*
	 * While a bulk checkin is plugged, only ask for the file's data to
	 * be written out of the filesystem page cache, without a hardware
	 * flush command. A single hardware flush is issued later, as part
	 * of do_batch_fsync.
	 */
	if (bulk_checkin_plugged &&
	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0) {
		/* Remember that do_batch_fsync still owes us the flush. */
		needs_batch_fsync = 1;
		return;
	}

	/*
	 * Not plugged, or the writeout-only request failed: fall back to
	 * a full fsync of this one file.
	 */
	fsync_or_die(fd, "loose object file");
}
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags)
{
int status = deflate_to_pack(&state, oid, fd, size, type,
int status = deflate_to_pack(&bulk_checkin_state, oid, fd, size, type,
path, flags);
if (!state.plugged)
finish_bulk_checkin(&state);
if (!bulk_checkin_plugged)
finish_bulk_checkin(&bulk_checkin_state);
return status;
}
void plug_bulk_checkin(void)
{
state.plugged = 1;
assert(!bulk_checkin_plugged);
/*
* A temporary object directory is used to hold the files
* while they are not fsynced.
*/
if (fsync_object_files == FSYNC_OBJECT_FILES_BATCH) {
bulk_fsync_objdir = tmp_objdir_create("bulk-fsync");
if (!bulk_fsync_objdir)
die(_("Could not create temporary object directory for core.fsyncobjectfiles=batch"));
tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
}
bulk_checkin_plugged = 1;
}
void unplug_bulk_checkin(void)
{
state.plugged = 0;
if (state.f)
finish_bulk_checkin(&state);
assert(bulk_checkin_plugged);
bulk_checkin_plugged = 0;
if (bulk_checkin_state.f)
finish_bulk_checkin(&bulk_checkin_state);
do_batch_fsync();
}

View File

@@ -6,6 +6,8 @@
#include "cache.h"
void fsync_loose_object_bulk_checkin(int fd);
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags);

View File

@@ -985,7 +985,13 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;
extern int fsync_object_files;
enum fsync_object_files_mode {
FSYNC_OBJECT_FILES_OFF,
FSYNC_OBJECT_FILES_ON,
FSYNC_OBJECT_FILES_BATCH
};
extern enum fsync_object_files_mode fsync_object_files;
extern int core_preload_index;
extern int precomposed_unicode;
extern int protect_hfs;

View File

@@ -332,6 +332,9 @@ int mingw_getpagesize(void);
#define getpagesize mingw_getpagesize
#endif
int win32_fsync_no_flush(int fd);
#define fsync_no_flush win32_fsync_no_flush
struct rlimit {
unsigned int rlim_cur;
};

28
compat/win32/flush.c Normal file
View File

@@ -0,0 +1,28 @@
#include "../../git-compat-util.h"
#include <winternl.h>
#include "lazyload.h"
int win32_fsync_no_flush(int fd)
{
IO_STATUS_BLOCK io_status;
#define FLUSH_FLAGS_FILE_DATA_ONLY 1
DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NtFlushBuffersFileEx,
HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
PIO_STATUS_BLOCK IoStatusBlock);
if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
errno = ENOSYS;
return -1;
}
memset(&io_status, 0, sizeof(io_status));
if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
NULL, 0, &io_status)) {
errno = EINVAL;
return -1;
}
return 0;
}

View File

@@ -1491,7 +1491,12 @@ int git_default_core_config(const char *var, const char *value, void *cb)
}
if (!strcmp(var, "core.fsyncobjectfiles")) {
fsync_object_files = git_config_bool(var, value);
if (value && !strcmp(value, "batch"))
fsync_object_files = FSYNC_OBJECT_FILES_BATCH;
else if (git_config_bool(var, value))
fsync_object_files = FSYNC_OBJECT_FILES_ON;
else
fsync_object_files = FSYNC_OBJECT_FILES_OFF;
return 0;
}

View File

@@ -57,6 +57,7 @@ ifeq ($(uname_S),Linux)
HAVE_CLOCK_MONOTONIC = YesPlease
# -lrt is needed for clock_gettime on glibc <= 2.16
NEEDS_LIBRT = YesPlease
HAVE_SYNC_FILE_RANGE = YesPlease
HAVE_GETDELIM = YesPlease
SANE_TEXT_GREP=-a
FREAD_READS_DIRECTORIES = UnfortunatelyYes
@@ -472,6 +473,7 @@ endif
CFLAGS =
BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
compat/win32/flush.o \
compat/win32/path-utils.o \
compat/win32/pthread.o compat/win32/syslog.o \
compat/win32/trace2_win32_process_info.o \
@@ -658,6 +660,7 @@ ifeq ($(uname_S),MINGW)
COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
COMPAT_OBJS += compat/mingw.o compat/winansi.o \
compat/win32/trace2_win32_process_info.o \
compat/win32/flush.o \
compat/win32/path-utils.o \
compat/win32/pthread.o compat/win32/syslog.o \
compat/win32/dirent.o compat/win32/fscache.o

View File

@@ -1090,6 +1090,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
[AC_MSG_RESULT([no])
HAVE_CLOCK_MONOTONIC=])
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
#
# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
# When it is not, assign an empty value (as done for HAVE_CLOCK_MONOTONIC);
# a bare variable name here would be executed as a shell command.
GIT_CHECK_FUNC(sync_file_range,
	[HAVE_SYNC_FILE_RANGE=YesPlease],
	[HAVE_SYNC_FILE_RANGE=])
GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
#
# Define NO_SETITIMER if you don't have setitimer.
GIT_CHECK_FUNC(setitimer,

View File

@@ -286,7 +286,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0
USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP
UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET)
list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c compat/win32/path-utils.c
list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c
compat/win32/flush.c compat/win32/path-utils.c
compat/win32/pthread.c compat/win32mmap.c compat/win32/syslog.c
compat/win32/trace2_win32_process_info.c compat/win32/dirent.c
compat/nedmalloc/nedmalloc.c compat/strdup.c compat/win32/fscache.c)

View File

@@ -17,6 +17,7 @@
#include "commit.h"
#include "strvec.h"
#include "object-store.h"
#include "tmp-objdir.h"
#include "chdir-notify.h"
#include "shallow.h"
@@ -41,7 +42,7 @@ const char *git_attributes_file;
const char *git_hooks_path;
int zlib_compression_level = Z_BEST_SPEED;
int pack_compression_level = Z_DEFAULT_COMPRESSION;
int fsync_object_files;
enum fsync_object_files_mode fsync_object_files;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
@@ -167,6 +168,10 @@ void setup_git_env(const char *git_dir)
args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT);
args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT);
args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT);
if (getenv(GIT_QUARANTINE_ENVIRONMENT)) {
args.disable_ref_updates = 1;
}
repo_set_gitdir(the_repository, git_dir, &args);
strvec_clear(&to_free);
@@ -330,10 +335,14 @@ static void update_relative_gitdir(const char *name,
void *data)
{
char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir());
struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb();
trace_printf_key(&trace_setup_key,
"setup: move $GIT_DIR to '%s'",
path);
set_git_dir_1(path);
if (tmp_objdir)
tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
free(path);
}

View File

@@ -1235,6 +1235,13 @@ __attribute__((format (printf, 1, 2))) NORETURN
void BUG(const char *fmt, ...);
#endif
/*
 * The kind of sync git_fsync() is asked to perform.
 */
enum fsync_action {
/*
 * Write the file's dirty data out of the filesystem cache without
 * requesting a flush of the storage hardware's cache.
 */
FSYNC_WRITEOUT_ONLY,
/* Full sync: fcntl(F_FULLFSYNC) on macOS, fsync() elsewhere. */
FSYNC_HARDWARE_FLUSH
};
/*
 * Sync `fd` as requested by `action`. Returns the result of the underlying
 * system call; for FSYNC_WRITEOUT_ONLY it returns -1 with errno set to
 * ENOSYS when the platform offers no writeout-only interface.
 */
int git_fsync(int fd, enum fsync_action action);
/*
* Preserves errno, prints a message, but gives no warning for ENOENT.
* Returns 0 on success, which includes trying to unlink an object that does

View File

@@ -683,6 +683,49 @@ void add_to_alternates_memory(const char *reference)
'\n', NULL, 0);
}
/*
 * Push a temporary object directory located at `dir` in front of the
 * repository's object directory list, so that it becomes the primary
 * (writable) one and the former primary becomes its first alternate.
 *
 * `will_destroy` is recorded on the new entry. Returns the former primary
 * object directory.
 */
struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy)
{
	struct object_directory *former_primary;
	struct object_directory *tmp_odb;

	/*
	 * The alternates have to be set up before we splice ourselves in;
	 * otherwise our entry may be overwritten when they are initialized.
	 */
	prepare_alt_odb(the_repository);
	former_primary = the_repository->objects->odb;

	tmp_odb = xcalloc(1, sizeof(*tmp_odb));
	tmp_odb->path = xstrdup(dir);

	/*
	 * Objects in a temporary odb may be rolled back, so ref updates
	 * are disabled for as long as it is the primary odb.
	 */
	tmp_odb->disable_ref_updates = 1;
	tmp_odb->will_destroy = will_destroy;

	/* Link the old primary in as the first alternate of the new one. */
	tmp_odb->next = former_primary;
	the_repository->objects->odb = tmp_odb;

	return former_primary;
}
/*
 * Undo set_temporary_primary_odb(): make `restore_odb` the primary object
 * directory again and free the temporary entry that was in front of it.
 *
 * `old_path` must be the path of the temporary primary being removed, and
 * `restore_odb` must be its first alternate; anything else is a BUG.
 */
void restore_primary_odb(struct object_directory *restore_odb, const char *old_path)
{
	struct object_directory *tmp_odb = the_repository->objects->odb;

	if (strcmp(old_path, tmp_odb->path) != 0)
		BUG("expected %s as primary object store; found %s",
		    old_path, tmp_odb->path);

	if (restore_odb != tmp_odb->next)
		BUG("we expect the old primary object store to be the first alternate");

	the_repository->objects->odb = restore_odb;
	free_object_directory(tmp_odb);
}
/*
* Compute the exact path an alternate is at and returns it. In case of
* error NULL is returned and the human readable error is added to `err`
@@ -1809,8 +1852,21 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
/* Finalize a file on disk, and close it. */
static void close_loose_object(int fd)
{
if (fsync_object_files)
fsync_or_die(fd, "loose object file");
if (!the_repository->objects->odb->will_destroy) {
switch (fsync_object_files) {
case FSYNC_OBJECT_FILES_OFF:
break;
case FSYNC_OBJECT_FILES_ON:
fsync_or_die(fd, "loose object file");
break;
case FSYNC_OBJECT_FILES_BATCH:
fsync_loose_object_bulk_checkin(fd);
break;
default:
BUG("Invalid fsync_object_files mode.");
}
}
if (close(fd) != 0)
die_errno(_("error when closing loose object file"));
}

View File

@@ -27,6 +27,18 @@ struct object_directory {
uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
struct oidtree *loose_objects_cache;
/*
* This is a temporary object store created by the tmp_objdir
* facility. Disable ref updates since the objects in the store
* might be discarded on rollback.
*/
unsigned int disable_ref_updates : 1;
/*
* This object store is ephemeral, so there is no need to fsync.
*/
unsigned int will_destroy : 1;
/*
* Path to the alternative object store. If this is a relative path,
* it is relative to the current working directory.
@@ -58,6 +70,17 @@ void add_to_alternates_file(const char *dir);
*/
void add_to_alternates_memory(const char *dir);
/*
* Replace the current writable object directory with the specified temporary
* object directory; returns the former primary object directory.
*/
struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy);
/*
* Restore a previous ODB replaced by set_temporary_primary_odb.
*/
void restore_primary_odb(struct object_directory *restore_odb, const char *old_path);
/*
* Populate and return the loose object cache array corresponding to the
* given object ID.
@@ -68,6 +91,9 @@ struct oidtree *odb_loose_cache(struct object_directory *odb,
/* Empty the loose object cache for the specified object directory. */
void odb_clear_loose_cache(struct object_directory *odb);
/* Clear and free the specified object directory */
void free_object_directory(struct object_directory *odb);
struct packed_git {
struct hashmap_entry packmap_ent;
struct packed_git *next;

View File

@@ -513,7 +513,7 @@ struct raw_object_store *raw_object_store_new(void)
return o;
}
static void free_object_directory(struct object_directory *odb)
void free_object_directory(struct object_directory *odb)
{
free(odb->path);
odb_clear_loose_cache(odb);

2
refs.c
View File

@@ -2137,7 +2137,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction,
break;
}
if (getenv(GIT_QUARANTINE_ENVIRONMENT)) {
if (the_repository->objects->odb->disable_ref_updates) {
strbuf_addstr(err,
_("ref updates forbidden inside quarantine environment"));
return -1;

View File

@@ -80,6 +80,8 @@ void repo_set_gitdir(struct repository *repo,
expand_base_dir(&repo->objects->odb->path, o->object_dir,
repo->commondir, "objects");
repo->objects->odb->disable_ref_updates = o->disable_ref_updates;
free(repo->objects->alternate_db);
repo->objects->alternate_db = xstrdup_or_null(o->alternate_db);
expand_base_dir(&repo->graft_file, o->graft_file,

View File

@@ -161,6 +161,7 @@ struct set_gitdir_args {
const char *graft_file;
const char *index_file;
const char *alternate_db;
int disable_ref_updates;
};
void repo_set_gitdir(struct repository *repo, const char *root,

36
t/lib-unique-files.sh Normal file
View File

@@ -0,0 +1,36 @@
# Helper to create files with unique contents
# Create multiple files with unique contents. Takes the number of
# directories, the number of files in each directory, and the base
# directory.
#
# test_create_unique_files 2 3 my_dir -- Creates 2 directories with 3 files
# each in my_dir, all with unique
# contents.
test_create_unique_files() {
	test "$#" -ne 3 && BUG "3 param"

	# Keep every helper variable local (including the loop variables)
	# and quote the base directory so that paths containing whitespace
	# are handled correctly.
	local dirs="$1" &&
	local files="$2" &&
	local basedir="$3" &&
	local counter=0 &&
	local i &&
	local j &&
	local dir &&

	# The current tick makes the contents differ between invocations.
	test_tick &&
	local basedata="$test_tick" &&

	rm -rf "$basedir" &&
	for i in $(test_seq "$dirs")
	do
		dir="$basedir/dir$i" &&
		mkdir -p "$dir" || return 1
		for j in $(test_seq "$files")
		do
			counter=$((counter + 1)) &&
			echo "$basedata.$counter" >"$dir/file$j.txt" || return 1
		done
	done
}

43
t/perf/p3700-add.sh Executable file
View File

@@ -0,0 +1,43 @@
#!/bin/sh
#
# This test measures the performance of adding new files to the object database
# and index. The test was originally added to measure the effect of the
# core.fsyncObjectFiles=batch mode, which is why we are testing different values
# of that setting explicitly and creating a lot of unique objects.
test_description="Tests performance of add"
. ./perf-lib.sh
. $TEST_DIRECTORY/lib-unique-files.sh
test_perf_default_repo
test_checkout_worktree
dir_count=10
files_per_dir=50
total_files=$((dir_count * files_per_dir))
# We need to create the files each time we run the perf test, but
# we do not want to measure the cost of creating the files, so run
# the test once.
if test "${GIT_PERF_REPEAT_COUNT-1}" -ne 1
then
echo "warning: Setting GIT_PERF_REPEAT_COUNT=1" >&2
GIT_PERF_REPEAT_COUNT=1
fi
# Run the same measurement once for each value of core.fsyncObjectFiles.
for m in false true batch
do
# Untimed setup: reset the worktree and regenerate the unique files so that
# each mode adds objects that are not yet in the object database.
test_expect_success "create the files for core.fsyncObjectFiles=$m" '
git reset --hard &&
# create files across directories
test_create_unique_files $dir_count $files_per_dir files
'
# Timed step: add the generated files with the given fsync mode.
test_perf "add $total_files files (core.fsyncObjectFiles=$m)" "
git -c core.fsyncobjectfiles=$m add files
"
done
test_done

46
t/perf/p3900-stash.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/sh
#
# This test measures the performance of adding new files to the object database
# and index. The test was originally added to measure the effect of the
# core.fsyncObjectFiles=batch mode, which is why we are testing different values
# of that setting explicitly and creating a lot of unique objects.
test_description="Tests performance of stash"
. ./perf-lib.sh
. $TEST_DIRECTORY/lib-unique-files.sh
test_perf_default_repo
test_checkout_worktree
dir_count=10
files_per_dir=50
total_files=$((dir_count * files_per_dir))
# We need to create the files each time we run the perf test, but
# we do not want to measure the cost of creating the files, so run
# the test once.
if test "${GIT_PERF_REPEAT_COUNT-1}" -ne 1
then
echo "warning: Setting GIT_PERF_REPEAT_COUNT=1" >&2
GIT_PERF_REPEAT_COUNT=1
fi
# Run the same measurement once for each value of core.fsyncObjectFiles.
for m in false true batch
do
	# Untimed setup: reset the worktree and regenerate the unique files.
	test_expect_success "create the files for core.fsyncObjectFiles=$m" '
		git reset --hard &&
		# create files across directories
		test_create_unique_files $dir_count $files_per_dir files
	'

	# We only stash files in the 'files' subdirectory since
	# the perf test infrastructure creates files in the
	# current working directory that need to be preserved
	test_perf "stash $total_files files (core.fsyncObjectFiles=$m)" "
		git -c core.fsyncobjectfiles=$m stash push -u -- files
	"
done
test_done

View File

@@ -8,6 +8,8 @@ test_description='Test of git add, including the -- option.'
TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh
. $TEST_DIRECTORY/lib-unique-files.sh
# Test the file mode "$1" of the file "$2" in the index.
test_mode_in_index () {
case "$(git ls-files -s "$2")" in
@@ -34,6 +36,24 @@ test_expect_success \
'Test that "git add -- -q" works' \
'touch -- -q && git add -- -q'
test_expect_success 'git add: core.fsyncobjectfiles=batch' "
test_create_unique_files 2 4 fsync-files &&
git -c core.fsyncobjectfiles=batch add -- ./fsync-files/ &&
rm -f fsynced_files &&
git ls-files --stage fsync-files/ > fsynced_files &&
test_line_count = 8 fsynced_files &&
awk -- '{print \$2}' fsynced_files | xargs -n1 git cat-file -e
"
test_expect_success 'git update-index: core.fsyncobjectfiles=batch' "
test_create_unique_files 2 4 fsync-files2 &&
find fsync-files2 ! -type d -print | xargs git -c core.fsyncobjectfiles=batch update-index --add -- &&
rm -f fsynced_files2 &&
git ls-files --stage fsync-files2/ > fsynced_files2 &&
test_line_count = 8 fsynced_files2 &&
awk -- '{print \$2}' fsynced_files2 | xargs -n1 git cat-file -e
"
test_expect_success \
'git add: Test that executable bit is not used if core.filemode=0' \
'git config core.filemode 0 &&

View File

@@ -9,6 +9,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
. $TEST_DIRECTORY/lib-unique-files.sh
diff_cmp () {
for i in "$1" "$2"
@@ -1293,6 +1294,19 @@ test_expect_success 'stash handles skip-worktree entries nicely' '
git rev-parse --verify refs/stash:A.t
'
test_expect_success 'stash with core.fsyncobjectfiles=batch' "
test_create_unique_files 2 4 fsync-files &&
git -c core.fsyncobjectfiles=batch stash push -u -- ./fsync-files/ &&
rm -f fsynced_files &&
# The files were untracked, so use the third parent,
# which contains the untracked files
git ls-tree -r stash^3 -- ./fsync-files/ > fsynced_files &&
test_line_count = 8 fsynced_files &&
awk -- '{print \$3}' fsynced_files | xargs -n1 git cat-file -e
"
test_expect_success 'stash -c stash.useBuiltin=false warning ' '
expected="stash.useBuiltin support has been removed" &&

View File

@@ -162,23 +162,23 @@ test_expect_success 'pack-objects with bogus arguments' '
check_unpack () {
test_when_finished "rm -rf git2" &&
git init --bare git2 &&
git -C git2 unpack-objects -n <"$1".pack &&
git -C git2 unpack-objects <"$1".pack &&
(cd .git && find objects -type f -print) |
while read path
do
cmp git2/$path .git/$path || {
echo $path differs.
return 1
}
done
git $2 init --bare git2 &&
(
git $2 -C git2 unpack-objects -n <"$1".pack &&
git $2 -C git2 unpack-objects <"$1".pack &&
git $2 -C git2 cat-file --batch-check="%(objectname)"
) <obj-list >current &&
cmp obj-list current
}
test_expect_success 'unpack without delta' '
check_unpack test-1-${packname_1}
'
test_expect_success 'unpack without delta (core.fsyncobjectfiles=batch)' '
check_unpack test-1-${packname_1} "-c core.fsyncobjectfiles=batch"
'
test_expect_success 'pack with REF_DELTA' '
packname_2=$(git pack-objects --progress test-2 <obj-list 2>stderr) &&
check_deltas stderr -gt 0
@@ -188,6 +188,10 @@ test_expect_success 'unpack with REF_DELTA' '
check_unpack test-2-${packname_2}
'
test_expect_success 'unpack with REF_DELTA (core.fsyncobjectfiles=batch)' '
check_unpack test-2-${packname_2} "-c core.fsyncobjectfiles=batch"
'
test_expect_success 'pack with OFS_DELTA' '
packname_3=$(git pack-objects --progress --delta-base-offset test-3 \
<obj-list 2>stderr) &&
@@ -198,6 +202,10 @@ test_expect_success 'unpack with OFS_DELTA' '
check_unpack test-3-${packname_3}
'
test_expect_success 'unpack with OFS_DELTA (core.fsyncobjectfiles=batch)' '
check_unpack test-3-${packname_3} "-c core.fsyncobjectfiles=batch"
'
test_expect_success 'compare delta flavors' '
perl -e '\''
defined($_ = -s $_) or die for @ARGV;

View File

@@ -1,5 +1,6 @@
#include "cache.h"
#include "tmp-objdir.h"
#include "chdir-notify.h"
#include "dir.h"
#include "sigchain.h"
#include "string-list.h"
@@ -11,6 +12,8 @@
/*
 * State of one temporary object directory.
 */
struct tmp_objdir {
/* Location of the temporary object directory. */
struct strbuf path;
/* NOTE(review): presumably the environment handed out by tmp_objdir_env() -- confirm. */
struct strvec env;
/*
 * The primary object directory that was displaced by
 * tmp_objdir_replace_primary_odb(); NULL while the primary odb is
 * not replaced by this tmp_objdir.
 */
struct object_directory *prev_odb;
/*
 * The will_destroy value last passed to
 * tmp_objdir_replace_primary_odb(), kept so that it can be reused
 * when the replacement is reapplied.
 */
int will_destroy;
};
/*
@@ -38,6 +41,9 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
if (t == the_tmp_objdir)
the_tmp_objdir = NULL;
if (!on_signal && t->prev_odb)
restore_primary_odb(t->prev_odb, t->path.buf);
/*
* This may use malloc via strbuf_grow(), but we should
* have pre-grown t->path sufficiently so that this
@@ -52,6 +58,7 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
*/
if (!on_signal)
tmp_objdir_free(t);
return err;
}
@@ -121,7 +128,7 @@ static int setup_tmp_objdir(const char *root)
return ret;
}
struct tmp_objdir *tmp_objdir_create(void)
struct tmp_objdir *tmp_objdir_create(const char *prefix)
{
static int installed_handlers;
struct tmp_objdir *t;
@@ -129,11 +136,16 @@ struct tmp_objdir *tmp_objdir_create(void)
if (the_tmp_objdir)
BUG("only one tmp_objdir can be used at a time");
t = xmalloc(sizeof(*t));
t = xcalloc(1, sizeof(*t));
strbuf_init(&t->path, 0);
strvec_init(&t->env);
strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory());
/*
* Use a string starting with tmp_ so that the builtin/prune.c code
* can recognize any stale objdirs left behind by a crash and delete
* them.
*/
strbuf_addf(&t->path, "%s/tmp_objdir-%s-XXXXXX", get_object_directory(), prefix);
/*
* Grow the strbuf beyond any filename we expect to be placed in it.
@@ -269,6 +281,13 @@ int tmp_objdir_migrate(struct tmp_objdir *t)
if (!t)
return 0;
if (t->prev_odb) {
if (the_repository->objects->odb->will_destroy)
BUG("migrating an ODB that was marked for destruction");
restore_primary_odb(t->prev_odb, t->path.buf);
t->prev_odb = NULL;
}
strbuf_addbuf(&src, &t->path);
strbuf_addstr(&dst, get_object_directory());
@@ -292,3 +311,33 @@ void tmp_objdir_add_as_alternate(const struct tmp_objdir *t)
{
add_to_alternates_memory(t->path.buf);
}
void tmp_objdir_replace_primary_odb(struct tmp_objdir *t, int will_destroy)
{
if (t->prev_odb)
BUG("the primary object database is already replaced");
t->prev_odb = set_temporary_primary_odb(t->path.buf, will_destroy);
t->will_destroy = will_destroy;
}
struct tmp_objdir *tmp_objdir_unapply_primary_odb(void)
{
if (!the_tmp_objdir || !the_tmp_objdir->prev_odb)
return NULL;
restore_primary_odb(the_tmp_objdir->prev_odb, the_tmp_objdir->path.buf);
the_tmp_objdir->prev_odb = NULL;
return the_tmp_objdir;
}
/*
 * Re-install `t` as the primary object directory after the working
 * directory changed from `old_cwd` to `new_cwd`: the stored path is first
 * reparented for the new working directory, then the primary odb is
 * replaced again with the previously recorded will_destroy setting.
 */
void tmp_objdir_reapply_primary_odb(struct tmp_objdir *t, const char *old_cwd,
				    const char *new_cwd)
{
	char *reparented = reparent_relative_path(old_cwd, new_cwd, t->path.buf);

	/* Overwrite the stored path in place with the reparented one. */
	strbuf_reset(&t->path);
	strbuf_addstr(&t->path, reparented);
	free(reparented);

	tmp_objdir_replace_primary_odb(t, t->will_destroy);
}

View File

@@ -10,7 +10,7 @@
*
* Example:
*
* struct tmp_objdir *t = tmp_objdir_create();
* struct tmp_objdir *t = tmp_objdir_create("incoming");
* if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) &&
* !tmp_objdir_migrate(t))
* printf("success!\n");
@@ -22,9 +22,10 @@
struct tmp_objdir;
/*
* Create a new temporary object directory; returns NULL on failure.
* Create a new temporary object directory with the specified prefix;
* returns NULL on failure.
*/
struct tmp_objdir *tmp_objdir_create(void);
struct tmp_objdir *tmp_objdir_create(const char *prefix);
/*
* Return a list of environment strings, suitable for use with
@@ -51,4 +52,26 @@ int tmp_objdir_destroy(struct tmp_objdir *);
*/
void tmp_objdir_add_as_alternate(const struct tmp_objdir *);
/*
* Replaces the main object store in the current process with the temporary
* object directory and makes the former main object store an alternate.
* If will_destroy is nonzero, the object directory may not be migrated.
*/
void tmp_objdir_replace_primary_odb(struct tmp_objdir *, int will_destroy);
/*
* If the primary object database was replaced by a temporary object directory,
* restore it to its original value while keeping the directory contents around.
* Returns NULL if the primary object database was not replaced.
*/
struct tmp_objdir *tmp_objdir_unapply_primary_odb(void);
/*
* Reapplies the former primary temporary object database, after potentially
* changing its relative path.
*/
void tmp_objdir_reapply_primary_odb(struct tmp_objdir *, const char *old_cwd,
const char *new_cwd);
#endif /* TMP_OBJDIR_H */

View File

@@ -546,6 +546,54 @@ int xmkstemp_mode(char *filename_template, int mode)
return fd;
}
/*
 * Sync `fd` according to `action`.
 *
 * FSYNC_HARDWARE_FLUSH issues fcntl(F_FULLFSYNC) on macOS and fsync()
 * everywhere else. FSYNC_WRITEOUT_ONLY uses the first interface available
 * at build time (see below) and fails with errno set to ENOSYS when there
 * is none. Any other `action` is a BUG.
 *
 * Returns the result of the underlying call.
 */
int git_fsync(int fd, enum fsync_action action)
{
	if (action == FSYNC_HARDWARE_FLUSH) {
#ifdef __APPLE__
		return fcntl(fd, F_FULLFSYNC);
#else
		return fsync(fd);
#endif
	}

	if (action != FSYNC_WRITEOUT_ONLY)
		BUG("unexpected git_fsync(%d) call", action);

#if defined(__APPLE__)
	/*
	 * on macOS, fsync just causes filesystem cache writeback but does not
	 * flush hardware caches.
	 */
	return fsync(fd);
#elif defined(HAVE_SYNC_FILE_RANGE)
	/*
	 * On linux 2.6.17 and above, sync_file_range is the way to issue
	 * a writeback without a hardware flush. An offset of 0 and size of 0
	 * indicates writeout of the entire file and the wait flags ensure that all
	 * dirty data is written to the disk (potentially in a disk-side cache)
	 * before we continue.
	 */
	return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
					 SYNC_FILE_RANGE_WRITE |
					 SYNC_FILE_RANGE_WAIT_AFTER);
#elif defined(fsync_no_flush)
	return fsync_no_flush(fd);
#else
	/* No writeout-only interface on this platform. */
	errno = ENOSYS;
	return -1;
#endif
}
static int warn_if_unremovable(const char *op, const char *file, int rc)
{
int err;

View File

@@ -57,7 +57,7 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
void fsync_or_die(int fd, const char *msg)
{
while (fsync(fd) < 0) {
while (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0) {
if (errno != EINTR)
die_errno("fsync error on '%s'", msg);
}