Merge branch 'nd/stream-more' into next

Use API to read blob data in smaller chunks in more places to
reduce the memory footprint.  In general, looked fairly good.

Will merge to 'master'.

By Nguyễn Thái Ngọc Duy (6) and Junio C Hamano (1)
* nd/stream-more:
  update-server-info: respect core.bigfilethreshold
  fsck: use streaming API for writing lost-found blobs
  show: use streaming API for showing blobs
  parse_object: avoid putting whole blob in core
  cat-file: use streaming API to print blobs
  Add more large blob test cases
  streaming: make streaming-write-entry to be more reusable
This commit is contained in:
Junio C Hamano
2012-04-11 12:17:52 -07:00
11 changed files with 221 additions and 75 deletions

View File

@@ -11,6 +11,7 @@
#include "parse-options.h"
#include "diff.h"
#include "userdiff.h"
#include "streaming.h"
#define BATCH 1
#define BATCH_CHECK 2
@@ -127,6 +128,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
return cmd_ls_tree(2, ls_args, NULL);
}
if (type == OBJ_BLOB)
return stream_blob_to_fd(1, sha1, NULL, 0);
buf = read_sha1_file(sha1, &type, &size);
if (!buf)
die("Cannot read object %s", obj_name);
@@ -149,6 +152,28 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
break;
case 0:
if (type_from_string(exp_type) == OBJ_BLOB) {
unsigned char blob_sha1[20];
if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
enum object_type type;
unsigned long size;
char *buffer = read_sha1_file(sha1, &type, &size);
if (memcmp(buffer, "object ", 7) ||
get_sha1_hex(buffer + 7, blob_sha1))
die("%s not a valid tag", sha1_to_hex(sha1));
free(buffer);
} else
hashcpy(blob_sha1, sha1);
if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
return stream_blob_to_fd(1, blob_sha1, NULL, 0);
/*
* we attempted to dereference a tag to a blob
* and failed; there may be new dereference
* mechanisms this code is not aware of.
* fall-back to the usual case.
*/
}
buf = read_object_with_reference(sha1, exp_type, &size, NULL);
break;

View File

@@ -12,6 +12,7 @@
#include "parse-options.h"
#include "dir.h"
#include "progress.h"
#include "streaming.h"
#define REACHABLE 0x0001
#define SEEN 0x0002
@@ -238,13 +239,8 @@ static void check_unreachable_object(struct object *obj)
if (!(f = fopen(filename, "w")))
die_errno("Could not open '%s'", filename);
if (obj->type == OBJ_BLOB) {
enum object_type type;
unsigned long size;
char *buf = read_sha1_file(obj->sha1,
&type, &size);
if (buf && fwrite(buf, 1, size, f) != size)
if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1))
die_errno("Could not write '%s'", filename);
free(buf);
} else
fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
if (fclose(f))

View File

@@ -20,6 +20,7 @@
#include "string-list.h"
#include "parse-options.h"
#include "branch.h"
#include "streaming.h"
/* Set a default date-time format for git log ("log.date" config variable) */
static const char *default_date_mode = NULL;
@@ -383,8 +384,13 @@ static void show_tagger(char *buf, int len, struct rev_info *rev)
strbuf_release(&out);
}
static int show_object(const unsigned char *sha1, int show_tag_object,
struct rev_info *rev)
static int show_blob_object(const unsigned char *sha1, struct rev_info *rev)
{
fflush(stdout);
return stream_blob_to_fd(1, sha1, NULL, 0);
}
static int show_tag_object(const unsigned char *sha1, struct rev_info *rev)
{
unsigned long size;
enum object_type type;
@@ -394,16 +400,16 @@ static int show_object(const unsigned char *sha1, int show_tag_object,
if (!buf)
return error(_("Could not read object %s"), sha1_to_hex(sha1));
if (show_tag_object)
while (offset < size && buf[offset] != '\n') {
int new_offset = offset + 1;
while (new_offset < size && buf[new_offset++] != '\n')
; /* do nothing */
if (!prefixcmp(buf + offset, "tagger "))
show_tagger(buf + offset + 7,
new_offset - offset - 7, rev);
offset = new_offset;
}
assert(type == OBJ_TAG);
while (offset < size && buf[offset] != '\n') {
int new_offset = offset + 1;
while (new_offset < size && buf[new_offset++] != '\n')
; /* do nothing */
if (!prefixcmp(buf + offset, "tagger "))
show_tagger(buf + offset + 7,
new_offset - offset - 7, rev);
offset = new_offset;
}
if (offset < size)
fwrite(buf + offset, size - offset, 1, stdout);
@@ -463,7 +469,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
const char *name = objects[i].name;
switch (o->type) {
case OBJ_BLOB:
ret = show_object(o->sha1, 0, NULL);
ret = show_blob_object(o->sha1, NULL);
break;
case OBJ_TAG: {
struct tag *t = (struct tag *)o;
@@ -474,7 +480,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
diff_get_color_opt(&rev.diffopt, DIFF_COMMIT),
t->tag,
diff_get_color_opt(&rev.diffopt, DIFF_RESET));
ret = show_object(o->sha1, 1, &rev);
ret = show_tag_object(o->sha1, &rev);
rev.shown_one = 1;
if (ret)
break;

View File

@@ -15,6 +15,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix)
OPT_END()
};
git_config(git_default_config, NULL);
argc = parse_options(argc, argv, prefix, options,
update_server_info_usage, 0);
if (argc > 0)

53
entry.c
View File

@@ -120,58 +120,15 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
const struct checkout *state, int to_tempfile,
int *fstat_done, struct stat *statbuf)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
int result = -1;
ssize_t kept = 0;
int fd = -1;
st = open_istream(ce->sha1, &type, &sz, filter);
if (!st)
return -1;
if (type != OBJ_BLOB)
goto close_and_exit;
int fd;
fd = open_output_fd(path, ce, to_tempfile);
if (fd < 0)
goto close_and_exit;
for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = read_istream(st, buf, sizeof(buf));
if (!readlen)
break;
if (sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);
if (wrote != readlen)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
write(fd, "", 1) != 1))
goto close_and_exit;
*fstat_done = fstat_output(fd, state, statbuf);
close_and_exit:
close_istream(st);
if (0 <= fd)
if (0 <= fd) {
result = stream_blob_to_fd(fd, ce->sha1, filter, 1);
*fstat_done = fstat_output(fd, state, statbuf);
result = close(fd);
}
if (result && 0 <= fd)
unlink(path);
return result;

View File

@@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1)
if (obj && obj->parsed)
return obj;
if ((obj && obj->type == OBJ_BLOB) ||
(!obj && has_sha1_file(sha1) &&
sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
error("sha1 mismatch %s\n", sha1_to_hex(repl));
return NULL;
}
parse_blob_buffer(lookup_blob(sha1), NULL, 0);
return lookup_object(sha1);
}
buffer = read_sha1_file(sha1, &type, &size);
if (buffer) {
if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {

View File

@@ -19,6 +19,7 @@
#include "pack-revindex.h"
#include "sha1-lookup.h"
#include "bulk-checkin.h"
#include "streaming.h"
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1146,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
return NULL;
}
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
/*
* With an in-core object data in "map", rehash it to make sure the
* object name actually matches "sha1" to detect object corruption.
* With "map" == NULL, try reading the object named with "sha1" using
* the streaming interface and rehash it to do the same.
*/
int check_sha1_signature(const unsigned char *sha1, void *map,
unsigned long size, const char *type)
{
unsigned char real_sha1[20];
hash_sha1_file(map, size, type, real_sha1);
enum object_type obj_type;
struct git_istream *st;
git_SHA_CTX c;
char hdr[32];
int hdrlen;
if (map) {
hash_sha1_file(map, size, type, real_sha1);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}
st = open_istream(sha1, &obj_type, &size, NULL);
if (!st)
return -1;
/* Generate the header */
hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
/* Sha1.. */
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, hdrlen);
for (;;) {
char buf[1024 * 16];
ssize_t readlen = read_istream(st, buf, sizeof(buf));
if (!readlen)
break;
git_SHA1_Update(&c, buf, readlen);
}
git_SHA1_Final(real_sha1, &c);
close_istream(st);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}

View File

@@ -489,3 +489,58 @@ static open_method_decl(incore)
return st->u.incore.buf ? 0 : -1;
}
/****************************************************************
* Users of streaming interface
****************************************************************/
int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter,
int can_seek)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
ssize_t kept = 0;
int result = -1;
st = open_istream(sha1, &type, &sz, filter);
if (!st)
return result;
if (type != OBJ_BLOB)
goto close_and_exit;
for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = read_istream(st, buf, sizeof(buf));
if (!readlen)
break;
if (can_seek && sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);
if (wrote != readlen)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
write(fd, "", 1) != 1))
goto close_and_exit;
result = 0;
close_and_exit:
close_istream(st);
return result;
}

View File

@@ -12,4 +12,6 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type
extern int close_istream(struct git_istream *);
extern ssize_t read_istream(struct git_istream *, char *, size_t);
extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);
#endif /* STREAMING_H */

View File

@@ -6,11 +6,15 @@ test_description='adding and checking out large blobs'
. ./test-lib.sh
test_expect_success setup '
git config core.bigfilethreshold 200k &&
# clone does not allow us to pass core.bigfilethreshold to
# new repos, so set core.bigfilethreshold globally
git config --global core.bigfilethreshold 200k &&
echo X | dd of=large1 bs=1k seek=2000 &&
echo X | dd of=large2 bs=1k seek=2000 &&
echo X | dd of=large3 bs=1k seek=2000 &&
echo Y | dd of=huge bs=1k seek=2500
echo Y | dd of=huge bs=1k seek=2500 &&
GIT_ALLOC_LIMIT=1500 &&
export GIT_ALLOC_LIMIT
'
test_expect_success 'add a large file or two' '
@@ -100,4 +104,34 @@ test_expect_success 'packsize limit' '
)
'
test_expect_success 'diff --raw' '
git commit -q -m initial &&
echo modified >>large1 &&
git add large1 &&
git commit -q -m modified &&
git diff --raw HEAD^
'
test_expect_success 'hash-object' '
git hash-object large1
'
test_expect_success 'cat-file a large file' '
git cat-file blob :large1 >/dev/null
'
test_expect_success 'cat-file a large file from a tag' '
git tag -m largefile largefiletag :large1 &&
git cat-file blob largefiletag >/dev/null
'
test_expect_success 'git-show a large file' '
git show :large1 >/dev/null
'
test_expect_success 'repack' '
git repack -ad
'
test_done

View File

@@ -9,6 +9,18 @@ static void do_nothing(size_t size)
static void (*try_to_free_routine)(size_t size) = do_nothing;
static void memory_limit_check(size_t size)
{
static int limit = -1;
if (limit == -1) {
const char *env = getenv("GIT_ALLOC_LIMIT");
limit = env ? atoi(env) * 1024 : 0;
}
if (limit && size > limit)
die("attempting to allocate %"PRIuMAX" over limit %d",
(intmax_t)size, limit);
}
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
{
try_to_free_t old = try_to_free_routine;
@@ -32,7 +44,10 @@ char *xstrdup(const char *str)
void *xmalloc(size_t size)
{
void *ret = malloc(size);
void *ret;
memory_limit_check(size);
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret) {
@@ -79,7 +94,10 @@ char *xstrndup(const char *str, size_t len)
void *xrealloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
void *ret;
memory_limit_check(size);
ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
if (!ret) {
@@ -95,7 +113,10 @@ void *xrealloc(void *ptr, size_t size)
void *xcalloc(size_t nmemb, size_t size)
{
void *ret = calloc(nmemb, size);
void *ret;
memory_limit_check(size * nmemb);
ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);
if (!ret) {