diff --git a/Makefile b/Makefile index 9eff9117dd..a350f63ab0 100644 --- a/Makefile +++ b/Makefile @@ -731,6 +731,7 @@ TEST_BUILTINS_OBJS += test-online-cpus.o TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-path-utils.o TEST_BUILTINS_OBJS += test-pkt-line.o +TEST_BUILTINS_OBJS += test-prefix-map.o TEST_BUILTINS_OBJS += test-prio-queue.o TEST_BUILTINS_OBJS += test-reach.o TEST_BUILTINS_OBJS += test-read-cache.o @@ -947,6 +948,7 @@ LIB_OBJS += patch-ids.o LIB_OBJS += path.o LIB_OBJS += pathspec.o LIB_OBJS += pkt-line.o +LIB_OBJS += prefix-map.o LIB_OBJS += preload-index.o LIB_OBJS += pretty.o LIB_OBJS += prio-queue.o diff --git a/prefix-map.c b/prefix-map.c new file mode 100644 index 0000000000..747ddb4ebc --- /dev/null +++ b/prefix-map.c @@ -0,0 +1,109 @@ +#include "cache.h" +#include "prefix-map.h" + +static int map_cmp(const void *unused_cmp_data, + const void *entry, + const void *entry_or_key, + const void *unused_keydata) +{ + const struct prefix_map_entry *a = entry; + const struct prefix_map_entry *b = entry_or_key; + + return a->prefix_length != b->prefix_length || + strncmp(a->name, b->name, a->prefix_length); +} + +static void add_prefix_entry(struct hashmap *map, const char *name, + size_t prefix_length, struct prefix_item *item) +{ + struct prefix_map_entry *result = xmalloc(sizeof(*result)); + result->name = name; + result->prefix_length = prefix_length; + result->item = item; + hashmap_entry_init(result, memhash(name, prefix_length)); + hashmap_add(map, result); +} + +static void init_prefix_map(struct prefix_map *prefix_map, + int min_prefix_length, int max_prefix_length) +{ + hashmap_init(&prefix_map->map, map_cmp, NULL, 0); + prefix_map->min_length = min_prefix_length; + prefix_map->max_length = max_prefix_length; +} + +static void add_prefix_item(struct prefix_map *prefix_map, + struct prefix_item *item) +{ + struct prefix_map_entry e = { { NULL } }, *e2; + int j; + + e.item = item; + e.name = item->name; + + for (j = prefix_map->min_length; + j <= prefix_map->max_length && e.name[j]; j++) { + /* Avoid breaking UTF-8 multi-byte sequences */ + if (!isascii(e.name[j])) + break; + + e.prefix_length = j; + hashmap_entry_init(&e, memhash(e.name, j)); + e2 = hashmap_get(&prefix_map->map, &e, NULL); + if (!e2) { + /* prefix is unique at this stage */ + item->prefix_length = j; + add_prefix_entry(&prefix_map->map, e.name, j, item); + break; + } + + if (!e2->item) + continue; /* non-unique prefix */ + + if (j != e2->item->prefix_length || memcmp(e.name, e2->name, j)) + BUG("unexpected prefix length: %d != %d (%s != %s)", + j, (int)e2->item->prefix_length, e.name, e2->name); + + /* skip common prefix */ + for (; j < prefix_map->max_length && e.name[j]; j++) { + if (e.item->name[j] != e2->item->name[j]) + break; + add_prefix_entry(&prefix_map->map, e.name, j + 1, + NULL); + } + + /* e2 no longer refers to a unique prefix */ + if (j < prefix_map->max_length && e2->name[j]) { + /* found a new unique prefix for e2's item */ + e2->item->prefix_length = j + 1; + add_prefix_entry(&prefix_map->map, e2->name, j + 1, + e2->item); + } + else + e2->item->prefix_length = 0; + e2->item = NULL; + + if (j < prefix_map->max_length && e.name[j]) { + /* found a unique prefix for the item */ + e.item->prefix_length = j + 1; + add_prefix_entry(&prefix_map->map, e.name, j + 1, + e.item); + } else + /* item has no (short enough) unique prefix */ + e.item->prefix_length = 0; + + break; + } +} + +void find_unique_prefixes(struct prefix_item **list, size_t nr, + int min_length, int max_length) +{ + int i; + struct prefix_map prefix_map; + + init_prefix_map(&prefix_map, min_length, max_length); + for (i = 0; i < nr; i++) + add_prefix_item(&prefix_map, list[i]); + hashmap_free(&prefix_map.map, 1); +} diff --git a/prefix-map.h b/prefix-map.h new file mode 100644 index 0000000000..ce3b8a4a32 --- /dev/null +++ b/prefix-map.h @@ -0,0 +1,40 @@ +#ifndef PREFIX_MAP_H +#define PREFIX_MAP_H + +#include "hashmap.h" + +struct prefix_item { + const char *name; + size_t prefix_length; +}; + +struct prefix_map_entry { + struct hashmap_entry e; + const char *name; + size_t prefix_length; + /* if item is NULL, the prefix is not unique */ + struct prefix_item *item; +}; + +struct prefix_map { + struct hashmap map; + int min_length, max_length; +}; + +/* + * Find unique prefixes in a given list of strings. + * + * Typically, the `struct prefix_item` information will be but a field in the + * actual item struct; For this reason, the `list` parameter is specified as a + * list of pointers to the items. + * + * The `min_length`/`max_length` parameters define what length the unique + * prefixes should have. + * + * If no unique prefix could be found for a given item, its `prefix_length` + * will be set to 0. + */ +void find_unique_prefixes(struct prefix_item **list, size_t nr, + int min_length, int max_length); + +#endif diff --git a/t/helper/test-prefix-map.c b/t/helper/test-prefix-map.c new file mode 100644 index 0000000000..3f1c90eaf0 --- /dev/null +++ b/t/helper/test-prefix-map.c @@ -0,0 +1,58 @@ +#include "test-tool.h" +#include "cache.h" +#include "prefix-map.h" + +static size_t test_count, failed_count; + +static void check(int succeeded, const char *file, size_t line_no, + const char *fmt, ...) +{ + va_list ap; + + test_count++; + if (succeeded) + return; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, (int)line_no); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + va_end(ap); + + failed_count++; +} + +#define EXPECT_SIZE_T_EQUALS(expect, actual, hint) \ + check(expect == actual, __FILE__, __LINE__, \ + "size_t's do not match: %" \ + PRIdMAX " != %" PRIdMAX " (%s) (%s)", \ + (intmax_t)expect, (intmax_t)actual, #actual, hint) + +int cmd__prefix_map(int argc, const char **argv) +{ +#define NR 5 + struct prefix_item items[NR] = { + { "unique" }, + { "hell" }, + { "hello" }, + { "wok" }, + { "world" }, + }; + struct prefix_item *list[NR] = { + items, items + 1, items + 2, items + 3, items + 4 + }; + + find_unique_prefixes(list, NR, 1, 3); + +#define EXPECT_PREFIX_LENGTH_EQUALS(expect, index) \ + EXPECT_SIZE_T_EQUALS(expect, list[index]->prefix_length, \ + list[index]->name) + + EXPECT_PREFIX_LENGTH_EQUALS(1, 0); + EXPECT_PREFIX_LENGTH_EQUALS(0, 1); + EXPECT_PREFIX_LENGTH_EQUALS(0, 2); + EXPECT_PREFIX_LENGTH_EQUALS(3, 3); + EXPECT_PREFIX_LENGTH_EQUALS(3, 4); + + return !!failed_count; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 087a8c0cc9..e076771b96 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -39,6 +39,7 @@ static struct test_cmd cmds[] = { { "parse-options", cmd__parse_options }, { "path-utils", cmd__path_utils }, { "pkt-line", cmd__pkt_line }, + { "prefix-map", cmd__prefix_map }, { "prio-queue", cmd__prio_queue }, { "reach", cmd__reach }, { "read-cache", cmd__read_cache }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 7e703f3038..891bac703b 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -29,6 +29,7 @@ int cmd__online_cpus(int argc, const char **argv); int cmd__parse_options(int argc, const char **argv); int cmd__path_utils(int argc, const char **argv); int cmd__pkt_line(int argc, const char **argv); +int cmd__prefix_map(int argc, const char **argv); int cmd__prio_queue(int argc, const char **argv); int cmd__reach(int argc, const char **argv); int cmd__read_cache(int argc, const char **argv); diff --git a/t/t0016-prefix-map.sh b/t/t0016-prefix-map.sh new file mode 100755 index 0000000000..187fa92aec --- /dev/null +++ b/t/t0016-prefix-map.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +test_description='basic tests for prefix map' +. ./test-lib.sh + +test_expect_success 'prefix map' ' + test-tool prefix-map +' + +test_done