diff options
author | Vivien Kraus <vivien@planete-kraus.eu> | 2023-04-11 19:57:52 +0200 |
---|---|---|
committer | Vivien Kraus <vivien@planete-kraus.eu> | 2023-04-29 11:58:03 +0200 |
commit | 2547ad75960088f1aced986ef541c57bab856e34 (patch) | |
tree | 6f8b80a87859a92fc386e605e385a9efabecb23b | |
parent | e5b8a280da6511b606e2750207df16596cee27a2 (diff) |
trie node: introduce a struct to hold all the fields together
-rw-r--r-- | src/libdisfluid/disfluid-tests.h | 75 | ||||
-rw-r--r-- | src/libdisfluid/disfluid-trie-node.h | 318 |
2 files changed, 236 insertions, 157 deletions
diff --git a/src/libdisfluid/disfluid-tests.h b/src/libdisfluid/disfluid-tests.h index 730739e..e77b497 100644 --- a/src/libdisfluid/disfluid-tests.h +++ b/src/libdisfluid/disfluid-tests.h @@ -437,32 +437,33 @@ START_TEST (test_db_trie_inner) char *database; prepare_database (&database); static const char *hello = "hello"; - const string_desc_t common_part = { + string_desc_t common_part = { ._nbytes = 5, ._data = (char *) hello }; static const char series[32] = { 1, 2, 3, 4, 5 }; - const string_desc_t branches[256] = { - {0}, - { - ._nbytes = 32, - ._data = (char *) series}, - {0} + string_desc_t interesting_branch = { + ._nbytes = 32, + ._data = (char *) series }; ck_assert_int_eq (common_part._nbytes, strlen (hello)); - ck_assert_int_eq (branches[0]._nbytes, 0); - ck_assert_int_eq (branches[1]._nbytes, sizeof (series)); + ck_assert_int_eq (interesting_branch._nbytes, sizeof (series)); + const struct trie_node node = { + .common_part = common_part, + .branches = {{0}, interesting_branch, {0}}, + .is_leaf = false + }; string_desc_t node_id = { 0 }; - ck_assert_int_eq (db_write_inner_trie_node - (database, &node_id, common_part, branches), 0); - string_desc_t actual_common_part = { 0 }; - string_desc_t actual_leaf_id = { 0 }; - string_desc_t actual_branches[256] = { 0 }; - bool actually_leaf = true; - ck_assert_int_eq (db_read_trie_node - (database, &node_id, &actual_common_part, &actually_leaf, - &actual_leaf_id, actual_branches), 0); - ck_assert (!actually_leaf); + ck_assert_int_eq (db_write_trie_node (database, &node_id, &node), 0); + struct trie_node *actual_node = NULL; + ck_assert_int_eq (db_read_trie_node (database, node_id, &actual_node), 0); + ck_assert (!trie_node_is_leaf (actual_node)); + const string_desc_t actual_common_part = + trie_node_common_part (actual_node); + const string_desc_t actual_branches[2] = { + trie_node_branch (actual_node, 0), + trie_node_branch (actual_node, 1) + }; ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes); ck_assert_int_eq (memcmp (actual_common_part._data, common_part._data, @@ -471,12 +472,7 @@ START_TEST (test_db_trie_inner) ck_assert_ptr_null (actual_branches[0]._data); ck_assert_int_eq (actual_branches[1]._nbytes, 32); ck_assert_int_eq (memcmp (actual_branches[1]._data, series, 32), 0); - FREE (actual_common_part._data); - FREE (actual_leaf_id._data); - for (size_t i = 0; i < 256; i++) - { - FREE (actual_branches[i]._data); - } + trie_node_free (&actual_node); FREE (node_id._data); cleanup_database (&database); } @@ -503,17 +499,19 @@ START_TEST (test_db_trie_leaf) }; ck_assert_int_eq (common_part._nbytes, strlen (hello)); ck_assert_int_eq (leaf_id._nbytes, sizeof (series)); + const struct trie_node node = { + .common_part = common_part, + .branches = {leaf_id, {0}}, + .is_leaf = true + }; string_desc_t node_id = { 0 }; - ck_assert_int_eq (db_write_leaf_trie_node - (database, &node_id, common_part, &leaf_id), 0); - string_desc_t actual_common_part = { 0 }; - string_desc_t actual_leaf_id = { 0 }; - string_desc_t actual_branches[256] = { 0 }; - bool actually_leaf = false; - ck_assert_int_eq (db_read_trie_node - (database, &node_id, &actual_common_part, &actually_leaf, - &actual_leaf_id, actual_branches), 0); - ck_assert (actually_leaf); + ck_assert_int_eq (db_write_trie_node (database, &node_id, &node), 0); + struct trie_node *actual_node = NULL; + ck_assert_int_eq (db_read_trie_node (database, node_id, &actual_node), 0); + const string_desc_t actual_common_part = + trie_node_common_part (actual_node); + ck_assert (trie_node_is_leaf (actual_node)); + const string_desc_t actual_leaf_id = trie_node_leaf_id (actual_node); ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes); ck_assert_int_eq (memcmp (actual_common_part._data, common_part._data, @@ -521,12 +519,7 @@ START_TEST (test_db_trie_leaf) ck_assert_int_eq (memcmp (actual_leaf_id._data, leaf_id._data, leaf_id._nbytes), 0); - FREE (actual_common_part._data); - FREE (actual_leaf_id._data); - for (size_t i = 0; i < 256; i++) - { - FREE (actual_branches[i]._data); - } + trie_node_free (&actual_node); FREE (node_id._data); cleanup_database (&database); } diff --git a/src/libdisfluid/disfluid-trie-node.h b/src/libdisfluid/disfluid-trie-node.h index 276d7f9..caa5df7 100644 --- a/src/libdisfluid/disfluid-trie-node.h +++ b/src/libdisfluid/disfluid-trie-node.h @@ -4,63 +4,188 @@ # include <config.h> # include "string-desc.h" +struct trie_node; + MAYBE_UNUSED static int -db_mark_trie_node (const char *db_root, const string_desc_t * id, - int (*mark_leaf) (void *, const string_desc_t *), - void *context); +trie_node_init_leaf (struct trie_node **node, + const string_desc_t common_part, + const string_desc_t leaf_id); MAYBE_UNUSED static int -db_read_trie_node (const char *db_root, - const string_desc_t * id, - string_desc_t * common_part, - bool *is_leaf, - string_desc_t * leaf_id, string_desc_t branches[256]); +trie_node_init_node (struct trie_node **node, + const string_desc_t common_part, + const string_desc_t branches[256]); + +MAYBE_UNUSED static void trie_node_free (struct trie_node **node); + +MAYBE_UNUSED static const string_desc_t +trie_node_common_part (const struct trie_node *node); + +MAYBE_UNUSED static bool trie_node_is_leaf (const struct trie_node *node); + +MAYBE_UNUSED static const string_desc_t +trie_node_leaf_id (const struct trie_node *node); + +MAYBE_UNUSED static const string_desc_t +trie_node_branch (const struct trie_node *node, char branch); + +MAYBE_UNUSED static int +db_mark_trie_node (const char *db_root, const string_desc_t id, + int (*mark_leaf) (void *, const string_desc_t), + void *context); MAYBE_UNUSED static int -db_write_leaf_trie_node (const char *db_root, - string_desc_t * id, - const string_desc_t common_part, - const string_desc_t * leaf_id); +db_read_trie_node (const char *db_root, + const string_desc_t id, struct trie_node **node); MAYBE_UNUSED static int -db_write_inner_trie_node (const char *db_root, - string_desc_t * id, - const string_desc_t common_part, - const string_desc_t branches[256]); +db_write_trie_node (const char *db_root, + string_desc_t * id, const struct trie_node *node); # include "disfluid-db.h" +# include "disfluid-init.h" # include "safe-alloc.h" +struct trie_node +{ + string_desc_t common_part; + string_desc_t branches[256]; + /* If a leaf, only branches[0] is set. */ + bool is_leaf; +}; + +static int +trie_node_init_full (struct trie_node **node, + const string_desc_t common_part, + bool is_leaf, const string_desc_t branches[256]) +{ + assert (*node == NULL); + ensure_init (); + int error = 0; + if (ALLOC (*node) < 0) + { + error = -2; + goto cleanup; + } + if (ALLOC_N ((*node)->common_part._data, common_part._nbytes) < 0) + { + error = -2; + goto cleanup; + } + (*node)->common_part._nbytes = common_part._nbytes; + memcpy ((*node)->common_part._data, common_part._data, common_part._nbytes); + (*node)->is_leaf = is_leaf; + for (size_t i = 0; i < 256; i++) + { + (*node)->branches[i]._nbytes = branches[i]._nbytes; + if (branches[i]._nbytes != 0) + { + if (ALLOC_N ((*node)->branches[i]._data, branches[i]._nbytes) < 0) + { + error = -2; + goto cleanup; + } + memcpy ((*node)->branches[i]._data, branches[i]._data, + branches[i]._nbytes); + } + } +cleanup: + if (error < 0) + { + trie_node_free (node); + } + return error; +} + +MAYBE_UNUSED static int +trie_node_init_leaf (struct trie_node **node, + const string_desc_t common_part, + const string_desc_t leaf_id) +{ + const string_desc_t branches[256] = { + /* Only the first one is set. */ + { + ._nbytes = leaf_id._nbytes, + ._data = (char *) (leaf_id._data)}, + {0} + }; + return trie_node_init_full (node, common_part, true, branches); +} + +MAYBE_UNUSED static int +trie_node_init_node (struct trie_node **node, + const string_desc_t common_part, + const string_desc_t branches[256]) +{ + return trie_node_init_full (node, common_part, false, branches); +} + +MAYBE_UNUSED static void +trie_node_free (struct trie_node **node) +{ + if (*node != NULL) + { + for (size_t i = 0; i < 256; i++) + { + FREE ((*node)->branches[i]._data); + } + FREE ((*node)->common_part._data); + } + FREE (*node); +} + +MAYBE_UNUSED static const string_desc_t +trie_node_common_part (const struct trie_node *node) +{ + return node->common_part; +} + +MAYBE_UNUSED static bool +trie_node_is_leaf (const struct trie_node *node) +{ + return node->is_leaf; +} + +MAYBE_UNUSED static const string_desc_t +trie_node_leaf_id (const struct trie_node *node) +{ + assert (node->is_leaf); + return node->branches[0]; +} + +MAYBE_UNUSED static const string_desc_t +trie_node_branch (const struct trie_node *node, char branch) +{ + assert (!node->is_leaf); + return node->branches[(uint8_t) branch]; +} + static int db_mark_trie_node (const char *db_root, - const string_desc_t * id, - int (*mark_leaf) (void *, const string_desc_t *), + const string_desc_t id, + int (*mark_leaf) (void *, const string_desc_t), void *context) { - bool is_leaf; - string_desc_t common_part = { 0 }; - string_desc_t leaf_id = { 0 }; - string_desc_t branches[256] = { 0 }; - int error = - db_read_trie_node (db_root, id, &common_part, &is_leaf, &leaf_id, - branches); + struct trie_node *node = NULL; + int error = db_read_trie_node (db_root, id, &node); if (error < 0) { goto cleanup; } - if (is_leaf) + if (trie_node_is_leaf (node)) { - error = mark_leaf (context, &leaf_id); + const string_desc_t leaf_id = trie_node_leaf_id (node); + error = mark_leaf (context, leaf_id); } else { - for (size_t i = 0; i < 256; i++) + for (uint8_t i = 0; i < 256; i++) { - if (branches[i]._data != NULL) + const string_desc_t branch = trie_node_branch (node, i); + if (branch._data != NULL) { - int this_error = - db_mark_trie_node (db_root, &(branches[i]), mark_leaf, - context); + int this_error = db_mark_trie_node (db_root, branch, mark_leaf, + context); if (this_error < 0) { error = -1; @@ -69,12 +194,7 @@ db_mark_trie_node (const char *db_root, } } cleanup: - FREE (common_part._data); - FREE (leaf_id._data); - for (size_t i = 0; i < sizeof (branches) / sizeof (branches[0]); i++) - { - FREE (branches[i]._data); - } + trie_node_free (&node); return error; } @@ -90,19 +210,11 @@ cleanup: static int db_read_trie_node (const char *db_root, - const string_desc_t * id, - string_desc_t * common_part, - bool *is_leaf, - string_desc_t * leaf_id, string_desc_t branches[256]) + const string_desc_t id, struct trie_node **node) { string_desc_t raw_data = { 0 }; - assert (common_part->_data == NULL); - assert (leaf_id->_data == NULL); - for (size_t i = 0; i < 256; i++) - { - assert (branches[i]._data == NULL); - } - int error = db_read (db_root, id, &raw_data); + size_t n_common = 0; + int error = db_read (db_root, &id, &raw_data); if (error < 0) { goto cleanup; @@ -112,44 +224,37 @@ db_read_trie_node (const char *db_root, error = -1; goto cleanup; } - common_part->_nbytes = 0; + n_common = 0; for (size_t i = 0; i < 8; i++) { - common_part->_nbytes *= 256; - common_part->_nbytes += (uint8_t) (char) raw_data._data[4 + i]; - } - if (ALLOC_N (common_part->_data, common_part->_nbytes) < 0) - { - error = -2; - goto cleanup; + n_common *= 256; + n_common += (uint8_t) (char) raw_data._data[4 + i]; } if (memcmp (raw_data._data, "leaf", 4) == 0 - && raw_data._nbytes == 4 + 8 + 32 + common_part->_nbytes) + && raw_data._nbytes == 4 + 8 + 32 + n_common) { - *is_leaf = true; - leaf_id->_nbytes = 32; - if (ALLOC_N (leaf_id->_data, 32) < 0) + const string_desc_t common_part = { + ._nbytes = n_common, + ._data = (char *) raw_data._data + 4 + 8 + 32 + }; + const string_desc_t leaf_id = { + ._nbytes = 32, + ._data = (char *) raw_data._data + 4 + 8 + }; + error = trie_node_init_leaf (node, common_part, leaf_id); + if (error < 0) { - error = -2; goto cleanup; } - memcpy (leaf_id->_data, raw_data._data + 4 + 8, 32); - memcpy (common_part->_data, raw_data._data + 4 + 8 + 32, - common_part->_nbytes); } else if (memcmp (raw_data._data, "node", 4) == 0 - && raw_data._nbytes == 4 + 8 + (256 * 32) + common_part->_nbytes) + && raw_data._nbytes == 4 + 8 + (256 * 32) + n_common) { - *is_leaf = false; + string_desc_t branches[256] = { 0 }; for (size_t i = 0; i < 256; i++) { branches[i]._nbytes = 32; - if (ALLOC_N (branches[i]._data, 32) < 0) - { - error = -2; - goto cleanup; - } - memcpy (branches[i]._data, raw_data._data + 4 + 8 + (i * 32), 32); + branches[i]._data = (char *) raw_data._data + 4 + 8 + (i * 32); size_t n_zero = 0; while (n_zero < branches[i]._nbytes && branches[i]._data[n_zero] == 0) @@ -158,12 +263,19 @@ db_read_trie_node (const char *db_root, } if (n_zero == branches[i]._nbytes) { - FREE (branches[i]._data); branches[i]._nbytes = 0; + branches[i]._data = NULL; } } - memcpy (common_part->_data, raw_data._data + 4 + 8 + (256 * 32), - common_part->_nbytes); + const string_desc_t common_part = { + ._nbytes = n_common, + ._data = (char *) raw_data._data + 4 + 8 + (256 * 32) + }; + error = trie_node_init_node (node, common_part, branches); + if (error < 0) + { + goto cleanup; + } } else { @@ -174,31 +286,24 @@ cleanup: FREE (raw_data._data); if (error < 0) { - FREE (common_part->_data); - FREE (leaf_id->_data); - for (size_t i = 0; i < 256; i++) - { - FREE (branches[i]._data); - } + trie_node_free (node); } return error; } static int -db_write_trie_node_aux (const char *db_root, - string_desc_t * id, - bool is_leaf, - const string_desc_t common_part, - const string_desc_t * leaf_id, - const string_desc_t branches[256]) +db_write_trie_node (const char *db_root, + string_desc_t * id, const struct trie_node *node) { assert (id->_data == NULL); int error = 0; size_t offset = 0; - if (is_leaf) + const string_desc_t common_part = trie_node_common_part (node); + if (trie_node_is_leaf (node)) { + const string_desc_t leaf_id = trie_node_leaf_id (node); string_desc_t data = { 0 }; - if (leaf_id->_nbytes != 32) + if (leaf_id._nbytes != 32) { error = -1; goto cleanup_as_leaf; @@ -218,7 +323,7 @@ db_write_trie_node_aux (const char *db_root, nbytes /= 256; } offset += 8; - memcpy (data._data + offset, leaf_id->_data, leaf_id->_nbytes); + memcpy (data._data + offset, leaf_id._data, leaf_id._nbytes); offset += 32; memcpy (data._data + offset, common_part._data, common_part._nbytes); error = db_write (db_root, id, &data); @@ -246,18 +351,21 @@ db_write_trie_node_aux (const char *db_root, offset += 8; for (size_t i = 0; i < 256; i++) { - if (branches[i]._data == NULL || branches[i]._nbytes == 0) + const string_desc_t branch = trie_node_branch (node, (uint8_t) i); + if (branch._data == NULL || branch._nbytes == 0) { /* I suppose we avoid this one. */ + assert (branch._data == NULL); + assert (branch._nbytes == 0); } - else if (branches[i]._nbytes != 32) + else if (branch._nbytes != 32) { error = -1; goto cleanup_as_node; } else { - memcpy (data._data + offset + (i * 32), branches[i]._data, 32); + memcpy (data._data + offset + (i * 32), branch._data, 32); } } offset += 32 * 256; @@ -275,26 +383,4 @@ cleanup: return error; } -static int -db_write_leaf_trie_node (const char *db_root, - string_desc_t * id, - const string_desc_t common_part, - const string_desc_t * leaf_id) -{ - string_desc_t unused[256] = { 0 }; - return db_write_trie_node_aux (db_root, id, true, common_part, leaf_id, - unused); -} - -static int -db_write_inner_trie_node (const char *db_root, - string_desc_t * id, - const string_desc_t common_part, - const string_desc_t branches[256]) -{ - string_desc_t unused = { 0 }; - return db_write_trie_node_aux (db_root, id, false, common_part, &unused, - branches); -} - #endif /* not DISFLUID_TRIE_NODE_INCLUDED */ |