summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVivien Kraus <vivien@planete-kraus.eu>2023-04-11 19:57:52 +0200
committerVivien Kraus <vivien@planete-kraus.eu>2023-04-29 11:58:03 +0200
commit2547ad75960088f1aced986ef541c57bab856e34 (patch)
tree6f8b80a87859a92fc386e605e385a9efabecb23b
parente5b8a280da6511b606e2750207df16596cee27a2 (diff)
trie node: introduce a struct to hold all the fields together
-rw-r--r--src/libdisfluid/disfluid-tests.h75
-rw-r--r--src/libdisfluid/disfluid-trie-node.h318
2 files changed, 236 insertions, 157 deletions
diff --git a/src/libdisfluid/disfluid-tests.h b/src/libdisfluid/disfluid-tests.h
index 730739e..e77b497 100644
--- a/src/libdisfluid/disfluid-tests.h
+++ b/src/libdisfluid/disfluid-tests.h
@@ -437,32 +437,33 @@ START_TEST (test_db_trie_inner)
char *database;
prepare_database (&database);
static const char *hello = "hello";
- const string_desc_t common_part = {
+ string_desc_t common_part = {
._nbytes = 5,
._data = (char *) hello
};
static const char series[32] = { 1, 2, 3, 4, 5 };
- const string_desc_t branches[256] = {
- {0},
- {
- ._nbytes = 32,
- ._data = (char *) series},
- {0}
+ string_desc_t interesting_branch = {
+ ._nbytes = 32,
+ ._data = (char *) series
};
ck_assert_int_eq (common_part._nbytes, strlen (hello));
- ck_assert_int_eq (branches[0]._nbytes, 0);
- ck_assert_int_eq (branches[1]._nbytes, sizeof (series));
+ ck_assert_int_eq (interesting_branch._nbytes, sizeof (series));
+ const struct trie_node node = {
+ .common_part = common_part,
+ .branches = {{0}, interesting_branch, {0}},
+ .is_leaf = false
+ };
string_desc_t node_id = { 0 };
- ck_assert_int_eq (db_write_inner_trie_node
- (database, &node_id, common_part, branches), 0);
- string_desc_t actual_common_part = { 0 };
- string_desc_t actual_leaf_id = { 0 };
- string_desc_t actual_branches[256] = { 0 };
- bool actually_leaf = true;
- ck_assert_int_eq (db_read_trie_node
- (database, &node_id, &actual_common_part, &actually_leaf,
- &actual_leaf_id, actual_branches), 0);
- ck_assert (!actually_leaf);
+ ck_assert_int_eq (db_write_trie_node (database, &node_id, &node), 0);
+ struct trie_node *actual_node = NULL;
+ ck_assert_int_eq (db_read_trie_node (database, node_id, &actual_node), 0);
+ ck_assert (!trie_node_is_leaf (actual_node));
+ const string_desc_t actual_common_part =
+ trie_node_common_part (actual_node);
+ const string_desc_t actual_branches[2] = {
+ trie_node_branch (actual_node, 0),
+ trie_node_branch (actual_node, 1)
+ };
ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes);
ck_assert_int_eq (memcmp
(actual_common_part._data, common_part._data,
@@ -471,12 +472,7 @@ START_TEST (test_db_trie_inner)
ck_assert_ptr_null (actual_branches[0]._data);
ck_assert_int_eq (actual_branches[1]._nbytes, 32);
ck_assert_int_eq (memcmp (actual_branches[1]._data, series, 32), 0);
- FREE (actual_common_part._data);
- FREE (actual_leaf_id._data);
- for (size_t i = 0; i < 256; i++)
- {
- FREE (actual_branches[i]._data);
- }
+ trie_node_free (&actual_node);
FREE (node_id._data);
cleanup_database (&database);
}
@@ -503,17 +499,19 @@ START_TEST (test_db_trie_leaf)
};
ck_assert_int_eq (common_part._nbytes, strlen (hello));
ck_assert_int_eq (leaf_id._nbytes, sizeof (series));
+ const struct trie_node node = {
+ .common_part = common_part,
+ .branches = {leaf_id, {0}},
+ .is_leaf = true
+ };
string_desc_t node_id = { 0 };
- ck_assert_int_eq (db_write_leaf_trie_node
- (database, &node_id, common_part, &leaf_id), 0);
- string_desc_t actual_common_part = { 0 };
- string_desc_t actual_leaf_id = { 0 };
- string_desc_t actual_branches[256] = { 0 };
- bool actually_leaf = false;
- ck_assert_int_eq (db_read_trie_node
- (database, &node_id, &actual_common_part, &actually_leaf,
- &actual_leaf_id, actual_branches), 0);
- ck_assert (actually_leaf);
+ ck_assert_int_eq (db_write_trie_node (database, &node_id, &node), 0);
+ struct trie_node *actual_node = NULL;
+ ck_assert_int_eq (db_read_trie_node (database, node_id, &actual_node), 0);
+ const string_desc_t actual_common_part =
+ trie_node_common_part (actual_node);
+ ck_assert (trie_node_is_leaf (actual_node));
+ const string_desc_t actual_leaf_id = trie_node_leaf_id (actual_node);
ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes);
ck_assert_int_eq (memcmp
(actual_common_part._data, common_part._data,
@@ -521,12 +519,7 @@ START_TEST (test_db_trie_leaf)
ck_assert_int_eq (memcmp
(actual_leaf_id._data, leaf_id._data, leaf_id._nbytes),
0);
- FREE (actual_common_part._data);
- FREE (actual_leaf_id._data);
- for (size_t i = 0; i < 256; i++)
- {
- FREE (actual_branches[i]._data);
- }
+ trie_node_free (&actual_node);
FREE (node_id._data);
cleanup_database (&database);
}
diff --git a/src/libdisfluid/disfluid-trie-node.h b/src/libdisfluid/disfluid-trie-node.h
index 276d7f9..caa5df7 100644
--- a/src/libdisfluid/disfluid-trie-node.h
+++ b/src/libdisfluid/disfluid-trie-node.h
@@ -4,63 +4,188 @@
# include <config.h>
# include "string-desc.h"
+struct trie_node;
+
MAYBE_UNUSED static int
-db_mark_trie_node (const char *db_root, const string_desc_t * id,
- int (*mark_leaf) (void *, const string_desc_t *),
- void *context);
+trie_node_init_leaf (struct trie_node **node,
+ const string_desc_t common_part,
+ const string_desc_t leaf_id);
MAYBE_UNUSED static int
-db_read_trie_node (const char *db_root,
- const string_desc_t * id,
- string_desc_t * common_part,
- bool *is_leaf,
- string_desc_t * leaf_id, string_desc_t branches[256]);
+trie_node_init_node (struct trie_node **node,
+ const string_desc_t common_part,
+ const string_desc_t branches[256]);
+
+MAYBE_UNUSED static void trie_node_free (struct trie_node **node);
+
+MAYBE_UNUSED static const string_desc_t
+trie_node_common_part (const struct trie_node *node);
+
+MAYBE_UNUSED static bool trie_node_is_leaf (const struct trie_node *node);
+
+MAYBE_UNUSED static const string_desc_t
+trie_node_leaf_id (const struct trie_node *node);
+
+MAYBE_UNUSED static const string_desc_t
+trie_node_branch (const struct trie_node *node, char branch);
+
+MAYBE_UNUSED static int
+db_mark_trie_node (const char *db_root, const string_desc_t id,
+ int (*mark_leaf) (void *, const string_desc_t),
+ void *context);
MAYBE_UNUSED static int
-db_write_leaf_trie_node (const char *db_root,
- string_desc_t * id,
- const string_desc_t common_part,
- const string_desc_t * leaf_id);
+db_read_trie_node (const char *db_root,
+ const string_desc_t id, struct trie_node **node);
MAYBE_UNUSED static int
-db_write_inner_trie_node (const char *db_root,
- string_desc_t * id,
- const string_desc_t common_part,
- const string_desc_t branches[256]);
+db_write_trie_node (const char *db_root,
+ string_desc_t * id, const struct trie_node *node);
# include "disfluid-db.h"
+# include "disfluid-init.h"
# include "safe-alloc.h"
+struct trie_node
+{
+ string_desc_t common_part;
+ string_desc_t branches[256];
+ /* If a leaf, only branches[0] is set. */
+ bool is_leaf;
+};
+
+static int
+trie_node_init_full (struct trie_node **node,
+ const string_desc_t common_part,
+ bool is_leaf, const string_desc_t branches[256])
+{
+ assert (*node == NULL);
+ ensure_init ();
+ int error = 0;
+ if (ALLOC (*node) < 0)
+ {
+ error = -2;
+ goto cleanup;
+ }
+ if (ALLOC_N ((*node)->common_part._data, common_part._nbytes) < 0)
+ {
+ error = -2;
+ goto cleanup;
+ }
+ (*node)->common_part._nbytes = common_part._nbytes;
+ memcpy ((*node)->common_part._data, common_part._data, common_part._nbytes);
+ (*node)->is_leaf = is_leaf;
+ for (size_t i = 0; i < 256; i++)
+ {
+ (*node)->branches[i]._nbytes = branches[i]._nbytes;
+ if (branches[i]._nbytes != 0)
+ {
+ if (ALLOC_N ((*node)->branches[i]._data, branches[i]._nbytes) < 0)
+ {
+ error = -2;
+ goto cleanup;
+ }
+ memcpy ((*node)->branches[i]._data, branches[i]._data,
+ branches[i]._nbytes);
+ }
+ }
+cleanup:
+ if (error < 0)
+ {
+ trie_node_free (node);
+ }
+ return error;
+}
+
+MAYBE_UNUSED static int
+trie_node_init_leaf (struct trie_node **node,
+ const string_desc_t common_part,
+ const string_desc_t leaf_id)
+{
+ const string_desc_t branches[256] = {
+ /* Only the first one is set. */
+ {
+ ._nbytes = leaf_id._nbytes,
+ ._data = (char *) (leaf_id._data)},
+ {0}
+ };
+ return trie_node_init_full (node, common_part, true, branches);
+}
+
+MAYBE_UNUSED static int
+trie_node_init_node (struct trie_node **node,
+ const string_desc_t common_part,
+ const string_desc_t branches[256])
+{
+ return trie_node_init_full (node, common_part, false, branches);
+}
+
+MAYBE_UNUSED static void
+trie_node_free (struct trie_node **node)
+{
+ if (*node != NULL)
+ {
+ for (size_t i = 0; i < 256; i++)
+ {
+ FREE ((*node)->branches[i]._data);
+ }
+ FREE ((*node)->common_part._data);
+ }
+ FREE (*node);
+}
+
+MAYBE_UNUSED static const string_desc_t
+trie_node_common_part (const struct trie_node *node)
+{
+ return node->common_part;
+}
+
+MAYBE_UNUSED static bool
+trie_node_is_leaf (const struct trie_node *node)
+{
+ return node->is_leaf;
+}
+
+MAYBE_UNUSED static const string_desc_t
+trie_node_leaf_id (const struct trie_node *node)
+{
+ assert (node->is_leaf);
+ return node->branches[0];
+}
+
+MAYBE_UNUSED static const string_desc_t
+trie_node_branch (const struct trie_node *node, char branch)
+{
+ assert (!node->is_leaf);
+ return node->branches[(uint8_t) branch];
+}
+
static int
db_mark_trie_node (const char *db_root,
- const string_desc_t * id,
- int (*mark_leaf) (void *, const string_desc_t *),
+ const string_desc_t id,
+ int (*mark_leaf) (void *, const string_desc_t),
void *context)
{
- bool is_leaf;
- string_desc_t common_part = { 0 };
- string_desc_t leaf_id = { 0 };
- string_desc_t branches[256] = { 0 };
- int error =
- db_read_trie_node (db_root, id, &common_part, &is_leaf, &leaf_id,
- branches);
+ struct trie_node *node = NULL;
+ int error = db_read_trie_node (db_root, id, &node);
if (error < 0)
{
goto cleanup;
}
- if (is_leaf)
+ if (trie_node_is_leaf (node))
{
- error = mark_leaf (context, &leaf_id);
+ const string_desc_t leaf_id = trie_node_leaf_id (node);
+ error = mark_leaf (context, leaf_id);
}
else
{
- for (size_t i = 0; i < 256; i++)
+ for (uint8_t i = 0; i < 256; i++)
{
- if (branches[i]._data != NULL)
+ const string_desc_t branch = trie_node_branch (node, i);
+ if (branch._data != NULL)
{
- int this_error =
- db_mark_trie_node (db_root, &(branches[i]), mark_leaf,
- context);
+ int this_error = db_mark_trie_node (db_root, branch, mark_leaf,
+ context);
if (this_error < 0)
{
error = -1;
@@ -69,12 +194,7 @@ db_mark_trie_node (const char *db_root,
}
}
cleanup:
- FREE (common_part._data);
- FREE (leaf_id._data);
- for (size_t i = 0; i < sizeof (branches) / sizeof (branches[0]); i++)
- {
- FREE (branches[i]._data);
- }
+ trie_node_free (&node);
return error;
}
@@ -90,19 +210,11 @@ cleanup:
static int
db_read_trie_node (const char *db_root,
- const string_desc_t * id,
- string_desc_t * common_part,
- bool *is_leaf,
- string_desc_t * leaf_id, string_desc_t branches[256])
+ const string_desc_t id, struct trie_node **node)
{
string_desc_t raw_data = { 0 };
- assert (common_part->_data == NULL);
- assert (leaf_id->_data == NULL);
- for (size_t i = 0; i < 256; i++)
- {
- assert (branches[i]._data == NULL);
- }
- int error = db_read (db_root, id, &raw_data);
+ size_t n_common = 0;
+ int error = db_read (db_root, &id, &raw_data);
if (error < 0)
{
goto cleanup;
@@ -112,44 +224,37 @@ db_read_trie_node (const char *db_root,
error = -1;
goto cleanup;
}
- common_part->_nbytes = 0;
+ n_common = 0;
for (size_t i = 0; i < 8; i++)
{
- common_part->_nbytes *= 256;
- common_part->_nbytes += (uint8_t) (char) raw_data._data[4 + i];
- }
- if (ALLOC_N (common_part->_data, common_part->_nbytes) < 0)
- {
- error = -2;
- goto cleanup;
+ n_common *= 256;
+ n_common += (uint8_t) (char) raw_data._data[4 + i];
}
if (memcmp (raw_data._data, "leaf", 4) == 0
- && raw_data._nbytes == 4 + 8 + 32 + common_part->_nbytes)
+ && raw_data._nbytes == 4 + 8 + 32 + n_common)
{
- *is_leaf = true;
- leaf_id->_nbytes = 32;
- if (ALLOC_N (leaf_id->_data, 32) < 0)
+ const string_desc_t common_part = {
+ ._nbytes = n_common,
+ ._data = (char *) raw_data._data + 4 + 8 + 32
+ };
+ const string_desc_t leaf_id = {
+ ._nbytes = 32,
+ ._data = (char *) raw_data._data + 4 + 8
+ };
+ error = trie_node_init_leaf (node, common_part, leaf_id);
+ if (error < 0)
{
- error = -2;
goto cleanup;
}
- memcpy (leaf_id->_data, raw_data._data + 4 + 8, 32);
- memcpy (common_part->_data, raw_data._data + 4 + 8 + 32,
- common_part->_nbytes);
}
else if (memcmp (raw_data._data, "node", 4) == 0
- && raw_data._nbytes == 4 + 8 + (256 * 32) + common_part->_nbytes)
+ && raw_data._nbytes == 4 + 8 + (256 * 32) + n_common)
{
- *is_leaf = false;
+ string_desc_t branches[256] = { 0 };
for (size_t i = 0; i < 256; i++)
{
branches[i]._nbytes = 32;
- if (ALLOC_N (branches[i]._data, 32) < 0)
- {
- error = -2;
- goto cleanup;
- }
- memcpy (branches[i]._data, raw_data._data + 4 + 8 + (i * 32), 32);
+ branches[i]._data = (char *) raw_data._data + 4 + 8 + (i * 32);
size_t n_zero = 0;
while (n_zero < branches[i]._nbytes
&& branches[i]._data[n_zero] == 0)
@@ -158,12 +263,19 @@ db_read_trie_node (const char *db_root,
}
if (n_zero == branches[i]._nbytes)
{
- FREE (branches[i]._data);
branches[i]._nbytes = 0;
+ branches[i]._data = NULL;
}
}
- memcpy (common_part->_data, raw_data._data + 4 + 8 + (256 * 32),
- common_part->_nbytes);
+ const string_desc_t common_part = {
+ ._nbytes = n_common,
+ ._data = (char *) raw_data._data + 4 + 8 + (256 * 32)
+ };
+ error = trie_node_init_node (node, common_part, branches);
+ if (error < 0)
+ {
+ goto cleanup;
+ }
}
else
{
@@ -174,31 +286,24 @@ cleanup:
FREE (raw_data._data);
if (error < 0)
{
- FREE (common_part->_data);
- FREE (leaf_id->_data);
- for (size_t i = 0; i < 256; i++)
- {
- FREE (branches[i]._data);
- }
+ trie_node_free (node);
}
return error;
}
static int
-db_write_trie_node_aux (const char *db_root,
- string_desc_t * id,
- bool is_leaf,
- const string_desc_t common_part,
- const string_desc_t * leaf_id,
- const string_desc_t branches[256])
+db_write_trie_node (const char *db_root,
+ string_desc_t * id, const struct trie_node *node)
{
assert (id->_data == NULL);
int error = 0;
size_t offset = 0;
- if (is_leaf)
+ const string_desc_t common_part = trie_node_common_part (node);
+ if (trie_node_is_leaf (node))
{
+ const string_desc_t leaf_id = trie_node_leaf_id (node);
string_desc_t data = { 0 };
- if (leaf_id->_nbytes != 32)
+ if (leaf_id._nbytes != 32)
{
error = -1;
goto cleanup_as_leaf;
@@ -218,7 +323,7 @@ db_write_trie_node_aux (const char *db_root,
nbytes /= 256;
}
offset += 8;
- memcpy (data._data + offset, leaf_id->_data, leaf_id->_nbytes);
+ memcpy (data._data + offset, leaf_id._data, leaf_id._nbytes);
offset += 32;
memcpy (data._data + offset, common_part._data, common_part._nbytes);
error = db_write (db_root, id, &data);
@@ -246,18 +351,21 @@ db_write_trie_node_aux (const char *db_root,
offset += 8;
for (size_t i = 0; i < 256; i++)
{
- if (branches[i]._data == NULL || branches[i]._nbytes == 0)
+ const string_desc_t branch = trie_node_branch (node, (uint8_t) i);
+ if (branch._data == NULL || branch._nbytes == 0)
{
/* I suppose we avoid this one. */
+ assert (branch._data == NULL);
+ assert (branch._nbytes == 0);
}
- else if (branches[i]._nbytes != 32)
+ else if (branch._nbytes != 32)
{
error = -1;
goto cleanup_as_node;
}
else
{
- memcpy (data._data + offset + (i * 32), branches[i]._data, 32);
+ memcpy (data._data + offset + (i * 32), branch._data, 32);
}
}
offset += 32 * 256;
@@ -275,26 +383,4 @@ cleanup:
return error;
}
-static int
-db_write_leaf_trie_node (const char *db_root,
- string_desc_t * id,
- const string_desc_t common_part,
- const string_desc_t * leaf_id)
-{
- string_desc_t unused[256] = { 0 };
- return db_write_trie_node_aux (db_root, id, true, common_part, leaf_id,
- unused);
-}
-
-static int
-db_write_inner_trie_node (const char *db_root,
- string_desc_t * id,
- const string_desc_t common_part,
- const string_desc_t branches[256])
-{
- string_desc_t unused = { 0 };
- return db_write_trie_node_aux (db_root, id, false, common_part, &unused,
- branches);
-}
-
#endif /* not DISFLUID_TRIE_NODE_INCLUDED */