summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVivien Kraus <vivien@planete-kraus.eu>2023-04-04 20:02:01 +0200
committerVivien Kraus <vivien@planete-kraus.eu>2023-04-29 11:58:03 +0200
commitaff2a2daebd4626cd6efb7663818962c0c22bc9a (patch)
treee0526a8a7ea14c0659607ccad25b6cddda4d0f17
parent2b2c08a19e32955cc5eacae9af9bca01829270cc (diff)
Store cache trie nodes in the database
-rw-r--r--src/libdisfluid/Makefile.am1
-rw-r--r--src/libdisfluid/disfluid-tests.h111
-rw-r--r--src/libdisfluid/disfluid-trie-node.h270
3 files changed, 382 insertions, 0 deletions
diff --git a/src/libdisfluid/Makefile.am b/src/libdisfluid/Makefile.am
index dc377c9..aeba503 100644
--- a/src/libdisfluid/Makefile.am
+++ b/src/libdisfluid/Makefile.am
@@ -8,6 +8,7 @@ lib_LTLIBRARIES += %D%/libdisfluid.la
%D%/disfluid-db.h \
%D%/disfluid-init.h \
%D%/disfluid-tests.h \
+ %D%/disfluid-trie-node.h \
%D%/disfluid-ui.h \
%D%/disfluid-version.h \
%D%/main.c
diff --git a/src/libdisfluid/disfluid-tests.h b/src/libdisfluid/disfluid-tests.h
index 325f2ca..c949940 100644
--- a/src/libdisfluid/disfluid-tests.h
+++ b/src/libdisfluid/disfluid-tests.h
@@ -7,6 +7,7 @@ static inline char *run_tests (size_t *n_tests, size_t *n_errors);
# include <check.h>
+# include "disfluid-trie-node.h"
# include "disfluid-init.h"
# include "disfluid-version.h"
# include "disfluid-cache-entry.h"
@@ -428,6 +429,114 @@ START_TEST (test_db)
END_TEST
/* *INDENT-ON* */
+/* *INDENT-OFF* */
+START_TEST (test_db_trie_inner)
+/* *INDENT-ON* */
+
+{
+ char *database;
+ prepare_database (&database);
+ static const char *hello = "hello";
+ const string_desc_t common_part = {
+ ._nbytes = 5,
+ ._data = (char *) hello
+ };
+ static const char series[32] = { 1, 2, 3, 4, 5 };
+ const string_desc_t branches[256] = {
+ {0},
+ {
+ ._nbytes = 32,
+ ._data = (char *) series},
+ {0}
+ };
+ ck_assert_int_eq (common_part._nbytes, strlen (hello));
+ ck_assert_int_eq (branches[0]._nbytes, 0);
+ ck_assert_int_eq (branches[1]._nbytes, sizeof (series));
+ string_desc_t node_id = { 0 };
+ ck_assert_int_eq (db_write_inner_trie_node
+ (database, &node_id, common_part, branches), 0);
+ string_desc_t actual_common_part = { 0 };
+ string_desc_t actual_leaf_id = { 0 };
+ string_desc_t actual_branches[256] = { 0 };
+ bool actually_leaf = true;
+ ck_assert_int_eq (db_read_trie_node
+ (database, &node_id, &actual_common_part, &actually_leaf,
+ &actual_leaf_id, actual_branches), 0);
+ ck_assert (!actually_leaf);
+ ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes);
+ ck_assert_int_eq (memcmp
+ (actual_common_part._data, common_part._data,
+ common_part._nbytes), 0);
+ ck_assert_int_eq (actual_branches[0]._nbytes, 32);
+ for (size_t i = 0; i < 32; i++)
+ {
+ ck_assert_int_eq (actual_branches[0]._data[i], 0);
+ }
+ ck_assert_int_eq (actual_branches[1]._nbytes, 32);
+ ck_assert_int_eq (memcmp (actual_branches[1]._data, series, 32), 0);
+ FREE (actual_common_part._data);
+ FREE (actual_leaf_id._data);
+ for (size_t i = 0; i < 256; i++)
+ {
+ FREE (actual_branches[i]._data);
+ }
+ FREE (node_id._data);
+ cleanup_database (&database);
+}
+/* *INDENT-OFF* */
+END_TEST
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+START_TEST (test_db_trie_leaf)
+/* *INDENT-ON* */
+
+{
+ char *database;
+ prepare_database (&database);
+ static const char *hello = "hello";
+ const string_desc_t common_part = {
+ ._nbytes = 5,
+ ._data = (char *) hello
+ };
+ static const char series[32] = { 1, 2, 3, 4, 5 };
+ const string_desc_t leaf_id = {
+ ._nbytes = 32,
+ ._data = (char *) series
+ };
+ ck_assert_int_eq (common_part._nbytes, strlen (hello));
+ ck_assert_int_eq (leaf_id._nbytes, sizeof (series));
+ string_desc_t node_id = { 0 };
+ ck_assert_int_eq (db_write_leaf_trie_node
+ (database, &node_id, common_part, &leaf_id), 0);
+ string_desc_t actual_common_part = { 0 };
+ string_desc_t actual_leaf_id = { 0 };
+ string_desc_t actual_branches[256] = { 0 };
+ bool actually_leaf = false;
+ ck_assert_int_eq (db_read_trie_node
+ (database, &node_id, &actual_common_part, &actually_leaf,
+ &actual_leaf_id, actual_branches), 0);
+ ck_assert (actually_leaf);
+ ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes);
+ ck_assert_int_eq (memcmp
+ (actual_common_part._data, common_part._data,
+ common_part._nbytes), 0);
+ ck_assert_int_eq (memcmp
+ (actual_leaf_id._data, leaf_id._data, leaf_id._nbytes),
+ 0);
+ FREE (actual_common_part._data);
+ FREE (actual_leaf_id._data);
+ for (size_t i = 0; i < 256; i++)
+ {
+ FREE (actual_branches[i]._data);
+ }
+ FREE (node_id._data);
+ cleanup_database (&database);
+}
+/* *INDENT-OFF* */
+END_TEST
+/* *INDENT-ON* */
+
static inline char *
tests_read_whole_file (int file)
{
@@ -479,6 +588,8 @@ run_tests (size_t *n_tests, size_t *n_errors)
suite_add_tcase (suite, cache_hash);
TCase *db = tcase_create (_("disfluid database"));
tcase_add_test (db, test_db);
+ tcase_add_test (db, test_db_trie_inner);
+ tcase_add_test (db, test_db_trie_leaf);
suite_add_tcase (suite, db);
SRunner *runner = srunner_create (suite);
char log_file_name[] = "/tmp/disfluid-unit-tests-XXXXXX";
diff --git a/src/libdisfluid/disfluid-trie-node.h b/src/libdisfluid/disfluid-trie-node.h
new file mode 100644
index 0000000..855a236
--- /dev/null
+++ b/src/libdisfluid/disfluid-trie-node.h
@@ -0,0 +1,270 @@
+#ifndef DISFLUID_TRIE_NODE_INCLUDED
+# define DISFLUID_TRIE_NODE_INCLUDED
+
+# include <config.h>
+# include "string-desc.h"
+
+MAYBE_UNUSED static int
+db_mark_trie_node (const char *db_root, const string_desc_t * id,
+ int (*mark_leaf) (void *, const string_desc_t *),
+ void *context);
+
+MAYBE_UNUSED static int
+db_read_trie_node (const char *db_root,
+ const string_desc_t * id,
+ string_desc_t * common_part,
+ bool *is_leaf,
+ string_desc_t * leaf_id, string_desc_t branches[256]);
+
+MAYBE_UNUSED static int
+db_write_leaf_trie_node (const char *db_root,
+ string_desc_t * id,
+ const string_desc_t common_part,
+ const string_desc_t * leaf_id);
+
+MAYBE_UNUSED static int
+db_write_inner_trie_node (const char *db_root,
+ string_desc_t * id,
+ const string_desc_t common_part,
+ const string_desc_t branches[256]);
+
+# include "disfluid-db.h"
+# include "safe-alloc.h"
+
+static int
+db_mark_trie_node (const char *db_root,
+ const string_desc_t * id,
+ int (*mark_leaf) (void *, const string_desc_t *),
+ void *context)
+{
+ bool is_leaf;
+ string_desc_t common_part = { 0 };
+ string_desc_t leaf_id = { 0 };
+ string_desc_t branches[256] = { 0 };
+ int error =
+ db_read_trie_node (db_root, id, &common_part, &is_leaf, &leaf_id,
+ branches);
+ if (error < 0)
+ {
+ goto cleanup;
+ }
+ if (is_leaf)
+ {
+ error = mark_leaf (context, &leaf_id);
+ }
+ else
+ {
+ for (size_t i = 0; i < 256; i++)
+ {
+ if (branches[i]._data != NULL)
+ {
+ int this_error =
+ db_mark_trie_node (db_root, &(branches[i]), mark_leaf,
+ context);
+ if (this_error < 0)
+ {
+ error = -1;
+ }
+ }
+ }
+ }
+cleanup:
+ FREE (common_part._data);
+ FREE (leaf_id._data);
+ for (size_t i = 0; i < sizeof (branches) / sizeof (branches[0]); i++)
+ {
+ FREE (branches[i]._data);
+ }
+ return error;
+}
+
+/* The trie node is a file composed of: */
+/* - "leaf" if this is a leaf node, "node" otherwise; */
+/* - the common part length, on 1 byte, minimum 1, maximum 251; */
+/* - the common part bytes, padded with 0s for 251 bytes; */
+/* - if this is a leaf node: 32 bytes for the ID of the content; */
+/* - if this is a non-leaf node: 256 * 32 bytes for the ID of the branches. */
+
+static int
+db_read_trie_node (const char *db_root,
+ const string_desc_t * id,
+ string_desc_t * common_part,
+ bool *is_leaf,
+ string_desc_t * leaf_id, string_desc_t branches[256])
+{
+ string_desc_t raw_data = { 0 };
+ assert (common_part->_data == NULL);
+ assert (leaf_id->_data == NULL);
+ for (size_t i = 0; i < 256; i++)
+ {
+ assert (branches[i]._data == NULL);
+ }
+ int error = db_read (db_root, id, &raw_data);
+ if (error < 0)
+ {
+ goto cleanup;
+ }
+ if (raw_data._nbytes != 256 + 32 && raw_data._nbytes != 256 + (256 * 32))
+ {
+ error = -1;
+ goto cleanup;
+ }
+ common_part->_nbytes = (uint8_t) (char) raw_data._data[4];
+ if (common_part->_nbytes < 1 || common_part->_nbytes > 251)
+ {
+ error = -1;
+ goto cleanup;
+ }
+ if (ALLOC_N (common_part->_data, common_part->_nbytes) < 0)
+ {
+ error = -2;
+ goto cleanup;
+ }
+ memcpy (common_part->_data, raw_data._data + 5, common_part->_nbytes);
+ if (memcmp (raw_data._data, "leaf", 4) == 0 && raw_data._nbytes == 256 + 32)
+ {
+ *is_leaf = true;
+ leaf_id->_nbytes = 32;
+ if (ALLOC_N (leaf_id->_data, 32) < 0)
+ {
+ error = -2;
+ goto cleanup;
+ }
+ memcpy (leaf_id->_data, raw_data._data + 256, 32);
+ }
+ else if (memcmp (raw_data._data, "node", 4) == 0
+ && raw_data._nbytes == 256 + (256 * 32))
+ {
+ *is_leaf = false;
+ for (size_t i = 0; i < 256; i++)
+ {
+ branches[i]._nbytes = 32;
+ if (ALLOC_N (branches[i]._data, 32) < 0)
+ {
+ error = -2;
+ goto cleanup;
+ }
+ memcpy (branches[i]._data, raw_data._data + 256 + (i * 32), 32);
+ }
+ }
+ else
+ {
+ error = -1;
+ goto cleanup;
+ }
+cleanup:
+ FREE (raw_data._data);
+ if (error < 0)
+ {
+ FREE (common_part->_data);
+ FREE (leaf_id->_data);
+ for (size_t i = 0; i < 256; i++)
+ {
+ FREE (branches[i]._data);
+ }
+ }
+ return error;
+}
+
+static int
+db_write_trie_node_aux (const char *db_root,
+ string_desc_t * id,
+ bool is_leaf,
+ const string_desc_t common_part,
+ const string_desc_t * leaf_id,
+ const string_desc_t branches[256])
+{
+ assert (id->_data == NULL);
+ int error = 0;
+ if (common_part._nbytes == 0 || common_part._nbytes > 251)
+ {
+ error = -1;
+ goto cleanup;
+ }
+ if (is_leaf)
+ {
+ string_desc_t data = { 0 };
+ if (leaf_id->_nbytes != 32)
+ {
+ error = -1;
+ goto cleanup_as_leaf;
+ }
+ data._nbytes = 256 + 32;
+ if (ALLOC_N (data._data, data._nbytes) < 0)
+ {
+ error = -2;
+ goto cleanup_as_leaf;
+ }
+ memcpy (data._data, "leaf", 4);
+ data._data[4] = (char) (uint8_t) common_part._nbytes;
+ memcpy (data._data + 5, common_part._data, common_part._nbytes);
+ memcpy (data._data + 256, leaf_id->_data, leaf_id->_nbytes);
+ error = db_write (db_root, id, &data);
+ cleanup_as_leaf:
+ FREE (data._data);
+ goto cleanup;
+ }
+ else
+ {
+ string_desc_t data = { 0 };
+ data._nbytes = 256 + 256 * 32;
+ if (ALLOC_N (data._data, data._nbytes) < 0)
+ {
+ error = -2;
+ goto cleanup_as_node;
+ }
+ memcpy (data._data, "node", 4);
+ data._data[4] = (char) (uint8_t) common_part._nbytes;
+ memcpy (data._data + 5, common_part._data, common_part._nbytes);
+ for (size_t i = 0; i < 256; i++)
+ {
+ if (branches[i]._data == NULL || branches[i]._nbytes == 0)
+ {
+ /* I suppose we avoid this one. */
+ }
+ else if (branches[i]._nbytes != 32)
+ {
+ error = -1;
+ goto cleanup_as_node;
+ }
+ else
+ {
+ memcpy (data._data + 256 + (i * 32), branches[i]._data, 32);
+ }
+ }
+ error = db_write (db_root, id, &data);
+ cleanup_as_node:
+ FREE (data._data);
+ goto cleanup;
+ }
+cleanup:
+ if (error != 0)
+ {
+ FREE (id->_data);
+ }
+ return error;
+}
+
+static int
+db_write_leaf_trie_node (const char *db_root,
+ string_desc_t * id,
+ const string_desc_t common_part,
+ const string_desc_t * leaf_id)
+{
+ string_desc_t unused[256] = { 0 };
+ return db_write_trie_node_aux (db_root, id, true, common_part, leaf_id,
+ unused);
+}
+
+static int
+db_write_inner_trie_node (const char *db_root,
+ string_desc_t * id,
+ const string_desc_t common_part,
+ const string_desc_t branches[256])
+{
+ string_desc_t unused = { 0 };
+ return db_write_trie_node_aux (db_root, id, false, common_part, &unused,
+ branches);
+}
+
+#endif /* not DISFLUID_TRIE_NODE_INCLUDED */