diff options
author | Vivien Kraus <vivien@planete-kraus.eu> | 2023-04-04 20:02:01 +0200 |
---|---|---|
committer | Vivien Kraus <vivien@planete-kraus.eu> | 2023-04-29 11:58:03 +0200 |
commit | aff2a2daebd4626cd6efb7663818962c0c22bc9a (patch) | |
tree | e0526a8a7ea14c0659607ccad25b6cddda4d0f17 | |
parent | 2b2c08a19e32955cc5eacae9af9bca01829270cc (diff) |
Store cache trie nodes in the database
-rw-r--r-- | src/libdisfluid/Makefile.am | 1 | ||||
-rw-r--r-- | src/libdisfluid/disfluid-tests.h | 111 | ||||
-rw-r--r-- | src/libdisfluid/disfluid-trie-node.h | 270 |
3 files changed, 382 insertions, 0 deletions
diff --git a/src/libdisfluid/Makefile.am b/src/libdisfluid/Makefile.am index dc377c9..aeba503 100644 --- a/src/libdisfluid/Makefile.am +++ b/src/libdisfluid/Makefile.am @@ -8,6 +8,7 @@ lib_LTLIBRARIES += %D%/libdisfluid.la %D%/disfluid-db.h \ %D%/disfluid-init.h \ %D%/disfluid-tests.h \ + %D%/disfluid-trie-node.h \ %D%/disfluid-ui.h \ %D%/disfluid-version.h \ %D%/main.c diff --git a/src/libdisfluid/disfluid-tests.h b/src/libdisfluid/disfluid-tests.h index 325f2ca..c949940 100644 --- a/src/libdisfluid/disfluid-tests.h +++ b/src/libdisfluid/disfluid-tests.h @@ -7,6 +7,7 @@ static inline char *run_tests (size_t *n_tests, size_t *n_errors); # include <check.h> +# include "disfluid-trie-node.h" # include "disfluid-init.h" # include "disfluid-version.h" # include "disfluid-cache-entry.h" @@ -428,6 +429,114 @@ START_TEST (test_db) END_TEST /* *INDENT-ON* */ +/* *INDENT-OFF* */ +START_TEST (test_db_trie_inner) +/* *INDENT-ON* */ + +{ + char *database; + prepare_database (&database); + static const char *hello = "hello"; + const string_desc_t common_part = { + ._nbytes = 5, + ._data = (char *) hello + }; + static const char series[32] = { 1, 2, 3, 4, 5 }; + const string_desc_t branches[256] = { + {0}, + { + ._nbytes = 32, + ._data = (char *) series}, + {0} + }; + ck_assert_int_eq (common_part._nbytes, strlen (hello)); + ck_assert_int_eq (branches[0]._nbytes, 0); + ck_assert_int_eq (branches[1]._nbytes, sizeof (series)); + string_desc_t node_id = { 0 }; + ck_assert_int_eq (db_write_inner_trie_node + (database, &node_id, common_part, branches), 0); + string_desc_t actual_common_part = { 0 }; + string_desc_t actual_leaf_id = { 0 }; + string_desc_t actual_branches[256] = { 0 }; + bool actually_leaf = true; + ck_assert_int_eq (db_read_trie_node + (database, &node_id, &actual_common_part, &actually_leaf, + &actual_leaf_id, actual_branches), 0); + ck_assert (!actually_leaf); + ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes); + ck_assert_int_eq (memcmp + (actual_common_part._data, common_part._data, + common_part._nbytes), 0); + ck_assert_int_eq (actual_branches[0]._nbytes, 32); + for (size_t i = 0; i < 32; i++) + { + ck_assert_int_eq (actual_branches[0]._data[i], 0); + } + ck_assert_int_eq (actual_branches[1]._nbytes, 32); + ck_assert_int_eq (memcmp (actual_branches[1]._data, series, 32), 0); + FREE (actual_common_part._data); + FREE (actual_leaf_id._data); + for (size_t i = 0; i < 256; i++) + { + FREE (actual_branches[i]._data); + } + FREE (node_id._data); + cleanup_database (&database); +} +/* *INDENT-OFF* */ +END_TEST +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +START_TEST (test_db_trie_leaf) +/* *INDENT-ON* */ + +{ + char *database; + prepare_database (&database); + static const char *hello = "hello"; + const string_desc_t common_part = { + ._nbytes = 5, + ._data = (char *) hello + }; + static const char series[32] = { 1, 2, 3, 4, 5 }; + const string_desc_t leaf_id = { + ._nbytes = 32, + ._data = (char *) series + }; + ck_assert_int_eq (common_part._nbytes, strlen (hello)); + ck_assert_int_eq (leaf_id._nbytes, sizeof (series)); + string_desc_t node_id = { 0 }; + ck_assert_int_eq (db_write_leaf_trie_node + (database, &node_id, common_part, &leaf_id), 0); + string_desc_t actual_common_part = { 0 }; + string_desc_t actual_leaf_id = { 0 }; + string_desc_t actual_branches[256] = { 0 }; + bool actually_leaf = false; + ck_assert_int_eq (db_read_trie_node + (database, &node_id, &actual_common_part, &actually_leaf, + &actual_leaf_id, actual_branches), 0); + ck_assert (actually_leaf); + ck_assert_int_eq (actual_common_part._nbytes, common_part._nbytes); + ck_assert_int_eq (memcmp + (actual_common_part._data, common_part._data, + common_part._nbytes), 0); + ck_assert_int_eq (memcmp + (actual_leaf_id._data, leaf_id._data, leaf_id._nbytes), + 0); + FREE (actual_common_part._data); + FREE (actual_leaf_id._data); + for (size_t i = 0; i < 256; i++) + { + FREE (actual_branches[i]._data); + } + FREE (node_id._data); + cleanup_database (&database); +} +/* *INDENT-OFF* */ +END_TEST +/* *INDENT-ON* */ + static inline char * tests_read_whole_file (int file) { @@ -479,6 +588,8 @@ run_tests (size_t *n_tests, size_t *n_errors) suite_add_tcase (suite, cache_hash); TCase *db = tcase_create (_("disfluid database")); tcase_add_test (db, test_db); + tcase_add_test (db, test_db_trie_inner); + tcase_add_test (db, test_db_trie_leaf); suite_add_tcase (suite, db); SRunner *runner = srunner_create (suite); char log_file_name[] = "/tmp/disfluid-unit-tests-XXXXXX"; diff --git a/src/libdisfluid/disfluid-trie-node.h b/src/libdisfluid/disfluid-trie-node.h new file mode 100644 index 0000000..855a236 --- /dev/null +++ b/src/libdisfluid/disfluid-trie-node.h @@ -0,0 +1,270 @@ +#ifndef DISFLUID_TRIE_NODE_INCLUDED +# define DISFLUID_TRIE_NODE_INCLUDED + +# include <config.h> +# include "string-desc.h" + +MAYBE_UNUSED static int +db_mark_trie_node (const char *db_root, const string_desc_t * id, + int (*mark_leaf) (void *, const string_desc_t *), + void *context); + +MAYBE_UNUSED static int +db_read_trie_node (const char *db_root, + const string_desc_t * id, + string_desc_t * common_part, + bool *is_leaf, + string_desc_t * leaf_id, string_desc_t branches[256]); + +MAYBE_UNUSED static int +db_write_leaf_trie_node (const char *db_root, + string_desc_t * id, + const string_desc_t common_part, + const string_desc_t * leaf_id); + +MAYBE_UNUSED static int +db_write_inner_trie_node (const char *db_root, + string_desc_t * id, + const string_desc_t common_part, + const string_desc_t branches[256]); + +# include "disfluid-db.h" +# include "safe-alloc.h" + +static int +db_mark_trie_node (const char *db_root, + const string_desc_t * id, + int (*mark_leaf) (void *, const string_desc_t *), + void *context) +{ + bool is_leaf; + string_desc_t common_part = { 0 }; + string_desc_t leaf_id = { 0 }; + string_desc_t branches[256] = { 0 }; + int error = + db_read_trie_node (db_root, id, &common_part, &is_leaf, &leaf_id, + branches); + if (error < 0) + { + goto cleanup; + } + if (is_leaf) + { + error = mark_leaf (context, &leaf_id); + } + else + { + for (size_t i = 0; i < 256; i++) + { + if (branches[i]._data != NULL) + { + int this_error = + db_mark_trie_node (db_root, &(branches[i]), mark_leaf, + context); + if (this_error < 0) + { + error = -1; + } + } + } + } +cleanup: + FREE (common_part._data); + FREE (leaf_id._data); + for (size_t i = 0; i < sizeof (branches) / sizeof (branches[0]); i++) + { + FREE (branches[i]._data); + } + return error; +} + +/* The trie node is a file composed of: */ +/* - "leaf" if this is a leaf node, "node" otherwise; */ +/* - the common part length, on 1 byte, minimum 1, maximum 251; */ +/* - the common part bytes, padded with 0s for 251 bytes; */ +/* - if this is a leaf node: 32 bytes for the ID of the content; */ +/* - if this is a non-leaf node: 256 * 32 bytes for the ID of the branches. */ + +static int +db_read_trie_node (const char *db_root, + const string_desc_t * id, + string_desc_t * common_part, + bool *is_leaf, + string_desc_t * leaf_id, string_desc_t branches[256]) +{ + string_desc_t raw_data = { 0 }; + assert (common_part->_data == NULL); + assert (leaf_id->_data == NULL); + for (size_t i = 0; i < 256; i++) + { + assert (branches[i]._data == NULL); + } + int error = db_read (db_root, id, &raw_data); + if (error < 0) + { + goto cleanup; + } + if (raw_data._nbytes != 256 + 32 && raw_data._nbytes != 256 + (256 * 32)) + { + error = -1; + goto cleanup; + } + common_part->_nbytes = (uint8_t) (char) raw_data._data[4]; + if (common_part->_nbytes < 1 || common_part->_nbytes > 251) + { + error = -1; + goto cleanup; + } + if (ALLOC_N (common_part->_data, common_part->_nbytes) < 0) + { + error = -2; + goto cleanup; + } + memcpy (common_part->_data, raw_data._data + 5, common_part->_nbytes); + if (memcmp (raw_data._data, "leaf", 4) == 0 && raw_data._nbytes == 256 + 32) + { + *is_leaf = true; + leaf_id->_nbytes = 32; + if (ALLOC_N (leaf_id->_data, 32) < 0) + { + error = -2; + goto cleanup; + } + memcpy (leaf_id->_data, raw_data._data + 256, 32); + } + else if (memcmp (raw_data._data, "node", 4) == 0 + && raw_data._nbytes == 256 + (256 * 32)) + { + *is_leaf = false; + for (size_t i = 0; i < 256; i++) + { + branches[i]._nbytes = 32; + if (ALLOC_N (branches[i]._data, 32) < 0) + { + error = -2; + goto cleanup; + } + memcpy (branches[i]._data, raw_data._data + 256 + (i * 32), 32); + } + } + else + { + error = -1; + goto cleanup; + } +cleanup: + FREE (raw_data._data); + if (error < 0) + { + FREE (common_part->_data); + FREE (leaf_id->_data); + for (size_t i = 0; i < 256; i++) + { + FREE (branches[i]._data); + } + } + return error; +} + +static int +db_write_trie_node_aux (const char *db_root, + string_desc_t * id, + bool is_leaf, + const string_desc_t common_part, + const string_desc_t * leaf_id, + const string_desc_t branches[256]) +{ + assert (id->_data == NULL); + int error = 0; + if (common_part._nbytes == 0 || common_part._nbytes > 251) + { + error = -1; + goto cleanup; + } + if (is_leaf) + { + string_desc_t data = { 0 }; + if (leaf_id->_nbytes != 32) + { + error = -1; + goto cleanup_as_leaf; + } + data._nbytes = 256 + 32; + if (ALLOC_N (data._data, data._nbytes) < 0) + { + error = -2; + goto cleanup_as_leaf; + } + memcpy (data._data, "leaf", 4); + data._data[4] = (char) (uint8_t) common_part._nbytes; + memcpy (data._data + 5, common_part._data, common_part._nbytes); + memcpy (data._data + 256, leaf_id->_data, leaf_id->_nbytes); + error = db_write (db_root, id, &data); + cleanup_as_leaf: + FREE (data._data); + goto cleanup; + } + else + { + string_desc_t data = { 0 }; + data._nbytes = 256 + 256 * 32; + if (ALLOC_N (data._data, data._nbytes) < 0) + { + error = -2; + goto cleanup_as_node; + } + memcpy (data._data, "node", 4); + data._data[4] = (char) (uint8_t) common_part._nbytes; + memcpy (data._data + 5, common_part._data, common_part._nbytes); + for (size_t i = 0; i < 256; i++) + { + if (branches[i]._data == NULL || branches[i]._nbytes == 0) + { + /* I suppose we avoid this one. */ + } + else if (branches[i]._nbytes != 32) + { + error = -1; + goto cleanup_as_node; + } + else + { + memcpy (data._data + 256 + (i * 32), branches[i]._data, 32); + } + } + error = db_write (db_root, id, &data); + cleanup_as_node: + FREE (data._data); + goto cleanup; + } +cleanup: + if (error != 0) + { + FREE (id->_data); + } + return error; +} + +static int +db_write_leaf_trie_node (const char *db_root, + string_desc_t * id, + const string_desc_t common_part, + const string_desc_t * leaf_id) +{ + string_desc_t unused[256] = { 0 }; + return db_write_trie_node_aux (db_root, id, true, common_part, leaf_id, + unused); +} + +static int +db_write_inner_trie_node (const char *db_root, + string_desc_t * id, + const string_desc_t common_part, + const string_desc_t branches[256]) +{ + string_desc_t unused = { 0 }; + return db_write_trie_node_aux (db_root, id, false, common_part, &unused, + branches); +} + +#endif /* not DISFLUID_TRIE_NODE_INCLUDED */ |