diff options
Diffstat (limited to 'src/libdisfluid/disfluid-trie-node.h')
-rw-r--r-- | src/libdisfluid/disfluid-trie-node.h | 386 |
1 files changed, 0 insertions, 386 deletions
diff --git a/src/libdisfluid/disfluid-trie-node.h b/src/libdisfluid/disfluid-trie-node.h deleted file mode 100644 index caa5df7..0000000 --- a/src/libdisfluid/disfluid-trie-node.h +++ /dev/null @@ -1,386 +0,0 @@ -#ifndef DISFLUID_TRIE_NODE_INCLUDED -# define DISFLUID_TRIE_NODE_INCLUDED - -# include <config.h> -# include "string-desc.h" - -struct trie_node; - -MAYBE_UNUSED static int -trie_node_init_leaf (struct trie_node **node, - const string_desc_t common_part, - const string_desc_t leaf_id); - -MAYBE_UNUSED static int -trie_node_init_node (struct trie_node **node, - const string_desc_t common_part, - const string_desc_t branches[256]); - -MAYBE_UNUSED static void trie_node_free (struct trie_node **node); - -MAYBE_UNUSED static const string_desc_t -trie_node_common_part (const struct trie_node *node); - -MAYBE_UNUSED static bool trie_node_is_leaf (const struct trie_node *node); - -MAYBE_UNUSED static const string_desc_t -trie_node_leaf_id (const struct trie_node *node); - -MAYBE_UNUSED static const string_desc_t -trie_node_branch (const struct trie_node *node, char branch); - -MAYBE_UNUSED static int -db_mark_trie_node (const char *db_root, const string_desc_t id, - int (*mark_leaf) (void *, const string_desc_t), - void *context); - -MAYBE_UNUSED static int -db_read_trie_node (const char *db_root, - const string_desc_t id, struct trie_node **node); - -MAYBE_UNUSED static int -db_write_trie_node (const char *db_root, - string_desc_t * id, const struct trie_node *node); - -# include "disfluid-db.h" -# include "disfluid-init.h" -# include "safe-alloc.h" - -struct trie_node -{ - string_desc_t common_part; - string_desc_t branches[256]; - /* If a leaf, only branches[0] is set. */ - bool is_leaf; -}; - -static int -trie_node_init_full (struct trie_node **node, - const string_desc_t common_part, - bool is_leaf, const string_desc_t branches[256]) -{ - assert (*node == NULL); - ensure_init (); - int error = 0; - if (ALLOC (*node) < 0) - { - error = -2; - goto cleanup; - } - if (ALLOC_N ((*node)->common_part._data, common_part._nbytes) < 0) - { - error = -2; - goto cleanup; - } - (*node)->common_part._nbytes = common_part._nbytes; - memcpy ((*node)->common_part._data, common_part._data, common_part._nbytes); - (*node)->is_leaf = is_leaf; - for (size_t i = 0; i < 256; i++) - { - (*node)->branches[i]._nbytes = branches[i]._nbytes; - if (branches[i]._nbytes != 0) - { - if (ALLOC_N ((*node)->branches[i]._data, branches[i]._nbytes) < 0) - { - error = -2; - goto cleanup; - } - memcpy ((*node)->branches[i]._data, branches[i]._data, - branches[i]._nbytes); - } - } -cleanup: - if (error < 0) - { - trie_node_free (node); - } - return error; -} - -MAYBE_UNUSED static int -trie_node_init_leaf (struct trie_node **node, - const string_desc_t common_part, - const string_desc_t leaf_id) -{ - const string_desc_t branches[256] = { - /* Only the first one is set. */ - { - ._nbytes = leaf_id._nbytes, - ._data = (char *) (leaf_id._data)}, - {0} - }; - return trie_node_init_full (node, common_part, true, branches); -} - -MAYBE_UNUSED static int -trie_node_init_node (struct trie_node **node, - const string_desc_t common_part, - const string_desc_t branches[256]) -{ - return trie_node_init_full (node, common_part, false, branches); -} - -MAYBE_UNUSED static void -trie_node_free (struct trie_node **node) -{ - if (*node != NULL) - { - for (size_t i = 0; i < 256; i++) - { - FREE ((*node)->branches[i]._data); - } - FREE ((*node)->common_part._data); - } - FREE (*node); -} - -MAYBE_UNUSED static const string_desc_t -trie_node_common_part (const struct trie_node *node) -{ - return node->common_part; -} - -MAYBE_UNUSED static bool -trie_node_is_leaf (const struct trie_node *node) -{ - return node->is_leaf; -} - -MAYBE_UNUSED static const string_desc_t -trie_node_leaf_id (const struct trie_node *node) -{ - assert (node->is_leaf); - return node->branches[0]; -} - -MAYBE_UNUSED static const string_desc_t -trie_node_branch (const struct trie_node *node, char branch) -{ - assert (!node->is_leaf); - return node->branches[(uint8_t) branch]; -} - -static int -db_mark_trie_node (const char *db_root, - const string_desc_t id, - int (*mark_leaf) (void *, const string_desc_t), - void *context) -{ - struct trie_node *node = NULL; - int error = db_read_trie_node (db_root, id, &node); - if (error < 0) - { - goto cleanup; - } - if (trie_node_is_leaf (node)) - { - const string_desc_t leaf_id = trie_node_leaf_id (node); - error = mark_leaf (context, leaf_id); - } - else - { - for (uint8_t i = 0; i < 256; i++) - { - const string_desc_t branch = trie_node_branch (node, i); - if (branch._data != NULL) - { - int this_error = db_mark_trie_node (db_root, branch, mark_leaf, - context); - if (this_error < 0) - { - error = -1; - } - } - } - } -cleanup: - trie_node_free (&node); - return error; -} - -/* The trie node is a file composed of: */ -/* - "leaf" if this is a leaf node, "node" otherwise; */ -/* - the common part length, on 8 bytes, big endian; */ -/* - the common part bytes, padded with 0s for 251 bytes; */ -/* - if this is a leaf node: 32 bytes for the ID of the content; */ -/* - if this is a non-leaf node: 256 * 32 bytes for the ID of the - branches. If an entry has 32 zeros, then it is considered - free; */ -/* - the common part. */ - -static int -db_read_trie_node (const char *db_root, - const string_desc_t id, struct trie_node **node) -{ - string_desc_t raw_data = { 0 }; - size_t n_common = 0; - int error = db_read (db_root, &id, &raw_data); - if (error < 0) - { - goto cleanup; - } - if (raw_data._nbytes < 4 + 8 + 32) - { - error = -1; - goto cleanup; - } - n_common = 0; - for (size_t i = 0; i < 8; i++) - { - n_common *= 256; - n_common += (uint8_t) (char) raw_data._data[4 + i]; - } - if (memcmp (raw_data._data, "leaf", 4) == 0 - && raw_data._nbytes == 4 + 8 + 32 + n_common) - { - const string_desc_t common_part = { - ._nbytes = n_common, - ._data = (char *) raw_data._data + 4 + 8 + 32 - }; - const string_desc_t leaf_id = { - ._nbytes = 32, - ._data = (char *) raw_data._data + 4 + 8 - }; - error = trie_node_init_leaf (node, common_part, leaf_id); - if (error < 0) - { - goto cleanup; - } - } - else if (memcmp (raw_data._data, "node", 4) == 0 - && raw_data._nbytes == 4 + 8 + (256 * 32) + n_common) - { - string_desc_t branches[256] = { 0 }; - for (size_t i = 0; i < 256; i++) - { - branches[i]._nbytes = 32; - branches[i]._data = (char *) raw_data._data + 4 + 8 + (i * 32); - size_t n_zero = 0; - while (n_zero < branches[i]._nbytes - && branches[i]._data[n_zero] == 0) - { - n_zero++; - } - if (n_zero == branches[i]._nbytes) - { - branches[i]._nbytes = 0; - branches[i]._data = NULL; - } - } - const string_desc_t common_part = { - ._nbytes = n_common, - ._data = (char *) raw_data._data + 4 + 8 + (256 * 32) - }; - error = trie_node_init_node (node, common_part, branches); - if (error < 0) - { - goto cleanup; - } - } - else - { - error = -1; - goto cleanup; - } -cleanup: - FREE (raw_data._data); - if (error < 0) - { - trie_node_free (node); - } - return error; -} - -static int -db_write_trie_node (const char *db_root, - string_desc_t * id, const struct trie_node *node) -{ - assert (id->_data == NULL); - int error = 0; - size_t offset = 0; - const string_desc_t common_part = trie_node_common_part (node); - if (trie_node_is_leaf (node)) - { - const string_desc_t leaf_id = trie_node_leaf_id (node); - string_desc_t data = { 0 }; - if (leaf_id._nbytes != 32) - { - error = -1; - goto cleanup_as_leaf; - } - data._nbytes = 4 + 8 + 32 + common_part._nbytes; - if (ALLOC_N (data._data, data._nbytes) < 0) - { - error = -2; - goto cleanup_as_leaf; - } - memcpy (data._data, "leaf", 4); - offset += 4; - size_t nbytes = common_part._nbytes; - for (size_t i = 8; i-- > 0;) - { - data._data[offset + i] = nbytes % 256; - nbytes /= 256; - } - offset += 8; - memcpy (data._data + offset, leaf_id._data, leaf_id._nbytes); - offset += 32; - memcpy (data._data + offset, common_part._data, common_part._nbytes); - error = db_write (db_root, id, &data); - cleanup_as_leaf: - FREE (data._data); - goto cleanup; - } - else - { - string_desc_t data = { 0 }; - data._nbytes = 4 + 8 + 256 * 32 + common_part._nbytes; - if (ALLOC_N (data._data, data._nbytes) < 0) - { - error = -2; - goto cleanup_as_node; - } - memcpy (data._data, "node", 4); - offset += 4; - size_t nbytes = common_part._nbytes; - for (size_t i = 8; i-- > 0;) - { - data._data[offset + i] = nbytes % 256; - nbytes /= 256; - } - offset += 8; - for (size_t i = 0; i < 256; i++) - { - const string_desc_t branch = trie_node_branch (node, (uint8_t) i); - if (branch._data == NULL || branch._nbytes == 0) - { - /* I suppose we avoid this one. */ - assert (branch._data == NULL); - assert (branch._nbytes == 0); - } - else if (branch._nbytes != 32) - { - error = -1; - goto cleanup_as_node; - } - else - { - memcpy (data._data + offset + (i * 32), branch._data, 32); - } - } - offset += 32 * 256; - memcpy (data._data + offset, common_part._data, common_part._nbytes); - error = db_write (db_root, id, &data); - cleanup_as_node: - FREE (data._data); - goto cleanup; - } -cleanup: - if (error != 0) - { - FREE (id->_data); - } - return error; -} - -#endif /* not DISFLUID_TRIE_NODE_INCLUDED */ |