From 6fa7797f7b55655f3ecacf89dc94e1b5f4c94a2f Mon Sep 17 00:00:00 2001 From: hgn Date: Sun, 27 Apr 2025 02:07:55 +0100 Subject: [PATCH] Epic database --- src/vg/vg_store.h | 680 ++++++++++++++++++++++++++++++++++++++++++++++ vg_db.c | 569 ++++++++++++++++++++++++++++++++++++++ vg_db.h | 124 +++++++++ 3 files changed, 1373 insertions(+) create mode 100644 src/vg/vg_store.h create mode 100644 vg_db.c create mode 100644 vg_db.h diff --git a/src/vg/vg_store.h b/src/vg/vg_store.h new file mode 100644 index 0000000..00a8540 --- /dev/null +++ b/src/vg/vg_store.h @@ -0,0 +1,680 @@ +#ifndef VG_STORE_H +#define VG_STORE_H + +#include "vg_stdint.h" +#include "vg_io.h" + +/* + * Anderson tree implementation with extensions: + * parents are kept track of + * duplicates are allowed + * data is never allocated or destroyed here + * + * TODO: seperate offset,stride,base into 'generic array', seperate pool + */ + +typedef struct aatree aatree; +typedef struct aatree_node aatree_node; +typedef struct aatree_pool_node aatree_pool_node; + +typedef u32 aatree_ptr; +#define AATREE_PTR_NIL 0xffffffff + +struct aatree +{ + u32 offset, stride; /* distance between elements */ + void *base; + + int (*p_cmp)( void *a, void *b ); +}; + +#pragma pack(push,1) +struct aatree_node +{ + aatree_ptr left, right, parent; + u32 level, count; +}; + +struct aatree_pool_node +{ + aatree_ptr next_free; +}; +#pragma pack(pop) + +/* api + * ===========================================================================*/ + +/* return a pointer to the start of the data referenced by t */ +static void *aatree_get_data( aatree *tree, aatree_ptr t ); + +/* link node x into the tree with root t */ +static aatree_ptr aatree_insert( aatree *tree, aatree_ptr t, aatree_ptr x ); + +/* delete node x from tree, does not free memory */ +static aatree_ptr aatree_del( aatree *tree, aatree_ptr x ); + +/* get pointer to element in tree with index k */ +static aatree_ptr aatree_kth( aatree *tree, aatree_ptr t, u32 k ); + +/* get pointer to the element above x */ +static aatree_ptr aatree_next( aatree *tree, aatree_ptr x ); + +/* get pointer by value, returns NIL if not found. + * + * if duplicates values are present then the result is undefined, but it will be + * AN node with that value, just maybe not the first lexicographically + */ +static aatree_ptr aatree_find( aatree *tree, aatree_ptr t, void *value ); + +/* implementation + * ===========================================================================*/ + +static void *aatree_get_data( aatree *tree, aatree_ptr t ) +{ + return (u8 *)tree->base + tree->stride*t; +} + +static u8 *aatree_node_base( aatree *tree, aatree_ptr t ) +{ + return (u8 *)tree->base + tree->stride*t + tree->offset; +} + +static aatree_pool_node *aatree_get_pool_node( aatree *tree, aatree_ptr t ) +{ + return (aatree_pool_node *)aatree_node_base( tree, t ); +} + +static aatree_node *aatree_get_node( aatree *tree, aatree_ptr t ) +{ + return (aatree_node *)aatree_node_base( tree, t ); +} + +static void aatree_recount( aatree *tree, aatree_ptr n ) +{ + aatree_node *pnode = aatree_get_node( tree, n ); + pnode->count = 1; + + if( pnode->left != AATREE_PTR_NIL ) + pnode->count += aatree_get_node( tree, pnode->left )->count; + + if( pnode->right != AATREE_PTR_NIL ) + pnode->count += aatree_get_node( tree, pnode->right )->count; +} + +/* . . + * | | + * L <- T L -> T + * / \ \ -> / / \ + * A B R A B R + */ +static aatree_ptr aatree_skew( aatree *tree, u32 t ) +{ + if( t == AATREE_PTR_NIL ) return t; + + aatree_node *ptnode = aatree_get_node( tree, t ); + if( ptnode->left == AATREE_PTR_NIL ) return t; + + aatree_node *plnode = aatree_get_node( tree, ptnode->left ); + if( plnode->level == ptnode->level ) + { + aatree_ptr l = ptnode->left; + ptnode->left = plnode->right; + plnode->right = t; + + aatree_recount( tree, t ); + aatree_recount( tree, l ); + + plnode->parent = ptnode->parent; + ptnode->parent = l; + if( ptnode->left != AATREE_PTR_NIL ) + aatree_get_node( tree, ptnode->left )->parent = t; + + return l; + } + + return t; +} + +/* . . + * | | + * T -> R -> X -> R + * / / / \ + * A B T X + * / \ + * A B + */ +static aatree_ptr aatree_split( aatree *tree, aatree_ptr t ) +{ + if( t == AATREE_PTR_NIL ) return t; + + aatree_node *ptnode = aatree_get_node( tree, t ); + if( ptnode->right == AATREE_PTR_NIL ) return t; + + aatree_node *prnode = aatree_get_node( tree, ptnode->right ); + if( prnode->right == AATREE_PTR_NIL ) return t; + + aatree_node *prrnode = aatree_get_node( tree, prnode->right ); + if( ptnode->level == prrnode->level ) + { + aatree_ptr r = ptnode->right; + ptnode->right = prnode->left; + prnode->left = t; + prnode->level ++; + + aatree_recount( tree, t ); + aatree_recount( tree, r ); + + prnode->parent = ptnode->parent; + ptnode->parent = r; + if( ptnode->right != AATREE_PTR_NIL ) + aatree_get_node( tree, ptnode->right )->parent = t; + + return r; + } + + return t; +} + +static aatree_ptr aatree_insert( aatree *tree, aatree_ptr t, aatree_ptr x ) +{ + aatree_node *pxnode = aatree_get_node( tree, x ); + + if( t == AATREE_PTR_NIL ) + { + pxnode->left = AATREE_PTR_NIL; + pxnode->right = AATREE_PTR_NIL; + pxnode->parent = AATREE_PTR_NIL; + pxnode->level = 0; + pxnode->count = 1; + return x; + } + + aatree_node *ptnode = aatree_get_node( tree, t ); + int cmp_result = tree->p_cmp( aatree_get_data( tree, t ), + aatree_get_data( tree, x ) ); + + ptnode->count ++; + + if( cmp_result <= 0 ) + { + ptnode->left = aatree_insert( tree, ptnode->left, x ); + aatree_node *plnode = aatree_get_node( tree, ptnode->left ); + plnode->parent = t; + } + else + { + ptnode->right = aatree_insert( tree, ptnode->right, x ); + aatree_node *prnode = aatree_get_node( tree, ptnode->right ); + prnode->parent = t; + } + + t = aatree_skew( tree, t ); + t = aatree_split( tree, t ); + return t; +} + +static void aatree_link_down( aatree *tree, aatree_ptr p, aatree_ptr *pl, + aatree_ptr l ) +{ + *pl = l; + + if( *pl != AATREE_PTR_NIL ) + aatree_get_node( tree, *pl )->parent = p; +} + +static aatree_ptr aatree_copy_links( aatree *tree, aatree_ptr root, + aatree_ptr src, aatree_ptr dst ) +{ + aatree_node *pdst = aatree_get_node( tree, dst ), + *psrc = aatree_get_node( tree, src ); + + pdst->count = psrc->count; + pdst->level = psrc->level; + pdst->parent = psrc->parent; + + aatree_link_down( tree, dst, &pdst->left, psrc->left ); + aatree_link_down( tree, dst, &pdst->right, psrc->right ); + + if( pdst->parent != AATREE_PTR_NIL ) + { + aatree_node *parent = aatree_get_node( tree, pdst->parent ); + + if( parent->left == src ) + parent->left = dst; + else if( parent->right == src ) + parent->right = dst; + } + else + return dst; + + return root; +} + +static aatree_ptr aatree_del( aatree *tree, aatree_ptr x ) +{ + aatree_ptr it = x, + up[32]; + + int count = 1, dir = 0; + + /* TODO: maybe be a better way to do this, without counting back up */ + for( aatree_node *s = aatree_get_node( tree, x ); + s->parent != AATREE_PTR_NIL; + count ++ ) + s = aatree_get_node( tree, s->parent ); + + int top=0; + while(1) + { + int index = count - (++top); + + up[ index ] = it; + aatree_node *itnode = aatree_get_node( tree, it ); + if( itnode->parent == AATREE_PTR_NIL ) + break; + else + it = itnode->parent; + } + + aatree_ptr _ptrswap_src = AATREE_PTR_NIL, + _ptrswap_dst = AATREE_PTR_NIL; + + aatree_node *pxnode = aatree_get_node( tree, x ); + aatree_ptr root = up[ count-1 ]; + if( pxnode->left == AATREE_PTR_NIL || pxnode->right == AATREE_PTR_NIL ) + { + if( --top != 0 ) + { + aatree_node *pnode = aatree_get_node( tree, up[top-1] ), + *parent = aatree_get_node( tree, pxnode->parent ); + + aatree_ptr next = pxnode->left == AATREE_PTR_NIL? + pxnode->right: + pxnode->left; + + if( parent->left == x ) pnode->left = next; + else pnode->right = next; + + if( next != AATREE_PTR_NIL ) + { + aatree_node *pnext = aatree_get_node( tree, next ); + pnext->parent = up[top-1]; + } + } + else + { + if( pxnode->right != AATREE_PTR_NIL ) root = pxnode->right; + else if( pxnode->left != AATREE_PTR_NIL ) root = pxnode->left; + else return AATREE_PTR_NIL; + + aatree_node *newroot = aatree_get_node( tree, root ); + newroot->parent = AATREE_PTR_NIL; + } + } + else + { + aatree_ptr heir = pxnode->right, + prev = x; + + aatree_node *pheir = aatree_get_node( tree, heir ); + + while( pheir->left != AATREE_PTR_NIL ) + { + up[top++] = prev = heir; + heir = pheir->left; + pheir = aatree_get_node( tree, heir ); + } + + _ptrswap_dst = heir; + _ptrswap_src = x; + + aatree_node *pprev = aatree_get_node( tree, prev ); + + if( prev == x ) + aatree_link_down( tree, prev, &pprev->right, pheir->right ); + else + aatree_link_down( tree, prev, &pprev->left, pheir->right ); + } + + /* Tail */ + while( --top >= 0 ) + { + if( top != 0 ) + { + aatree_node *above = aatree_get_node( tree, up[top-1] ); + dir = above->right == up[top]; + } + + aatree_recount( tree, up[top] ); + aatree_node *pntop = aatree_get_node( tree, up[top] ); + + if( !(pntop->left == AATREE_PTR_NIL || pntop->right == AATREE_PTR_NIL) ) + { + aatree_node *pnl = aatree_get_node( tree, pntop->left ), + *pnr = aatree_get_node( tree, pntop->right ); + + if( pnl->level < pntop->level-1 || pnr->level < pntop->level-1 ) + { + if( pnr->level > --pntop->level ) + pnr->level = pntop->level; + + up[top] = aatree_skew( tree, up[top] ); + + aatree_node *ut = aatree_get_node( tree, up[top] ); + ut->right = aatree_skew( tree, ut->right ); + + aatree_node *utr = aatree_get_node( tree, ut->right ); + utr->right = aatree_skew( tree, utr->right ); + + up[top] = aatree_split( tree, up[top] ); + ut = aatree_get_node( tree, up[top] ); + + ut->right = aatree_split( tree, ut->right ); + } + } + + if( top != 0 ) + { + aatree_node *ut1 = aatree_get_node( tree, up[top-1] ); + + if( dir == 1 ) + aatree_link_down( tree, up[top-1], &ut1->right, up[top] ); + else + aatree_link_down( tree, up[top-1], &ut1->left, up[top] ); + } + else + { + root = up[top]; + aatree_get_node( tree, root )->parent = AATREE_PTR_NIL; + } + } + + /* This is our extension to the original non-recursive delete, so no data + * has to be moved */ + if( _ptrswap_dst != AATREE_PTR_NIL ) + root = aatree_copy_links( tree, root, _ptrswap_src, _ptrswap_dst ); + + return root; +} + +static aatree_ptr aatree_kth( aatree *tree, aatree_ptr t, u32 k ) +{ + u32 i = 0; + + while( t != AATREE_PTR_NIL ) + { + aatree_node *ptnode = aatree_get_node( tree, t ); + + u32 j = i; + if( ptnode->left != AATREE_PTR_NIL ) + j += aatree_get_node( tree, ptnode->left )->count; + + if( j < k ) + { + i = j+1; + t = ptnode->right; + } + else + { + if( j > k ) + { + t = ptnode->left; + } + else + { + return t; + } + } + } + + return AATREE_PTR_NIL; +} + +static aatree_ptr aatree_next( aatree *tree, aatree_ptr x ) +{ + /* if can go left, go left then all the way right, + * else go up, if it was right link accept + */ + + aatree_node *pnode = aatree_get_node( tree, x ); + if( pnode->right != AATREE_PTR_NIL ) + { + aatree_ptr next = pnode->right; + + while(1) + { + aatree_node *pnext = aatree_get_node( tree, next ); + + if( pnext->left != AATREE_PTR_NIL ) + next = pnext->left; + else + return next; + } + } + else + { + aatree_ptr next = x; + + while(1) + { + aatree_node *pnode = aatree_get_node( tree, next ); + + if( pnode->parent == AATREE_PTR_NIL ) + return AATREE_PTR_NIL; + + aatree_node *pabove = aatree_get_node( tree, pnode->parent ); + if( pabove->left == next ) + return pnode->parent; + else + next = pnode->parent; + } + } +} + +static aatree_ptr aatree_find( aatree *tree, aatree_ptr t, void *value ) +{ + while( t != AATREE_PTR_NIL ) + { + int cmp_result = tree->p_cmp( aatree_get_data( tree, t ), value ); + + if( cmp_result == 0 ) + return t; + else + { + aatree_node *ptnode = aatree_get_node( tree, t ); + + if( cmp_result < 0 ) + t = ptnode->left; + else + t = ptnode->right; + } + } + return t; +} + +/* + * Debugging stuff, everything below is scaffholding and will be removed + * ============================================================================= + */ + +static int aatree_verify_split( aatree *tree, aatree_ptr t ) +{ + if( t == AATREE_PTR_NIL ) return 1; + + aatree_node *ptnode = aatree_get_node( tree, t ); + if( ptnode->right == AATREE_PTR_NIL ) return 1; + + aatree_node *prnode = aatree_get_node( tree, ptnode->right ); + if( prnode->right == AATREE_PTR_NIL ) return 1; + + aatree_node *prrnode = aatree_get_node( tree, prnode->right ); + if( ptnode->level == prrnode->level ) + return 0; + + return 1; +} + +static int aatree_verify_skew( aatree *tree, aatree_ptr t ) +{ + if( t == AATREE_PTR_NIL ) return 1; + + aatree_node *ptnode = aatree_get_node( tree, t ); + if( ptnode->left == AATREE_PTR_NIL ) return 1; + + aatree_node *plnode = aatree_get_node( tree, ptnode->left ); + if( plnode->level == ptnode->level ) + return 0; + + return 1; +} + +static int aatree_verify( aatree *tree, aatree_ptr t ) +{ + aatree_node *ptnode = aatree_get_node( tree, t ); + if( ptnode->parent != AATREE_PTR_NIL ) + { + aatree_node *parent = aatree_get_node( tree, ptnode->parent ); + if( !(parent->left == t || parent->right == t) ) + return 0; + } + + if( ptnode->left != AATREE_PTR_NIL ) + if( aatree_get_node( tree, ptnode->left )->parent != t ) + return 0; + if( ptnode->right != AATREE_PTR_NIL ) + if( aatree_get_node( tree, ptnode->right )->parent != t ) + return 0; + + return aatree_verify_skew( tree, t ) && + aatree_verify_split( tree, t ); +} + + +static void aatree_show_r( aatree *tree, aatree_ptr t, int lvl, + void(*p_show)(void *data) ) +{ + if( t != AATREE_PTR_NIL ) + { + aatree_node *ptnode = aatree_get_node( tree, t ); + aatree_show_r( tree, ptnode->left, lvl+1, p_show ); + + void *data = aatree_get_data( tree, t ); + + for( int i=0; iright, lvl+1, p_show ); + } +} + +static void aatree_show( aatree *tree, aatree_ptr t, void(*p_show)(void *data)) +{ + if( t != AATREE_PTR_NIL ) + { + aatree_node *ptnode = aatree_get_node( tree, t ); + aatree_show( tree, ptnode->left, p_show ); + void *data = aatree_get_data( tree, t ); + + for( int i=0; ilevel; i++ ) + { + vg_log( " " ); + } + p_show( data ); + vg_log( " (%d) \n", t ); + + aatree_show( tree, ptnode->right, p_show ); + } +} + +static void aatree_show_counts( aatree *tree, aatree_ptr t, int lvl, int *ln, + int *err, + void(*p_show)(void *data), int show ) +{ + if( lvl > 20 ) + return; + if( t == AATREE_PTR_NIL ) return; + + aatree_node *ptnode = aatree_get_node( tree, t ); + void *data = aatree_get_data( tree, t ); + + aatree_show_counts( tree, ptnode->left, lvl+1, ln, err, p_show, show ); + + if( show ) vg_log( "%03d| ", *ln ); + *ln = *ln +1; + + if( show ) + for( int i=0; ileft != AATREE_PTR_NIL && ptnode->right != AATREE_PTR_NIL ) + printf( "|" ); + if( ptnode->left != AATREE_PTR_NIL && ptnode->right == AATREE_PTR_NIL ) + printf( "/" ); + if( ptnode->left == AATREE_PTR_NIL && ptnode->right != AATREE_PTR_NIL ) + printf( "\\" ); + + printf( " (%d, %d, parent: %d. V: %d, level: %d) \n", t, + ptnode->count, ptnode->parent, + aatree_verify( tree, t ), ptnode->level); + } + + if( !aatree_verify( tree, t ) ) + { + if( show ) + vg_log( "error\n" ); + *err = 1; + } + + aatree_show_counts( tree, ptnode->right, lvl+1, ln, err, p_show, show ); +} + +/* + * Pool allocator utility which can be placed in a union with regular aa nodes. + */ + +static aatree_ptr aatree_init_pool( aatree *info, u32 item_count ) +{ + for( aatree_ptr i=0; inext_free = AATREE_PTR_NIL; + else + pn->next_free = i+1; + } + + return 0; +} + +static aatree_ptr aatree_pool_alloc( aatree *info, aatree_ptr *head ) +{ + if( *head == AATREE_PTR_NIL ) + { + vg_error( "No nodes free in pool allocator!\n" ); + return AATREE_PTR_NIL; + } + else + { + aatree_ptr gap = *head; + *head = aatree_get_pool_node( info, *head )->next_free; + return gap; + } +} + +static void aatree_pool_free( aatree *info, aatree_ptr node, aatree_ptr *head ) +{ + aatree_pool_node *pn = aatree_get_pool_node( info, node ); + pn->next_free = *head; + *head = node; +} + +#endif /* VG_STORE_H */ diff --git a/vg_db.c b/vg_db.c new file mode 100644 index 0000000..69dec03 --- /dev/null +++ b/vg_db.c @@ -0,0 +1,569 @@ +#include "vg_db.h" +#include +#include + +static void vg_db_abort( vg_db *db, const char *message, ... ) +{ + fclose( db->fp ); + db->fp = NULL; + vg_fatal_condition(); + va_list args; + va_start( args, message ); + _vg_logx_va( stderr, NULL, "vg_db fatal", KRED, message, args ); + va_end( args ); + vg_fatal_exit(); +} + +static u32 vg_dbhash( u8 *buf, u32 len ) +{ + /* djb2 */ + u32 hash = 5381; + for( u32 i=0; ifp, 0, SEEK_END ) ) + vg_db_abort( db, "SEEK_END(0) failed\n" ); + u64 zero = 0; + for( u64 i=0; ifp ) ) + vg_db_abort( db, "fwrite failed\n" ); + return ftell( db->fp ) - VG_PAGE_SIZE; +} + +static void vg_db_sync_page( vg_db *db, u16 cache_id ) +{ + vg_db_page *page = &db->page_cache[ cache_id-1 ]; + if( page->unwritten ) + { + if( fseek( db->fp, page->physical_offset, SEEK_SET ) ) + vg_db_abort( db, "SEEK_SET(%lx) failed\n", page->physical_offset ); + void *page_data = db->page_data + (u64)(cache_id-1)*VG_PAGE_SIZE; + if( !fwrite( page_data, VG_PAGE_SIZE, 1, db->fp ) ) + vg_db_abort( db, "fwrite failed\n" ); + page->unwritten = 0; + } +} + +static u64 vg_db_skew( vg_db *db, u64 t_offset ) +{ + if( t_offset == 0 ) + return t_offset; + vg_db_address_node t_node; + vg_db_read( db, t_offset, &t_node, sizeof(t_node) ); + if( t_node.left_offset == 0 ) + return t_offset; + + u64 l_offset = t_node.left_offset; + vg_db_address_node l_node; + vg_db_read( db, l_offset, &l_node, sizeof(l_node) ); + if( l_node.level == t_node.level ) + { + t_node.left_offset = l_node.right_offset; + l_node.right_offset = t_offset; + vg_db_write( db, t_offset, &t_node, sizeof(t_node) ); + vg_db_write( db, l_offset, &l_node, sizeof(l_node) ); + return l_offset; + } + else return t_offset; +} + +static u64 vg_db_split( vg_db *db, u64 t_offset ) +{ + if( t_offset == 0 ) + return t_offset; + vg_db_address_node t_node; + vg_db_read( db, t_offset, &t_node, sizeof(t_node) ); + if( t_node.right_offset == 0 ) + return t_offset; + + u64 r_offset = t_node.right_offset; + vg_db_address_node r_node; + vg_db_read( db, r_offset, &r_node, sizeof(r_node) ); + if( r_node.right_offset == 0 ) + return t_offset; + + u64 rr_node_offset = r_node.right_offset; + vg_db_address_node rr_node; + vg_db_read( db, rr_node_offset, &rr_node, sizeof(rr_node) ); + if( t_node.level == rr_node.level ) + { + t_node.right_offset = r_node.left_offset; + r_node.left_offset = t_offset; + r_node.level ++; + vg_db_write( db, t_offset, &t_node, sizeof(t_node) ); + vg_db_write( db, r_offset, &r_node, sizeof(r_node) ); + return r_offset; + } + else return t_offset; +} + +static u64 vg_db_tree_insert( vg_db *db, u64 t_offset, u64 x_offset, u64 key ) +{ + if( t_offset == 0 ) + return x_offset; + vg_db_address_node t_node; + vg_db_read( db, t_offset, &t_node, sizeof(t_node) ); + if( t_node.key <= key ) + t_node.left_offset = vg_db_tree_insert( db, t_node.left_offset, x_offset, key ); + else + t_node.right_offset= vg_db_tree_insert( db, t_node.right_offset,x_offset, key ); + vg_db_write( db, t_offset, &t_node, sizeof(t_node) ); + t_offset = vg_db_skew( db, t_offset ); + t_offset = vg_db_split( db, t_offset ); + return t_offset; +} + +void vg_db_tree_map( vg_db *db, u64 tree_address, u64 key, u64 value ) +{ + vg_db_address_tree tree; + vg_db_read( db, tree_address, &tree, sizeof(tree) ); + + u64 cluster_offset = (tree.last_node_offset & ~(VG_PAGE_SIZE-1lu)); + vg_db_address_cluster cluster; + vg_db_read( db, cluster_offset, &cluster, sizeof(vg_db_address_cluster) ); + if( cluster.count == VG_ADDRESS_NODES_PER_CLUSTER ) + { + cluster.count = 0; + cluster_offset = vg_db_allocate_physical_page( db ); + } + + u64 new_node_offset = cluster_offset + sizeof(vg_db_address_cluster) + cluster.count*sizeof(vg_db_address_node); + cluster.count ++; + vg_db_write( db, cluster_offset, &cluster, sizeof(vg_db_address_cluster) ); + + vg_db_address_node new_node; + new_node.left_offset = 0; + new_node.right_offset = 0; + new_node.key = key; + new_node.value = value; + new_node.level = 0; + vg_db_write( db, new_node_offset, &new_node, sizeof(new_node) ); + + tree.last_node_offset = new_node_offset; + tree.root_node_offset = vg_db_tree_insert( db, tree.root_node_offset, new_node_offset, key ); + vg_db_write( db, tree_address, &tree, sizeof(tree) ); +} + +static void vg_db_touch( vg_db *db, u16 cache_id ) +{ + vg_db_page *page = &db->page_cache[ cache_id-1 ]; + /* unlink entirely */ + if( page->lru_younger ) + db->page_cache[ page->lru_younger-1 ].lru_older = page->lru_older; + else if( db->lru_young == cache_id ) + db->lru_young = page->lru_older; + if( page->lru_older ) + db->page_cache[ page->lru_older-1 ].lru_younger = page->lru_younger; + else if( db->lru_old == cache_id ) + db->lru_old = page->lru_younger; + /* re-link */ + page->lru_younger = 0; + page->lru_older = db->lru_young; + if( db->lru_young ) + { + page->lru_older = db->lru_young; + db->page_cache[ db->lru_young-1 ].lru_younger = cache_id; + } + db->lru_young = cache_id; + if( !db->lru_old ) + db->lru_old = cache_id; +} + +u64 vg_db_translate( vg_db *db, u64 tree_address, u64 key ) +{ + vg_db_address_tree tree; + vg_db_read( db, tree_address, &tree, sizeof(tree) ); + + u64 t_offset = tree.root_node_offset; + while( t_offset ) + { + vg_db_address_node t_node; + vg_db_read( db, t_offset, &t_node, sizeof(t_node) ); + + if( t_node.key == key ) + { + return t_node.value; + break; + } + else + { + if( t_node.key <= key ) + t_offset = t_node.left_offset; + else + t_offset = t_node.right_offset; + } + } + return 0; +} + +void vg_db_tree_init( vg_db *db, u64 tree_address ) +{ + vg_db_address_tree tree = {0}; + vg_db_read( db, tree_address, &tree, sizeof(tree) ); + if( tree.last_node_offset == 0 ) + { + tree.root_node_offset = 0; + tree.last_node_offset = vg_db_allocate_physical_page( db ); + vg_db_write( db, tree_address, &tree, sizeof(tree) ); + } +} + +void vg_db_skipper_init( vg_db *db, u64 skipper_address, u32 max_entries ) +{ + vg_db_skipper skipper; + vg_db_read( db, skipper_address, &skipper, sizeof(skipper) ); + if( skipper.skips_array_address == 0 ) + { + skipper.skips_array_address = vg_db_virtual_allocate( db, sizeof(vg_db_skip)*max_entries ); + skipper.sentry.height = 7; + for( u32 i=0; i<7; i ++ ) + skipper.sentry.links[i] = 0xffff; + vg_db_write( db, skipper_address, &skipper, sizeof(skipper) ); + } +} + +bool vg_db_skipper_find( vg_db *db, vg_skipper_context *ctx, u16 *out_index, void *comparand ) +{ + vg_db_skipper skipper; + vg_db_read( db, ctx->address, &skipper, sizeof(skipper) ); + + i32 level = skipper.sentry.height-1; + while( level>=0 ) + { + u16 next = skipper.sentry.links[level]; + if( next != 0xffff ) + { + i32 cmp = ctx->fn_compare( ctx, comparand, next ); + if( cmp == 0 ) + { + *out_index = next; + return 1; + } + else if( cmp > 0 ) + level --; + else + { + u64 skip_addr = skipper.skips_array_address + (u64)next*sizeof(vg_db_skip); + vg_db_read( db, skip_addr, &skipper.sentry, sizeof(vg_db_skip) ); + } + } + else level --; + } + return 0; +} + +void vg_db_skipper_placement( vg_db *db, vg_skipper_context *ctx, u16 item_index, void *comparand ) +{ + vg_db_skipper skipper; + vg_db_read( db, ctx->address, &skipper, sizeof(skipper) ); + + u16 path[7]; + i32 level = skipper.sentry.height-1; + u16 current_index = 0xffff; + + while( level>=0 ) + { + path[level] = current_index; + u16 next = skipper.sentry.links[level]; + if( next == 0xffff ) + level --; + else + { + i32 cmp = ctx->fn_compare( ctx, comparand, next ); + if( cmp >= 0 ) + level --; + else + { + current_index = next; + u64 skip_addr = skipper.skips_array_address + (u64)current_index*sizeof(vg_db_skip); + vg_db_read( db, skip_addr, &skipper.sentry, sizeof(vg_db_skip) ); + } + } + } + + vg_db_skip skip={0}; + skip.height = 1; + while( (skip.height < 7) && (vg_randu32( &db->rand ) & 0x1) ) + skip.height ++; + for( u32 i=0; i<7; i ++ ) + skip.links[i] = 0xffff; + for( i32 i=skip.height-1; i>=0; -- i ) + { + u64 prev_addr; + if( path[i] == 0xffff ) prev_addr = ctx->address+offsetof(vg_db_skipper,sentry); + else prev_addr = skipper.skips_array_address + (u64)path[i]*sizeof(vg_db_skip); + vg_db_skip prev; + vg_db_read( db, prev_addr, &prev, sizeof(prev) ); + skip.links[i] = prev.links[i]; + prev.links[i] = item_index; + vg_db_write( db, prev_addr, &prev, sizeof(prev) ); + } + u64 our_addr = skipper.skips_array_address + (u64)item_index*sizeof(vg_db_skip); + vg_db_write( db, our_addr, &skip, sizeof(skip) ); +} + +void vg_db_skipper_unplace( vg_db *db, vg_skipper_context *ctx, u16 item_index, void *comparand ) +{ + vg_db_skipper skipper; + vg_db_read( db, ctx->address, &skipper, sizeof(skipper) ); + + u16 path[7]; + i32 level = skipper.sentry.height-1; + u16 current_index = 0xffff; + + while( level>=0 ) + { + path[level] = current_index; + u16 next = skipper.sentry.links[level]; + if( (next == 0xffff) || (next == item_index) ) + level --; + else + { + i32 cmp = ctx->fn_compare( ctx, comparand, next ); + if( cmp > 0 ) + level --; + else + { + current_index = next; + u64 skip_addr = skipper.skips_array_address + (u64)current_index*sizeof(vg_db_skip); + vg_db_read( db, skip_addr, &skipper.sentry, sizeof(vg_db_skip) ); + } + } + } + + u64 our_addr = skipper.skips_array_address + (u64)item_index*sizeof(vg_db_skip); + vg_db_skip skip; + vg_db_read( db, our_addr, &skip, sizeof(vg_db_skip) ); + for( u32 i=0; iaddress+offsetof(vg_db_skipper,sentry); + else prev_addr = skipper.skips_array_address + (u64)path[i]*sizeof(vg_db_skip); + vg_db_skip prev; + vg_db_read( db, prev_addr, &prev, sizeof(prev) ); + prev.links[i] = skip.links[i]; + vg_db_write( db, prev_addr, &prev, sizeof(prev) ); + } +} + +void vg_db_skipper_replace( vg_db *db, vg_skipper_context *ctx, u16 item_index, void *comparand_old, void *comparand_new ) +{ + vg_db_skipper_unplace( db, ctx, item_index, comparand_old ); + vg_db_skipper_placement( db, ctx, item_index, comparand_new ); +} + +void vg_db_skipper_iter_start( vg_db *db, vg_skipper_context *ctx ) +{ + vg_db_skipper skipper; + vg_db_read( db, ctx->address, &skipper, sizeof(skipper) ); + ctx->iter_array_address = skipper.skips_array_address; + ctx->iter_index = skipper.sentry.links[0]; +} + +bool vg_db_skipper_iter( vg_db *db, vg_skipper_context *ctx, u16 *out_index ) +{ + u16 return_index = ctx->iter_index; + if( ctx->iter_index == 0xffff ) + return 0; + else + { + vg_db_skip skip; + u64 skip_addr = ctx->iter_array_address + (u64)ctx->iter_index*sizeof(vg_db_skip); + vg_db_read( db, skip_addr, &skip, sizeof(skip) ); + ctx->iter_index = skip.links[0]; + *out_index = return_index; + return 1; + } +} + +void vg_db_dumb_table_init( vg_db *db, u64 table_address, u32 structure_size, u32 max_entries ) +{ + vg_db_dumb_table table; + vg_db_read( db, table_address, &table, sizeof(table) ); + if( table.array_address == 0 ) + { + table.array_address = vg_db_virtual_allocate( db, structure_size*max_entries ); + table.max_entries = max_entries; + table.structure_size = structure_size; + vg_db_write( db, table_address, &table, sizeof(table) ); + } +} + +u16 vg_db_dumb_table_count( vg_db *db, u64 table_address ) +{ + u16 count; + vg_db_read( db, table_address + offsetof(vg_db_dumb_table,current_entries), &count, sizeof(count) ); + return count; +} + +u64 vg_db_dumb_table_get( vg_db *db, u64 table_address, u16 index ) +{ + vg_db_dumb_table table; + vg_db_read( db, table_address, &table, sizeof(table) ); + return table.array_address + (u64)index * (u64)table.structure_size; +} + +u64 vg_db_dumb_table_append( vg_db *db, u64 table_address ) +{ + vg_db_dumb_table table; + vg_db_read( db, table_address, &table, sizeof(table) ); + if( table.current_entries < table.max_entries ) + { + u64 address = table.array_address + (u64)table.current_entries * (u64)table.structure_size; + table.current_entries ++; + vg_db_write( db, table_address, &table, sizeof(table) ); + return address; + } + else return 0; +} + +static void *vg_db_devirtualize( vg_db *db, u64 address, bool write ) +{ + u64 page_base = address & ~(VG_PAGE_SIZE-1lu), + inner_offset = address & (VG_PAGE_SIZE-1lu); + + /* Check hash table for our page */ + u32 hash = vg_dbhash( (void *)(&page_base), sizeof(page_base) ) & (VG_PAGE_CACHE_HASH_WIDTH-1u); + u16 current = db->hash_table[ hash ]; + while( current ) + { + vg_db_page *page = &db->page_cache[ current-1 ]; + if( page->virtual_id == page_base ) + { + page->unwritten |= write; + vg_db_touch( db, current ); + return db->page_data + ((u64)(current-1)*VG_PAGE_SIZE + inner_offset); + } + else + current = page->hash_prev; + } + + /* Translate address & create page if need be */ + u64 translated_page_base = page_base; + if( address & VG_VIRTUAL_ADDRESS_BIT ) + { + u64 tree_address = offsetof(vg_db_header,address_tree); + translated_page_base = vg_db_translate( db, tree_address, page_base ); + if( translated_page_base == 0 ) + { + u64 new_page = vg_db_allocate_physical_page( db ); + vg_db_tree_map( db, tree_address, page_base, new_page ); + translated_page_base = new_page; + } + } + + /* Allocate cache ID */ + u16 cache_id = 0; + vg_db_page *page = NULL; + if( db->cache_count < VG_MAX_CACHED_PAGES ) + { + cache_id = ++db->cache_count; + page = &db->page_cache[ cache_id-1 ]; + memset( page, 0, sizeof(vg_db_page) ); + } + else + { + cache_id = db->lru_old; + vg_db_sync_page( db, cache_id ); + page = &db->page_cache[ cache_id-1 ]; + u32 old_hash = vg_dbhash( (void *)(&page->virtual_id), sizeof(page->virtual_id) ) & (VG_PAGE_CACHE_HASH_WIDTH-1u); + u16 current = db->hash_table[ old_hash ], before = 0; + while( current != cache_id ) + { + before = current; + current = db->page_cache[ current-1 ].hash_prev; + } + if( before ) + db->page_cache[ before-1 ].hash_prev = page->hash_prev; + else + db->hash_table[ old_hash ] = 0; + } + page->hash_prev = db->hash_table[ hash ]; + db->hash_table[ hash ] = cache_id; + vg_db_touch( db, cache_id ); + page->virtual_id = page_base; + page->physical_offset = translated_page_base; + page->unwritten = write; + + /* read into memory */ + void *page_data = db->page_data + (u64)(cache_id-1)*VG_PAGE_SIZE; + if( fseek( db->fp, translated_page_base, SEEK_SET ) ) + vg_db_abort( db, "SEEK_SET (%lx) failed\n", translated_page_base ); + if( !fread( page_data, VG_PAGE_SIZE, 1, db->fp ) ) + vg_db_abort( db, "fread page failed\n" ); + return page_data + inner_offset; +} + +void vg_db_xch( vg_db *db, u64 base_address, void *buf, u32 length, bool write ) +{ + u64 address = base_address, + end = base_address + (u64)length; + + while( address != end ) + { + u64 byte_count = VG_PAGE_SIZE - (address & (VG_PAGE_SIZE-1lu)); + if( address + byte_count > end ) + byte_count = end - address; + + void *cache_buffer = vg_db_devirtualize( db, address, write ), + *user_buffer = buf + (address-base_address); + if( write ) memcpy( cache_buffer, user_buffer, byte_count ); + else memcpy( user_buffer, cache_buffer, byte_count ); + address += byte_count; + } +} + +void vg_db_open( vg_db *db, const char *path ) +{ + u32 k_ident = 0xf32b1a00; + vg_rand_seed( &db->rand, k_ident + time(NULL) ); + db->fp = fopen( path, "rb+" ); + db->page_data = malloc( VG_PAGE_SIZE*VG_MAX_CACHED_PAGES ); + db->cache_count = 0; + db->lru_old = 0; + db->lru_young = 0; + if( db->fp ) + { + u32 ident; + vg_db_read( db, 0, &ident, 4 ); + if( ident != k_ident ) + vg_db_abort( db, "Ident not found in db file '%s'\n", path ); + vg_db_read( db, offsetof(vg_db_header,userdata_address), &db->userdata_address, 8 ); + } + else + { + db->fp = fopen( path, "wb+" ); + if( !db->fp ) + vg_db_abort( db, "fopen(wb+) failed for '%s'\n", path ); + vg_db_allocate_physical_page( db ); // Allocate header as 0 + vg_db_write( db, offsetof(vg_db_header,ident), &k_ident, 4 ); + db->userdata_address = vg_db_virtual_allocate( db, VG_1GB ); + vg_db_write( db, offsetof(vg_db_header,userdata_address), &db->userdata_address, 8 ); + vg_db_tree_init( db, offsetof(vg_db_header,address_tree) ); + } +} + +void vg_db_close( vg_db *db ) +{ + for( u32 i=0; ifp ); + db->fp = NULL; + free( db->page_data ); + db->page_data = NULL; +} + +u64 vg_db_virtual_allocate( vg_db *db, u64 bytes ) +{ + u64 page_count = 0; + vg_db_read( db, 0x0lu + offsetof(vg_db_header,virtual_pages), &page_count, sizeof(page_count) ); + u64 pages = (bytes + (VG_PAGE_SIZE-1lu)) >> VG_PAGE_BITS, + addr = page_count * VG_PAGE_SIZE; + page_count += pages; + vg_db_write( db,0x0lu + offsetof(vg_db_header,virtual_pages), &page_count, sizeof(page_count) ); + return VG_VIRTUAL_ADDRESS_BIT | addr; +} diff --git a/vg_db.h b/vg_db.h new file mode 100644 index 0000000..ffade0c --- /dev/null +++ b/vg_db.h @@ -0,0 +1,124 @@ +#pragma once +#include "vg_m.h" + +#define VG_PAGE_BITS 11 +#define VG_PAGE_SIZE (0x1lu<