diff --git a/cmd/radFS/arttest/main.go b/cmd/radFS/arttest/main.go
new file mode 100644
index 0000000..6388106
--- /dev/null
+++ b/cmd/radFS/arttest/main.go
@@ -0,0 +1,18 @@
+// Command arttest builds a small adaptive radix tree and prints its shape.
+package main
+
+import (
+	"github.com/acmpesuecc/radFS/internal/art"
+)
+
+func main() {
+	tree := &art.Tree{}
+
+	// Each key extends the previous one, exercising a shared prefix longer
+	// than the number of prefix bytes an inner node stores inline.
+	tree.Insert([]byte("abbbbbbbc"), "first")
+	tree.Insert([]byte("abbbbbbbcb"), "second")
+	tree.Insert([]byte("abbbbbbbcbc"), "second")
+
+	art.PrintTree(tree.Root(), 0)
+}
diff --git a/internal/art/art.go b/internal/art/art.go
new file mode 100644
index 0000000..0e1f922
--- /dev/null
+++ b/internal/art/art.go
@@ -0,0 +1,32 @@
+// Package art implements an adaptive radix tree that maps byte-slice keys to
+// string values.
+package art
+
+// Tree is an adaptive radix tree. The zero value is an empty tree.
+type Tree struct {
+	root *Node
+}
+
+// Root returns the root node of the tree, or nil when the tree is empty.
+func (t *Tree) Root() *Node {
+	return t.root
+}
+
+// Insert stores value under key, overwriting the value of an existing equal
+// key. The key bytes are copied, so the caller may reuse or modify the slice
+// afterwards without corrupting the tree.
+func (t *Tree) Insert(key []byte, value string) {
+	owned := append([]byte(nil), key...)
+	t.root = insert(t.root, value, owned, 0)
+}
+
+// Search returns the value stored under key and true, or "" and false when
+// the key is not in the tree.
+func (t *Tree) Search(key []byte) (string, bool) {
+	// Walk from the root with no key bytes consumed yet.
+	found := search(t.root, key, 0)
+	if found == nil || !isleaf(found) {
+		return "", false
+	}
+	return found.leaf.values, true
+}
diff --git a/internal/art/insert.go b/internal/art/insert.go
new file mode 100644
index 0000000..1839d78
--- /dev/null
+++ b/internal/art/insert.go
@@ -0,0 +1,92 @@
+package art
+
+// insert adds key with value to the subtree rooted at n and returns the node
+// that must replace n in its parent. depth is the number of key bytes already
+// consumed on the way down to n.
+func insert(n *Node, value string, key []byte, depth int) *Node {
+	if n == nil {
+		return newleaf(value, key)
+	}
+	if isleaf(n) {
+		return splitleaf(n, value, key, depth)
+	}
+
+	in := n.innerNode
+	if p := checkprefix(n, key, depth); p != in.meta.prefixlen {
+		return splitprefix(n, value, key, depth, p)
+	}
+
+	// The whole compressed prefix matched: branch on the byte that follows it.
+	depth += in.meta.prefixlen
+	k := keycheck(key, depth)
+	if next, pos := findchild(k, n); next != nil {
+		in.children[pos] = insert(next, value, key, depth+1)
+		return n
+	}
+	return addchild(n, k, newleaf(value, key))
+}
+
+// splitleaf handles insertion at a leaf. An equal key has its value updated in
+// place; otherwise the leaf is replaced by a Node4 that holds the bytes both
+// keys share from depth onwards as its prefix, with the two leaves as children.
+func splitleaf(n *Node, value string, key []byte, depth int) *Node {
+	oldkey := n.leaf.key
+	if string(oldkey) == string(key) {
+		n.leaf.values = value
+		return n
+	}
+
+	parent := newNode4()
+	pm := &parent.innerNode.meta
+	i := depth
+	for i < len(oldkey) && i < len(key) && oldkey[i] == key[i] {
+		// Only the first maxprefixlen shared bytes are stored inline.
+		if off := i - depth; off < maxprefixlen {
+			pm.prefix[off] = key[i]
+		}
+		i++
+	}
+	// prefixlen records the full shared length, even beyond maxprefixlen.
+	pm.prefixlen = i - depth
+
+	// NOTE(review): if both edge bytes come out equal (which requires a key
+	// byte equal to the terminator value returned by keycheck), the second
+	// addchild replaces the first.
+	parent = addchild(parent, keycheck(key, i), newleaf(value, key))
+	parent = addchild(parent, keycheck(oldkey, i), n)
+	return parent
+}
+
+// splitprefix handles a key that diverges from n's compressed prefix after p
+// matching bytes. A new Node4 takes the first p prefix bytes and gets two
+// children: a leaf for key, and n with its prefix shortened by p+1 bytes (the
+// p shared bytes plus the byte that now labels the edge to n).
+func splitprefix(n *Node, value string, key []byte, depth, p int) *Node {
+	in := n.innerNode
+	oldlen := in.meta.prefixlen
+	remaining := oldlen - (p + 1)
+
+	parent := newNode4()
+	parent.innerNode.meta.prefixlen = p
+	copy(parent.innerNode.meta.prefix, in.meta.prefix[:min(p, maxprefixlen)])
+
+	var edge byte
+	if oldlen <= maxprefixlen {
+		// The whole prefix is stored inline, so it can be shifted in place.
+		edge = in.meta.prefix[p]
+		copy(in.meta.prefix, in.meta.prefix[p+1:oldlen])
+	} else {
+		// Bytes past maxprefixlen are not stored in the node; every leaf below
+		// n shares the prefix, so read them back from one of those keys.
+		full := fetchleaf(n).leaf.key
+		edge = keycheck(full, depth+p)
+		for i := 0; i < min(remaining, maxprefixlen); i++ {
+			in.meta.prefix[i] = keycheck(full, depth+p+1+i)
+		}
+	}
+	in.meta.prefixlen = remaining
+
+	parent = addchild(parent, keycheck(key, depth+p), newleaf(value, key))
+	parent = addchild(parent, edge, n)
+	return parent
+}
diff --git a/internal/art/leaf.go b/internal/art/leaf.go
new file mode 100644
index 0000000..87074f1
--- /dev/null
+++ b/internal/art/leaf.go
@@ -0,0 +1,19 @@
+package art
+
+// leaf is a terminal tree entry holding the complete key and its value.
+type leaf struct {
+	key    []byte
+	values string
+}
+
+// newleaf returns a Node that wraps a leaf for key and value.
+func newleaf(value string, key []byte) *Node {
+	return &Node{
+		leaf: &leaf{key: key, values: value},
+	}
+}
+
+// isleaf reports whether n is a leaf. A nil node is not a leaf.
+func isleaf(n *Node) bool {
+	return n != nil && n.leaf != nil
+}
diff --git a/internal/art/node.go b/internal/art/node.go
new file mode 100644
index 0000000..e41ecd5
--- /dev/null
+++ b/internal/art/node.go
@@ -0,0 +1,50 @@
+package art
+
+// NodeType identifies the layout of an inner node.
+type NodeType int
+
+// Inner node layouts, ordered by capacity.
+const (
+	Node4 NodeType = iota
+	Node16
+	Node48
+	Node256
+)
+
+const (
+	// Maximum number of children each inner node layout can hold.
+	Node4max   = 4
+	Node16Max  = 16
+	Node48Max  = 48
+	Node256Max = 256
+
+	// maxprefixlen is the number of compressed-prefix bytes stored inline in
+	// an inner node; longer prefixes are recovered from a leaf key.
+	maxprefixlen = 8
+)
+
+// Node is a tree node: exactly one of innerNode and leaf is expected to be set.
+type Node struct {
+	innerNode *innerNode
+	leaf      *leaf
+}
+
+// innerNode is a branching node. The meaning of keys and children depends on
+// nodeType:
+//   - Node4, Node16: keys[i] is the edge byte of children[i], sorted ascending.
+//   - Node48: keys is indexed by edge byte and holds slot+1 into children
+//     (0 means no child).
+//   - Node256: keys is unused and children is indexed by edge byte.
+type innerNode struct {
+	nodeType     NodeType
+	keys         []byte
+	children     []*Node
+	num_children int
+	meta         meta
+}
+
+// meta is the path-compression header of an inner node.
+type meta struct {
+	prefix    []byte // first min(prefixlen, maxprefixlen) bytes of the prefix
+	prefixlen int    // full prefix length, which may exceed maxprefixlen
+}
diff --git a/internal/art/node16.go b/internal/art/node16.go
new file mode 100644
index 0000000..a931480
--- /dev/null
+++ b/internal/art/node16.go
@@ -0,0 +1,15 @@
+package art
+
+// newNode16 returns an empty inner node with room for 16 children.
+func newNode16() *Node {
+	in := &innerNode{
+		nodeType:     Node16,
+		keys:         make([]byte, Node16Max),
+		children:     make([]*Node, Node16Max),
+		num_children: 0,
+		meta: meta{
+			prefix: make([]byte, maxprefixlen),
+		},
+	}
+	return &Node{innerNode: in}
+}
diff --git a/internal/art/node256.go b/internal/art/node256.go
new file mode 100644
index 0000000..0a891fc
--- /dev/null
+++ b/internal/art/node256.go
@@ -0,0 +1,15 @@
+package art
+
+// newNode256 returns an empty inner node whose children are indexed directly
+// by edge byte; it has no keys slice.
+func newNode256() *Node {
+	in := &innerNode{
+		nodeType:     Node256,
+		children:     make([]*Node, Node256Max),
+		num_children: 0,
+		meta: meta{
+			prefix: make([]byte, maxprefixlen),
+		},
+	}
+	return &Node{innerNode: in}
+}
diff --git a/internal/art/node4.go b/internal/art/node4.go
new file mode 100644
index 0000000..aa996b8
--- /dev/null
+++ b/internal/art/node4.go
@@ -0,0 +1,15 @@
+package art
+
+// newNode4 returns an empty inner node with room for 4 children.
+func newNode4() *Node {
+	in := &innerNode{
+		nodeType:     Node4,
+		keys:         make([]byte, Node4max),
+		children:     make([]*Node, Node4max),
+		num_children: 0,
+		meta: meta{
+			prefix: make([]byte, maxprefixlen),
+		},
+	}
+	return &Node{innerNode: in}
+}
diff --git a/internal/art/node48.go b/internal/art/node48.go
new file mode 100644
index 0000000..f99a5ca
--- /dev/null
+++ b/internal/art/node48.go
@@ -0,0 +1,16 @@
+package art
+
+// newNode48 returns an empty inner node with room for 48 children. keys has
+// one entry per possible edge byte and maps it to a child slot plus one.
+func newNode48() *Node {
+	in := &innerNode{
+		nodeType:     Node48,
+		keys:         make([]byte, Node256Max),
+		children:     make([]*Node, Node48Max),
+		num_children: 0,
+		meta: meta{
+			prefix: make([]byte, maxprefixlen),
+		},
+	}
+	return &Node{innerNode: in}
+}
diff --git a/internal/art/print_tree.go b/internal/art/print_tree.go
new file mode 100644
index 0000000..ccb9b3f
--- /dev/null
+++ b/internal/art/print_tree.go
@@ -0,0 +1,70 @@
+package art
+
+import "fmt"
+
+// PrintTree writes a human-readable dump of the subtree rooted at n to
+// standard output, indenting by level. n is expected to be the tree root:
+// prefixes longer than maxprefixlen are read back from a leaf key assuming no
+// key bytes were consumed above n.
+func PrintTree(n *Node, level int) {
+	printnode(n, level, 0)
+}
+
+// printnode prints n and its descendants. depth is the number of key bytes
+// consumed on the path down to n.
+func printnode(n *Node, level, depth int) {
+	if n == nil {
+		return
+	}
+
+	indent := ""
+	for i := 0; i < level; i++ {
+		indent += " "
+	}
+
+	if isleaf(n) {
+		fmt.Println(indent + "Leaf: " + string(n.leaf.key))
+		return
+	}
+
+	in := n.innerNode
+	prefixlen := in.meta.prefixlen
+	prefix := string(in.meta.prefix[:min(prefixlen, maxprefixlen)])
+	if prefixlen > maxprefixlen {
+		// The node stores only the first maxprefixlen bytes; take the full
+		// prefix from a leaf below when its key is long enough.
+		if l := fetchleaf(n); l != nil && depth+prefixlen <= len(l.leaf.key) {
+			prefix = string(l.leaf.key[depth : depth+prefixlen])
+		}
+	}
+	fmt.Println(indent+"Node(prefix=\""+prefix+"\", prefixLen=", prefixlen, ")")
+
+	// Children start after the prefix and the one edge byte leading to them.
+	next := depth + prefixlen + 1
+	switch in.nodeType {
+	case Node4, Node16:
+		for i, child := range in.children {
+			if child != nil {
+				printedge(indent, in.keys[i], child, level, next)
+			}
+		}
+	case Node48:
+		for k, slot := range in.keys {
+			if slot != 0 {
+				printedge(indent, byte(k), in.children[slot-1], level, next)
+			}
+		}
+	case Node256:
+		for k, child := range in.children {
+			if child != nil {
+				printedge(indent, byte(k), child, level, next)
+			}
+		}
+	}
+}
+
+// printedge prints the edge byte k leading to child, then the child's subtree.
+func printedge(indent string, k byte, child *Node, level, depth int) {
+	fmt.Printf("%s Edge('%c' | %d):\t", indent, k, k)
+	printnode(child, level+1, depth)
+}
diff --git a/internal/art/search.go b/internal/art/search.go
new file mode 100644
index 0000000..34472f3
--- /dev/null
+++ b/internal/art/search.go
@@ -0,0 +1,29 @@
+package art
+
+// search returns the leaf stored under key in the subtree rooted at n, or nil
+// when the key is absent. depth is the number of key bytes already consumed
+// on the way down to n.
+func search(n *Node, key []byte, depth int) *Node {
+	for n != nil {
+		if isleaf(n) {
+			// Path compression may have skipped bytes, so compare the full key.
+			if string(n.leaf.key) == string(key) {
+				return n
+			}
+			return nil
+		}
+
+		// A mismatch anywhere in the compressed prefix rules out this subtree.
+		plen := n.innerNode.meta.prefixlen
+		if plen > 0 && checkprefix(n, key, depth) != plen {
+			return nil
+		}
+		depth += plen
+
+		// Branch on the next key byte; keycheck yields the terminator value 1
+		// once the key is exhausted, and findchild yields nil for no child.
+		n, _ = findchild(keycheck(key, depth), n)
+		depth++
+	}
+	return nil
+}
diff --git a/internal/art/util.go b/internal/art/util.go
new file mode 100644
index 0000000..6ade844
--- /dev/null
+++ b/internal/art/util.go
@@ -0,0 +1,167 @@
+package art
+
+// addchild links child under edge byte k in inner node n and returns the node
+// that now holds it, which differs from n when n was full and had to grow. An
+// existing child under k is replaced.
+func addchild(n *Node, k byte, child *Node) *Node {
+	if existing, pos := findchild(k, n); existing != nil {
+		n.innerNode.children[pos] = child
+		return n
+	}
+
+	in := n.innerNode
+	switch in.nodeType {
+	case Node4, Node16:
+		if in.num_children == len(in.keys) {
+			return addchild(grow(n), k, child)
+		}
+		// Shift larger keys right so keys stays sorted.
+		i := in.num_children
+		for ; i > 0 && in.keys[i-1] > k; i-- {
+			in.keys[i] = in.keys[i-1]
+			in.children[i] = in.children[i-1]
+		}
+		in.keys[i] = k
+		in.children[i] = child
+		in.num_children++
+	case Node48:
+		if in.num_children == len(in.children) {
+			return addchild(grow(n), k, child)
+		}
+		for slot := range in.children {
+			if in.children[slot] == nil {
+				in.children[slot] = child
+				in.keys[k] = byte(slot + 1) // 0 is reserved for "no child"
+				in.num_children++
+				break
+			}
+		}
+	case Node256:
+		in.children[k] = child
+		in.num_children++
+	}
+	return n
+}
+
+// checkprefix returns how many leading bytes of n's compressed prefix match
+// key starting at depth. The result equals n's prefixlen on a full match.
+func checkprefix(n *Node, key []byte, depth int) int {
+	in := n.innerNode
+	stored := min(maxprefixlen, in.meta.prefixlen)
+
+	i := 0
+	for ; i < stored; i++ {
+		if in.meta.prefix[i] != keycheck(key, depth+i) {
+			return i // mismatch inside the inline prefix bytes
+		}
+	}
+	if in.meta.prefixlen <= maxprefixlen {
+		return i
+	}
+
+	// The rest of the prefix is not stored in the node; compare against a
+	// leaf below n instead.
+	l := fetchleaf(n)
+	if l == nil {
+		return i
+	}
+	for i < in.meta.prefixlen && keycheck(key, depth+i) == keycheck(l.leaf.key, depth+i) {
+		i++
+	}
+	return i
+}
+
+// findchild returns the child of n reached by edge byte k together with its
+// index in n's children slice, or nil and -1 when there is no such child.
+func findchild(k byte, n *Node) (*Node, int) {
+	in := n.innerNode
+	switch in.nodeType {
+	case Node4, Node16:
+		// Only the first num_children slots are in use.
+		for i := 0; i < in.num_children; i++ {
+			if in.keys[i] == k {
+				return in.children[i], i
+			}
+		}
+	case Node48:
+		if slot := in.keys[k]; slot > 0 {
+			pos := int(slot) - 1
+			return in.children[pos], pos
+		}
+	case Node256:
+		if child := in.children[k]; child != nil {
+			return child, int(k)
+		}
+	}
+	return nil, -1
+}
+
+// keycheck returns the key byte at depth, or the terminator value 1 once
+// depth is past the end of the key.
+//
+// NOTE(review): a real key byte equal to 1 is indistinguishable from the
+// terminator, so keys such as "a" and "a\x01" can collide.
+func keycheck(key []byte, depth int) byte {
+	if depth >= len(key) {
+		return 1
+	}
+	return key[depth]
+}
+
+// grow returns a copy of inner node n in the next larger layout, carrying
+// over its prefix, children and child count. A Node256 cannot grow and is
+// returned unchanged.
+func grow(n *Node) *Node {
+	old := n.innerNode
+	var bigger *Node
+	switch old.nodeType {
+	case Node4:
+		bigger = newNode16()
+		copy(bigger.innerNode.keys, old.keys[:old.num_children])
+		copy(bigger.innerNode.children, old.children[:old.num_children])
+	case Node16:
+		bigger = newNode48()
+		for i := 0; i < old.num_children; i++ {
+			bigger.innerNode.children[i] = old.children[i]
+			// Slots are stored plus one so that 0 can mean "no child".
+			bigger.innerNode.keys[old.keys[i]] = byte(i + 1)
+		}
+	case Node48:
+		bigger = newNode256()
+		for k, slot := range old.keys {
+			if slot != 0 {
+				bigger.innerNode.children[k] = old.children[slot-1]
+			}
+		}
+	default:
+		return n
+	}
+	copymeta(n, bigger)
+	bigger.innerNode.num_children = old.num_children
+	return bigger
+}
+
+// copymeta copies the compressed prefix and its length from n into dst. The
+// prefix bytes are copied rather than shared between the two nodes.
+func copymeta(n *Node, dst *Node) {
+	copy(dst.innerNode.meta.prefix, n.innerNode.meta.prefix)
+	dst.innerNode.meta.prefixlen = n.innerNode.meta.prefixlen
+}
+
+// fetchleaf returns some leaf in the subtree rooted at n, or nil when there
+// is none. Every leaf below an inner node shares that node's prefix, so any
+// of them can be used to recover prefix bytes not stored in the node.
+func fetchleaf(n *Node) *Node {
+	if n == nil {
+		return nil
+	}
+	if isleaf(n) {
+		return n
+	}
+	for _, child := range n.innerNode.children {
+		if l := fetchleaf(child); l != nil {
+			return l
+		}
+	}
+	return nil
+}