diff --git a/cmd/radFS/arttest/main.go b/cmd/radFS/arttest/main.go
new file mode 100644
index 0000000..1d801ac
--- /dev/null
+++ b/cmd/radFS/arttest/main.go
@@ -0,0 +1,21 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/acmpesuecc/radFS/internal/art"
+)
+
+// main inserts 50 three-byte keys that share the prefix "ca" and differ only
+// in their last byte, then prints the resulting tree.
+func main() {
+	tree := &art.Tree{}
+
+	for i := 0; i < 50; i++ {
+		// Build the key from raw bytes: string(byte(x)) would UTF-8 encode
+		// values above 127 into two bytes.
+		key := []byte{'c', 'a', byte('a' + i)}
+		tree.Insert(key, fmt.Sprintf("val%d", i))
+	}
+	art.PrintTree(tree.Root(), 0, 0)
+}
diff --git a/docs/weekly/angelo/week_4.md b/docs/weekly/angelo/week_4.md
new file mode 100644
index 0000000..db281f3
--- /dev/null
+++ b/docs/weekly/angelo/week_4.md
@@ -0,0 +1,8 @@
+## Angelo's Progress
+* implemented the insert function
+* implemented update function in addchild
+
+
+## What's next?
+* implement grow to transition to larger nodes
+* fix the bugs in insert
diff --git a/internal/art/art.go b/internal/art/art.go
new file mode 100644
index 0000000..0e1f922
--- /dev/null
+++ b/internal/art/art.go
@@ -0,0 +1,32 @@
+// Package art implements an adaptive radix tree that maps byte-slice keys to
+// string values.
+package art
+
+// TODO: Public API (Tree struct, Insert, Search, Delete)
+
+// Tree is the public handle to a radix tree. The zero value is an empty tree.
+type Tree struct {
+	root *Node
+}
+
+// Root returns the root node, or nil when nothing has been inserted yet.
+func (t *Tree) Root() *Node {
+	return t.root
+}
+
+// Insert stores value under key, replacing any value already stored for key.
+// The key slice is retained by the tree, so callers must not modify it
+// afterwards.
+func (t *Tree) Insert(key []byte, value string) {
+	t.root = insert(t.root, value, key, 0)
+}
+
+// Search returns the value stored under key and true, or "" and false when key
+// is not in the tree.
+func (t *Tree) Search(key []byte) (string, bool) {
+	found := search(t.root, key, 0) // start at the root with no bytes consumed
+	if found == nil || !isleaf(found) {
+		return "", false
+	}
+	return found.leaf.values, true
+}
diff --git a/internal/art/insert.go b/internal/art/insert.go
new file mode 100644
index 0000000..3008c40
--- /dev/null
+++ b/internal/art/insert.go
@@ -0,0 +1,110 @@
+package art
+
+// insert stores value under key in the subtree rooted at n and returns the root
+// of the updated subtree, which may be a different node than n. depth is the
+// number of key bytes already consumed on the way down to n.
+func insert(n *Node, value string, key []byte, depth int) *Node {
+	if n == nil {
+		return newleaf(value, key)
+	}
+	if isleaf(n) {
+		return insertAtLeaf(n, value, key, depth)
+	}
+
+	p := checkprefix(n, key, depth)
+	if p != n.innerNode.meta.prefixlen {
+		return splitPrefix(n, value, key, depth, p)
+	}
+
+	depth += n.innerNode.meta.prefixlen
+	if depth == len(key) {
+		// The key ends exactly at this node, so it lives in the node's own leaf.
+		n.innerNode.leaf = newleaf(value, key)
+		return n
+	}
+
+	if next, pos := findchild(key[depth], n); next != nil {
+		n.innerNode.children[pos] = insert(next, value, key, depth+1)
+		return n
+	}
+	return addchild(n, key[depth], newleaf(value, key))
+}
+
+// insertAtLeaf handles an insert that reaches the existing leaf n. An equal key
+// is an update of that leaf. Otherwise both leaves are hung under a new Node4
+// whose prefix is the run of bytes the two keys share starting at depth.
+func insertAtLeaf(n *Node, value string, key []byte, depth int) *Node {
+	oldKey := n.leaf.key
+	if string(oldKey) == string(key) {
+		// Without this check both leaves would compete for the new node's
+		// single leaf slot and the old value would win.
+		n.leaf.values = value
+		return n
+	}
+
+	parent := newNode4()
+	i := depth
+	for i < len(oldKey) && i < len(key) && oldKey[i] == key[i] {
+		if idx := i - depth; idx < maxprefixlen {
+			parent.innerNode.meta.prefix[idx] = key[i] // only the first maxprefixlen bytes are stored
+		}
+		i++
+	}
+	parent.innerNode.meta.prefixlen = i - depth // full shared length, even beyond maxprefixlen
+	depth = i
+
+	// A key that ends at the split point goes in the leaf slot; otherwise its
+	// next byte becomes the edge to it.
+	if depth == len(key) {
+		parent.innerNode.leaf = newleaf(value, key)
+	} else {
+		parent = addchild(parent, key[depth], newleaf(value, key))
+	}
+	if depth == len(oldKey) {
+		parent.innerNode.leaf = n
+	} else {
+		parent = addchild(parent, oldKey[depth], n)
+	}
+	return parent
+}
+
+// splitPrefix handles a key that agrees with only the first p bytes of n's
+// compressed prefix. A new Node4 takes over those p bytes and becomes the
+// parent of both the new leaf and n; n keeps the part of its prefix that
+// follows the byte now used as the edge to it.
+func splitPrefix(n *Node, value string, key []byte, depth, p int) *Node {
+	in := n.innerNode
+	parent := newNode4()
+
+	if depth+p == len(key) {
+		parent.innerNode.leaf = newleaf(value, key)
+	} else {
+		parent = addchild(parent, key[depth+p], newleaf(value, key))
+	}
+
+	// Prefix bytes past maxprefixlen are not stored in the node and have to be
+	// read back from a leaf below it.
+	var edge byte
+	if p < maxprefixlen {
+		edge = in.meta.prefix[p]
+	} else {
+		edge = fetchleaf(n).leaf.key[depth+p]
+	}
+	parent = addchild(parent, edge, n)
+
+	parent.innerNode.meta.prefixlen = p
+	parent.innerNode.meta.prefix = deepcopy(in.meta.prefix[:min(p, maxprefixlen)])
+
+	oldLen := in.meta.prefixlen
+	in.meta.prefixlen = oldLen - (p + 1)
+	if oldLen <= maxprefixlen {
+		in.meta.prefix = deepcopy(in.meta.prefix[p+1 : oldLen])
+		return parent
+	}
+
+	leafKey := fetchleaf(n).leaf.key
+	start := min(depth+p+1, len(leafKey))
+	end := min(start+maxprefixlen, len(leafKey))
+	in.meta.prefix = deepcopy(leafKey[start:end])
+	return parent
+}
diff --git a/internal/art/leaf.go b/internal/art/leaf.go
new file mode 100644
index 0000000..87074f1
--- /dev/null
+++ b/internal/art/leaf.go
@@ -0,0 +1,20 @@
+package art
+
+// TODO: Leaf node structure for storing values
+
+// leaf holds one complete key together with the value stored under it.
+type leaf struct {
+	key    []byte
+	values string
+}
+
+// newleaf returns a leaf Node for key and value. The key slice is stored as
+// given, not copied.
+func newleaf(value string, key []byte) *Node {
+	return &Node{leaf: &leaf{key: key, values: value}}
+}
+
+// isleaf reports whether n is a leaf, i.e. whether its leaf field is set.
+func isleaf(n *Node) bool {
+	return n.leaf != nil
+}
diff --git a/internal/art/node.go b/internal/art/node.go
new file mode 100644
index 0000000..b381512
--- /dev/null
+++ b/internal/art/node.go
@@ -0,0 +1,58 @@
+package art
+
+// TODO: Interfaces and shared node header (meta)
+
+// NodeType identifies which of the four inner-node layouts a node uses.
+type NodeType int
+
+const (
+	Node4 NodeType = iota
+	Node16
+	Node48
+	Node256
+)
+
+const (
+	// Child capacity of each inner-node layout.
+	Node4max   = 4
+	Node16Max  = 16
+	Node48Max  = 48
+	Node256Max = 256
+
+	// maxprefixlen is how many bytes of a compressed prefix a node stores.
+	// Longer prefixes are recovered from a leaf underneath the node.
+	maxprefixlen = 8
+)
+
+// Node is either an inner node or a leaf: exactly one of the two fields is
+// expected to be set.
+type Node struct {
+	innerNode *innerNode
+	leaf      *leaf
+}
+
+// innerNode is the body shared by all four inner-node layouts.
+type innerNode struct {
+	nodeType NodeType
+
+	// keys and children are interpreted according to nodeType: Node4 and
+	// Node16 keep sorted key bytes in keys with the matching child at the same
+	// index; Node48 indexes keys by key byte and stores the child's slot
+	// number plus one (0 means absent); Node256 has no keys and indexes
+	// children directly by key byte.
+	keys     []byte
+	children []*Node
+
+	// leaf holds the entry whose key ends exactly at this node, if any.
+	leaf *Node
+
+	num_children int
+
+	meta meta
+}
+
+// meta describes the compressed prefix of an inner node.
+type meta struct {
+	prefix    []byte // first maxprefixlen bytes of the prefix
+	prefixlen int    // full prefix length, which may exceed maxprefixlen
+}
diff --git a/internal/art/node16.go b/internal/art/node16.go
new file mode 100644
index 0000000..a931480
--- /dev/null
+++ b/internal/art/node16.go
@@ -0,0 +1,13 @@
+package art
+
+// TODO: Node16 implementation
+
+// newNode16 returns an empty inner node with room for Node16Max children.
+func newNode16() *Node {
+	return &Node{innerNode: &innerNode{
+		nodeType: Node16,
+		keys:     make([]byte, Node16Max),
+		children: make([]*Node, Node16Max),
+		meta:     meta{prefix: make([]byte, maxprefixlen)},
+	}}
+}
diff --git a/internal/art/node256.go b/internal/art/node256.go
new file mode 100644
index 0000000..0a891fc
--- /dev/null
+++ b/internal/art/node256.go
@@ -0,0 +1,13 @@
+package art
+
+// TODO: Node256 implementation (direct map)
+
+// newNode256 returns an empty inner node whose children slice is indexed
+// directly by key byte, so it needs no keys slice.
+func newNode256() *Node {
+	return &Node{innerNode: &innerNode{
+		nodeType: Node256,
+		children: make([]*Node, Node256Max),
+		meta:     meta{prefix: make([]byte, maxprefixlen)},
+	}}
+}
diff --git a/internal/art/node4.go b/internal/art/node4.go
new file mode 100644
index 0000000..aa996b8
--- /dev/null
+++ b/internal/art/node4.go
@@ -0,0 +1,11 @@
+package art
+
+// newNode4 returns an empty inner node with room for Node4max children.
+func newNode4() *Node {
+	return &Node{innerNode: &innerNode{
+		nodeType: Node4,
+		keys:     make([]byte, Node4max),
+		children: make([]*Node, Node4max),
+		meta:     meta{prefix: make([]byte, maxprefixlen)},
+	}}
+}
diff --git a/internal/art/node48.go b/internal/art/node48.go
new file mode 100644
index 0000000..f99a5ca
--- /dev/null
+++ b/internal/art/node48.go
@@ -0,0 +1,15 @@
+package art
+
+// TODO: Node48 implementation (indirection layer)
+
+// newNode48 returns an empty inner node with room for Node48Max children. Its
+// keys slice has one entry per possible key byte; each entry is 0 when the
+// byte is absent, or the child's index in children plus one.
+func newNode48() *Node {
+	return &Node{innerNode: &innerNode{
+		nodeType: Node48,
+		keys:     make([]byte, Node256Max),
+		children: make([]*Node, Node48Max),
+		meta:     meta{prefix: make([]byte, maxprefixlen)},
+	}}
+}
diff --git a/internal/art/print_tree.go b/internal/art/print_tree.go
new file mode 100644
index 0000000..6f78ff3
--- /dev/null
+++ b/internal/art/print_tree.go
@@ -0,0 +1,70 @@
+package art
+
+import (
+	"fmt"
+	"strings"
+)
+
+// PrintTree writes a dump of the subtree rooted at n to standard output. level
+// is the indentation level of n and depth is the number of key bytes consumed
+// before n; pass 0 for both when starting at the root.
+func PrintTree(n *Node, level int, depth int) {
+	if n == nil {
+		return
+	}
+
+	indent := strings.Repeat(" ", level)
+
+	if isleaf(n) {
+		fmt.Println(indent + "Leaf: " + string(n.leaf.key))
+		return
+	}
+
+	in := n.innerNode
+	prefixlen := in.meta.prefixlen
+
+	// The node stores at most maxprefixlen prefix bytes; a longer prefix is
+	// read from the key of a leaf below the node.
+	var prefix string
+	if prefixlen <= maxprefixlen {
+		prefix = string(in.meta.prefix[:prefixlen])
+	} else {
+		prefix = string(fetchleaf(n).leaf.key[depth : depth+prefixlen])
+	}
+
+	fmt.Println(indent+"Node(prefix=\""+prefix+"\", prefixLen=", prefixlen, ")")
+
+	if in.leaf != nil {
+		fmt.Printf("%s [Internal Leaf]: %s\n", indent, string(in.leaf.leaf.key))
+	}
+
+	// Children sit one edge byte past this node's prefix.
+	childDepth := depth + prefixlen + 1
+
+	switch in.nodeType {
+	case Node4, Node16:
+		for i := 0; i < in.num_children; i++ {
+			printEdge(indent, in.keys[i], in.children[i], level, childDepth)
+		}
+
+	case Node48:
+		for b, slot := range in.keys {
+			if slot != 0 { // 0 marks an unused key byte; otherwise slot is index+1
+				printEdge(indent, byte(b), in.children[slot-1], level, childDepth)
+			}
+		}
+
+	case Node256:
+		for b, child := range in.children {
+			if child != nil {
+				printEdge(indent, byte(b), child, level, childDepth)
+			}
+		}
+	}
+}
+
+// printEdge prints the edge labelled k and then the subtree under it.
+func printEdge(indent string, k byte, child *Node, level, depth int) {
+	fmt.Printf("%s Edge('%c' | %d):\t", indent, k, k)
+	PrintTree(child, level+1, depth)
+}
diff --git a/internal/art/search.go b/internal/art/search.go
new file mode 100644
index 0000000..3ca8dd3
--- /dev/null
+++ b/internal/art/search.go
@@ -0,0 +1,43 @@
+package art
+
+// search looks up key in the subtree rooted at n and returns the leaf node
+// holding it, or nil when the key is absent. depth is the number of key bytes
+// already consumed on the way down to n.
+func search(n *Node, key []byte, depth int) *Node {
+	// A nil node is a dead end: the key is not on this path.
+	if n == nil {
+		return nil
+	}
+
+	// At a leaf, compare the whole key rather than only the bytes walked so
+	// far.
+	if isleaf(n) {
+		if string(n.leaf.key) == string(key) {
+			return n
+		}
+		return nil
+	}
+
+	// The node's compressed prefix must match the key in full; any mismatch
+	// rules out the whole subtree.
+	in := n.innerNode
+	if in.meta.prefixlen > 0 {
+		if checkprefix(n, key, depth) != in.meta.prefixlen {
+			return nil
+		}
+		depth += in.meta.prefixlen
+	}
+
+	// When the key is used up here there is no byte left to branch on, so the
+	// only possible match is the leaf stored on this node itself.
+	if depth >= len(key) {
+		if own := in.leaf; own != nil && string(own.leaf.key) == string(key) {
+			return own
+		}
+		return nil
+	}
+
+	// Follow the child for the next key byte; a missing child yields nil.
+	next, _ := findchild(key[depth], n)
+	return search(next, key, depth+1)
+}
diff --git a/internal/art/util.go b/internal/art/util.go
new file mode 100644
index 0000000..c0849b0
--- /dev/null
+++ b/internal/art/util.go
@@ -0,0 +1,195 @@
+package art
+
+// TODO: Helper functions (e.g., prefix matching)
+
+// addchild links child under key byte k in inner node n and returns the node
+// that now holds it. That is n itself unless n was full, in which case it is
+// the larger node produced by grow. If k is already present its child is
+// replaced.
+func addchild(n *Node, k byte, child *Node) *Node {
+	in := n.innerNode
+
+	// Replace in place so the same key byte is never stored twice.
+	if existing, pos := findchild(k, n); existing != nil {
+		in.children[pos] = child
+		return n
+	}
+
+	switch in.nodeType {
+	case Node4, Node16:
+		if in.num_children == len(in.keys) {
+			return addchild(grow(n), k, child)
+		}
+		// Shift larger keys one slot right so keys stay sorted.
+		i := in.num_children - 1
+		for ; i >= 0 && k < in.keys[i]; i-- {
+			in.keys[i+1] = in.keys[i]
+			in.children[i+1] = in.children[i]
+		}
+		in.keys[i+1] = k
+		in.children[i+1] = child
+		in.num_children++
+
+	case Node48:
+		if in.num_children == len(in.children) {
+			return addchild(grow(n), k, child)
+		}
+		// keys[k] holds slot+1 because 0 is reserved for "no child".
+		if slot := in.keys[k]; slot != 0 {
+			in.children[slot-1] = child
+			return n
+		}
+		for i := range in.children {
+			if in.children[i] == nil { // first free slot
+				in.children[i] = child
+				in.keys[k] = byte(i + 1)
+				in.num_children++
+				break
+			}
+		}
+
+	case Node256:
+		// findchild found nothing above, so slot k is empty.
+		in.children[k] = child
+		in.num_children++
+	}
+	return n
+}
+
+// checkprefix returns how many leading bytes of n's compressed prefix match
+// key starting at depth. The result equals the node's prefixlen only when the
+// whole prefix matches.
+func checkprefix(n *Node, key []byte, depth int) int {
+	in := n.innerNode
+	stored := min(maxprefixlen, in.meta.prefixlen)
+
+	// Compare against the bytes kept in the node.
+	i := 0
+	for ; i < stored && depth+i < len(key); i++ {
+		if in.meta.prefix[i] != key[depth+i] {
+			return i
+		}
+	}
+
+	// The rest of a prefix longer than maxprefixlen is checked against the key
+	// of a leaf below n.
+	if in.meta.prefixlen > maxprefixlen {
+		leafKey := fetchleaf(n).leaf.key
+		for ; i < in.meta.prefixlen && depth+i < len(leafKey) && depth+i < len(key); i++ {
+			if key[depth+i] != leafKey[depth+i] {
+				return i
+			}
+		}
+	}
+	return i
+}
+
+// findchild returns the child of n stored under key byte k and its index in
+// n's children slice, or nil and -1 when there is none.
+func findchild(k byte, n *Node) (*Node, int) {
+	in := n.innerNode
+	switch in.nodeType {
+	case Node4, Node16:
+		for i := 0; i < in.num_children; i++ {
+			if in.keys[i] == k {
+				return in.children[i], i
+			}
+		}
+	case Node48:
+		if slot := in.keys[k]; slot > 0 {
+			return in.children[slot-1], int(slot - 1)
+		}
+	case Node256:
+		if child := in.children[k]; child != nil {
+			return child, int(k)
+		}
+	}
+	return nil, -1
+}
+
+// grow returns a copy of the inner node n in the next larger layout
+// (Node4 to Node16, Node16 to Node48, Node48 to Node256) holding the same
+// prefix, leaf and children. A Node256 cannot grow and is returned as is.
+func grow(n *Node) *Node {
+	old := n.innerNode
+	switch old.nodeType {
+	case Node4:
+		n16 := newNode16()
+		copymeta(n, n16)
+		count := 0
+		for i, child := range old.children {
+			if child != nil {
+				n16.innerNode.keys[count] = old.keys[i]
+				n16.innerNode.children[count] = child
+				count++
+			}
+		}
+		n16.innerNode.num_children = count
+		return n16
+
+	case Node16:
+		n48 := newNode48()
+		copymeta(n, n48)
+		count := 0
+		for i := 0; i < old.num_children; i++ {
+			if child := old.children[i]; child != nil {
+				// Slot numbers are stored plus one; 0 means "no child".
+				n48.innerNode.keys[old.keys[i]] = byte(count + 1)
+				n48.innerNode.children[count] = child
+				count++
+			}
+		}
+		n48.innerNode.num_children = count
+		return n48
+
+	case Node48:
+		n256 := newNode256()
+		copymeta(n, n256)
+		count := 0
+		for b, slot := range old.keys {
+			if slot != 0 {
+				n256.innerNode.children[b] = old.children[slot-1]
+				count++
+			}
+		}
+		n256.innerNode.num_children = count
+		return n256
+	}
+	return n
+}
+
+// copymeta copies the prefix and the leaf slot of inner node n to new_node.
+func copymeta(n *Node, new_node *Node) {
+	src, dst := &n.innerNode.meta, &new_node.innerNode.meta
+	dst.prefixlen = src.prefixlen
+	dst.prefix = deepcopy(src.prefix[:min(src.prefixlen, maxprefixlen)])
+	new_node.innerNode.leaf = n.innerNode.leaf
+}
+
+// fetchleaf returns some leaf in the subtree rooted at n: n itself if it is
+// a leaf, else the node's own leaf, else the first leaf found under its
+// children. It returns nil when the subtree holds no leaf.
+func fetchleaf(n *Node) *Node {
+	if isleaf(n) {
+		return n
+	}
+	in := n.innerNode
+	if in.leaf != nil {
+		return in.leaf
+	}
+	// Unused child slots are nil in every layout, so one scan serves all.
+	for _, child := range in.children {
+		if child != nil {
+			return fetchleaf(child)
+		}
+	}
+	return nil
+}
+
+// deepcopy returns a fresh slice of length maxprefixlen whose leading bytes
+// are copied from source; the remainder is zero.
+func deepcopy(source []byte) []byte {
+	dst := make([]byte, maxprefixlen)
+	copy(dst, source)
+	return dst
+}