StringTrie is a general tree where each node has one or more characters. The tree is used to search for string keywords and save the space of stored keywords. A ''trie'' is a data structure similar to but different to a tree. See http://www.cs.mcgill.ca/~cs251/OldCourses/1997/topic7/ for more detail about how a trie can be used for fast retrieval on large data sets such as those used in BioInformatics The term trie comes from the word "re'''trie'''val". Tries were introduced in the 1960s by Fredkin. ---- An optimization of a string trie is a DAWG (Directed Acyclic Word Graph), which removes duplication of common suffix sub-tries. For a full EnglishLanguage dictionary, it can reduce the overall node count by a factor of six. An example of its use is described in "The World's Fastest Scrabble Program", CommunicationsOfTheAssociationForComputingMachinery. See DirectedAcyclicGraph ---- '''Sample code in JavaLanguage''': /* Trie.java */ import java.util.*; /* * Note: To avoid the case that an entire string is a prefix of another one, each * key is appended by "\0" */ /* Implement compressed trie tree data structure. */ public class Trie { /* Define the object that indicates no such key was found. */ public static final Object NO_SUCH_KEY = "Object:NO_SUCH_KEY"; /* Represent data stored in each node */ private class Trie''''''Item implements Comparable { public int index, /* The index among 'strings' list */ begin, /* The begin of the substring */ end; /* The end of the substring */ /* The reference to the list of strings used in tries */ private List strings; /* Data associated this keyword that ends at this item. */ public Object element; /* Constructor */ Trie''''''Item (int index, int begin, int end, List strings, Object element) { this.index = index; this.begin = begin; this.end = end; this.strings = strings; this.element = element; } /* Return the substring that this item represents. */ public String getString () { return strings.get (index) .substring (begin, end + 1); } /* For debugging purpose */ public String toString () { return getString () + ":" + index + "," + begin + "," + end; } /* Implement comparable interface */ public int compareTo (Trie''''''Item o) { return getString ().compareTo (o.getString ()); } /* Make sure equals method consistent with compareTo method */ public boolean equals (Object o) { return o instanceof Trie''''''Item && compareTo (o) == 0; } } /* The number of keywords stored in trie */ private int m_size; /* The list of strings stored in trie */ private List m_strings; /* The root node of the trie tree */ private Node m_head; /* Constructor */ Trie () { m_strings = new Array''''''List (); m_head = new Node (null); /* There is always a root that has null element. */ m_size = 0; } /* Return the number of items */ public int size () { return m_size; } /* Test if this is empty */ public boolean isEmpty () { return size () == 0; } /* Find the longest node corresponding to key */ private Node find (Node node, String key) { for (Iterator i = node.children (); i.hasNext (); ) { /* Try a new lineage */ Node candidate = i.next (); String s = ((Trie''''''Item) candidate.element).getString (); if (key.startsWith (s)) { key = key.substring (s.length ()); if (key.length () > 0) { /* Seek this lineage. */ return find (candidate, key); } else { /* This lineage matches. */ return candidate; } } } /* This doesn't match */ return null; } /* If this contains an item with key equal to k, then return the element of such an item, else return null */ public Object findElement (String key) { key += "\0"; Node node = find (m_head, key); if (node != null) return ((Trie''''''Item) node.element).element; else return NO_SUCH_KEY; } /* Insert an item with element and key */ public void insertItem (String key, Object element) { key += "\0"; /* Make sure there is no corresponding key now. */ if (findElement (key) != NO_SUCH_KEY) return; /* Add new string */ m_strings.add (key); final int index = m_strings.size () - 1; Node node = m_head; for (int i = 0; i < key.length (); i++) { final Trie''''''Item item = new Trie''''''Item (index, i, i, m_strings, null); /* Find the child corresponding to a character. */ Node next = node.getChild (item); if (next == null) { /* If not found, add new one. */ next = new Node (item); node.add (next); } node = next; } /* Put element at the last node of the lineage */ ((Trie''''''Item) node.element).element = element; } /* Compress the subtree that the given node leads. Return the new subtree. */ private Node compress0 (Node node) { if (node.size () == 1) { /* The node can be compressed. */ /* * First, get the compressed the subtree. Be careful, most of time, it * is one node, possibly is a subtree. */ Node new_node = compress0 (node.children ().next ()); /* * Note: Be careful. Don't expand the string of the node * but the string of the node's child. */ Trie''''''Item child = (Trie''''''Item) new_node.element; assert child.begin > 0; --child.begin; return new_node; } else { /* The node cannot be compressed. */ /* Aggregate the compressed new children */ Node new_node = new Node (node.element); for (Iterator i = node.children (); i.hasNext (); ) { new_node.add (compress0 (i.next ())); } return new_node; } } /* Compress the trie tree */ public void compress () { /* Aggregate the compressed new children */ Node new_head = new Node (null); for (Iterator i = m_head.children (); i.hasNext (); ) { new_head.add (compress0 (i.next ())); } m_head = new_head; } /* For debugging purpose. Add up debug info of the ancestors of the node. */ private String ancestryToString (Node node, int indent) { String s = ""; for (int i = 0; i < indent; i++) s += " "; Trie''''''Item item = (Trie''''''Item) node.element; s += item + "\n"; for (Iterator i = node.children (); i.hasNext (); ) { s += ancestryToString (i.next (), indent + 1); } return s; } /* For debugging purpose. */ public String toString () { String s = ""; for (String x : m_strings) { s += x + "\n"; } for (Iterator i = m_head.children (); i.hasNext (); ) { s += ancestryToString (i.next (), 0); } return s; } } Where are the Node and TrieItem classes? ---- '''Sample Code in PythonLanguage:''' http://cvs.bioperl.org/cgi-bin/viewcvs/viewcvs.cgi/biopython/Bio/triefind.py?rev=1.4&cvsroot=biopython&content-type=text/vnd.viewcvs-markup ---- Compare with TernarySearchTree ---- CategoryDataStructure