Java port of Quicksilver-style Live Search
Posted by Kelvin on 19 Nov 2012 at 06:04 pm | Tagged as: programming, Lucene / Solr / Elasticsearch / Nutch
Here's a straight Java port of the quicksilver algo, found here: http://orderedlist.com/blog/articles/live-search-with-quicksilver-style-for-jquery/
quicksilver.js contains the actual algorithm in javascript.
It uses the same input strings as the demo page at http://static.railstips.org/orderedlist/demos/quicksilverjs/jquery.html
import java.io.IOException; import java.util.TreeSet; public class Quicksilver { public static void main(String[] args) throws IOException { for (ScoreDoc doc : getScores("DGHTD")) System.out.println(doc); System.out.println("============================================"); for (ScoreDoc doc : getScores("Web")) System.out.println(doc); System.out.println("============================================"); for (ScoreDoc doc : getScores("jhn nnmkr")) System.out.println(doc); System.out.println("============================================"); for (ScoreDoc doc : getScores("wp")) System.out.println(doc); } public static TreeSet<ScoreDoc> getScores(String term) { term = term.toLowerCase(); TreeSet<ScoreDoc> scores = new TreeSet<ScoreDoc>(); for (int i = 0; i < cache.length; i++) { float score = score(cache[i], term, 0); if (score > 0) { scores.add(new ScoreDoc(score, i)); } } return scores; } public static float score(String str, String abbreviation, int offset) { // int offset ? offset : 0 // TODO: I think this is unused... remove if (abbreviation.length() == 0) return 0.9f; if (abbreviation.length() > str.length()) return 0.0f; for (int i = abbreviation.length(); i > 0; i--) { String sub_abbreviation = abbreviation.substring(0, i); int index = str.indexOf(sub_abbreviation); if (index < 0) continue; if (index + abbreviation.length() > str.length() + offset) continue; String next_string = str.substring(index + sub_abbreviation.length()); String next_abbreviation = null; if (i >= abbreviation.length()) next_abbreviation = ""; else next_abbreviation = abbreviation.substring(i); float remaining_score = score(next_string, next_abbreviation, offset + index); if (remaining_score > 0) { float score = str.length() - next_string.length(); if (index != 0) { int j = 0; char c = str.charAt(index - 1); if (c == 32 || c == 9) { for (j = (index - 2); j >= 0; j--) { c = str.charAt(j); score -= ((c == 32 || c == 9) ? 1 : 0.15); } // XXX maybe not port str heuristic // // } else if ([[NSCharacterSet uppercaseLetterCharacterSet] characterIsMember:[self characterAtIndex:matchedRange.location]]) { // for (j = matchedRange.location-1; j >= (int) searchRange.location; j--) { // if ([[NSCharacterSet uppercaseLetterCharacterSet] characterIsMember:[self characterAtIndex:j]]) // score--; // else // score -= 0.15; // } } else { score -= index; } } score += remaining_score * next_string.length(); score /= str.length(); return score; } } return 0.0f; } public static class ScoreDoc implements Comparable<ScoreDoc> { public float score; public int doc; public String term; public ScoreDoc(float score, int doc) { this.score = score; this.doc = doc; this.term = cache[doc]; } public int compareTo(ScoreDoc o) { if (o.score < score) return -1; if (o.score > score) return 1; return 0; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ScoreDoc scoreDoc = (ScoreDoc) o; if (doc != scoreDoc.doc) return false; if (Float.compare(scoreDoc.score, score) != 0) return false; return true; } @Override public int hashCode() { int result = (score != +0.0f ? Float.floatToIntBits(score) : 0); result = 31 * result + doc; return result; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("ScoreDoc"); sb.append("{score=").append(score); sb.append(", doc=").append(doc); sb.append(", term='").append(term).append('\''); sb.append('}'); return sb.toString(); } } public static String[] cache = new String[]{ "The Well-Designed Web", "Welcome John Nunemaker", "Sidebar Creative: The Next Steps", "The Web/Desktop Divide", "2007 in Review", "Don't Complicate the Solution", "Blog to Business", "Single Line CSS", "Comments Work Again", "The iPhone Effect", "Greek Blogger Camp", "FeedBurner FeedSmith", "Download Counter Update 1.3", "Branding Reworked", "Productivity and Fascination", "Passing the Torch", "Goodbye Austin", "Ordered Shirts", "Sidebar Creative", "Building the Modern Web", "Open for Business", "The Art and Science of CSS", "WP Tiger Administration v3.0", "Cleaning House", "Tiger Admin 3.0 Beta Testing", "Rails and MVC", "Updates and More", "FeedBurner Plugin v2.1 Released", "The Global Health Crisis", "WP FeedBurner v2.1 Beta", "Web Development and Information Technology", "On Becoming a Dad", "Tiger Admin and Shuttle", "Staying Small in a Big Place: Part 1", "WaSP eduTF Interview", "Planned Parenthood", "IE7 and Clearing Floats", "SXSWi 2006: Dan Gilbert - How To Do Exactly the Right Thing at All Possible Times", "SXSWi 2006: Traditional Design and New Technology", "SXSWi 2006: Almost There", "HOWTO: Animated Live Search", "Leaving Solo", "Tagged for Four Things", "Automotive Interface", "Another FeedBurner Plugin Update", "WP Tiger Admin 2.0", "WordPress FeedBurner Plugin for 2.0", "SXSWi 2006", "Statistical AJAX", "Semantics and Design", "Download Counter Update", "Best Buy, Worst Experience", "A Realign, or Whatever", "Stop with the Jargon", "10K+ for Tiger Plugin", "Flock and Integration", "Only the Beginning", "A Tip of the Hat", "3 Years", "Pepper: Download Counter", "Launch: Notre Dame College of Arts and Letters", "Innovation, Progress, and Imagination", "This Thing Here", "Ode", "Web Developer Opening", "WordPress Administration Design: Tiger", "SAJAX ColdFusion POST Request Method", "An Underscore Solution", "Google and the Underscore", "The Hand Off", "WordPress Upgrade and RSS", "WordPress FeedBurner Plugin", "Documentation Process", "WordPress Underscore Plugin", "CMS Release", "Two Suggestions for iTunes", "Call for Good Music", "A Change of Platform", "Point/Counterpoint: The Wrapper Div", "IE7 List, As Requested", "I'm a Switcher", "Breadcrumb Trails", "Output Code", "Bending the Matrix", "Children's Resource Group", "Do You Freelance?", "Project Management Software", "I Can't Stand It!", "Shiver Me Timbers!", "NDWG V1.0", "Dealing with IE5/Mac", "To All", "A Natural Progression", "Finishing the Basement", "Where Do You Live?", "The Recursion Project", "Clearing Floats: The FnE Method", "Ordered Zen", "Comment RSS", "Wordpress Code", "Writing Lean CSS", "v3.0 CMYK", "A Clean Slate", "Working for the Irish", "Excuse the Mess", "A Little Help", "Design Revisions", "Aloha", "FTOS Round 2", "I Love Storms", "One Gig?", "AD:TECH 2004 Chicago", "Thanks and Response", "OrderedList.com v2.0", "Skuzzy Standards", "Simple List", "Anger Management", "A Practical Start to Web Standards", "Irony and Progress", "The Familiar Chirping of Crickets", "Results of FTOS Round 1", "Figure This Out, Steve", "Increasing Developer Productivity", "One Down", "Content Management Your Way", "We Have Liftoff", "The Great Divide", "What's in a Name?", "Just How Important is Validation?"}; static{ for (int i = 0, n = cache.length; i < n; i++) { cache[i] = cache[i].toLowerCase(); } } }