Spatial queries are becoming more and more important, e.g. if you want to query all restaurants within a distance of 5 kilometers of your current position. This is not easy because our planet is a sphere (so it requires a projection of the locations) and spatial calculations are very resource-intensive. That's why Lucene implements a smart technical approach to this problem. It is based on so-called “tiers”, which make a preselection by bounding boxes possible. (-> If you want to know more, read this)
I evaluated this to provide a fast way to find GeoTIFF images stored in a Hadoop HBase database based on the zoom extent of a client GIS window. The first thing I did was to implement the usual distance example in (simple and, as usual, brief) Scala.
Here we go:
/** Demo entry point: fills the index with sample locations and runs two distance searches. */
object MyMain {
  val s = new SpatialLuceneExample

  /**
   * Indexes the sample data, then prints the hits of two searches: one consumed
   * through the callback-style `forEachFindNear`, one by iterating over the
   * array returned by `findNear`.
   *
   * @param a command-line arguments (unused)
   */
  def main(a: Array[String]): Unit = {
    addData

    s.forEachFindNear(null, 38.8725000, -77.3829000, 4) { (doc, latitude, longitude, distance) =>
      printHit(doc.get("name"), latitude, longitude, distance)
    }

    println()

    for ((doc, latitude, longitude, distance) <- s.findNear(null, -0.546562, 9.459229, 4000)) {
      printHit(doc.get("name"), latitude, longitude, distance)
    }
  }

  /**
   * Prints one hit: the name with its distance, then the coordinates on a second line.
   *
   * The name is passed as a format argument instead of being concatenated into
   * the format string, so a name containing '%' cannot break the formatting.
   */
  private def printHit(name: String, latitude: Double, longitude: Double, distance: Double): Unit = {
    printf("%s: %.2f Miles\n", name, distance)
    println("\t\t(" + latitude + "," + longitude + ")")
  }

  /** Adds all sample locations (name, latitude, longitude) to the index of `s`. */
  def addData: Unit = s.fill { index =>
    index += ("McCormick & Schmick's Seafood Restaurant", 38.9579000, -77.3572000)
    index += ("Jimmy's Old Town Tavern", 38.9690000, -77.862000)
    index += ("Ned Devine's", 38.9510000, -77.4107000)
    index += ("Old Brogue Irish Pub", 38.9955000, -77.2884000)
    index += ("Alf Laylah Wa Laylah", 38.8956000, -77.4258000)
    index += ("Sully's Restaurant & Supper", 38.9003000, -77.4467000)
    index += ("TGIFriday", 38.8725000, -77.3829000)
    index += ("Potomac Swing Dance Club", 38.9027000, -77.2639000)
    index += ("White Tiger Restaurant", 38.9027000, -77.2638000)
    index += ("Jammin' Java", 38.9039000, -77.2622000)
    index += ("Potomac Swing Dance Club", 38.9027000, -77.2639000)
    index += ("WiseAcres Comedy Club", 38.9248000, -77.2344000)
    index += ("Glen Echo Spanish Ballroom", 38.9691000, -77.1400000)
    index += ("Whitlow's on Wilson", 38.8889000, -77.0926000)
    index += ("Iota Club and Cafe", 38.8890000, -77.0923000)
    index += ("Hilton Washington Embassy Row", 38.9103000, -77.0451000)
    index += ("HorseFeathers, Bar & Grill", 39.01220000000001, -77.3942)
    index += ("Marshall Island Airfield", 7.06, 171.2)
    index += ("Wonga Wongue Reserve, Gabon", -0.546562, 9.459229)
    index += ("Midway Island", 25.7, -171.7)
    index += ("North Pole Way", 55.0, 4.0)
  }
}
The MyMain
object (complete code above) provides method addData
to fill the spatial content, restaurants in this case. The main method itself does two distance queries, one using a closure and one using an Array
as return value that is used in a for statement. Pretty convenient, isn’t it?
class SpatialLuceneExample {
  . . .

  /**
   * Runs `f` against this index and then closes the index writer.
   *
   * The writer is closed in a `finally` block so that it is released even
   * when `f` throws.
   *
   * @param f callback that adds documents, typically via `+=`
   */
  def fill(f: SpatialLuceneExample => Unit): Unit = {
    try f(this)
    finally writer.close()
  }

  /**
   * Indexes one named location.
   *
   * The document stores the name, the prefix-coded latitude and longitude, a
   * constant "metafile" marker field, and one box-id field per cartesian tier
   * between the tiers that best fit `maxMiles` and `minMiles`.
   *
   * @param name display name of the location
   * @param lat  latitude
   * @param lng  longitude
   */
  def +=(name: String, lat: Double, lng: Double): Unit = {
    val doc = new Document
    doc.add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED))
    doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat), Field.Store.YES, Field.Index.NOT_ANALYZED))
    doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng), Field.Store.YES, Field.Index.NOT_ANALYZED))
    doc.add(new Field("metafile", "doc", Field.Store.YES, Field.Index.ANALYZED))

    val projector = new SinusoidalProjector
    val maxMiles = 40000
    val minMiles = 1

    // A tier-0 plotter is only used to find the range of tiers to index.
    val ctp = new CartesianTierPlotter(0, projector, tierPrefix)
    val startTier = ctp.bestFit(maxMiles)
    val endTier = ctp.bestFit(minMiles)

    for (tier <- startTier to endTier) {
      // Named differently from `ctp` above to avoid shadowing it.
      val tierPlotter = new CartesianTierPlotter(tier, projector, tierPrefix)
      val boxId = tierPlotter.getTierBoxId(lat, lng)
      doc.add(new Field(tierPlotter.getTierFieldName(), NumericUtils.doubleToPrefixCoded(boxId),
        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS))
    }

    writer.addDocument(doc)
  }
}
The addData
method uses method +=
to add the spatial content. It adds the latitude and longitude locations to the Lucene Document and creates the correct tiers (which are calculated with maxMiles
and minMiles
).
class SpatialLuceneExample {
  . . .

  /**
   * Searches for documents within `radius` of the given point, sorted by distance.
   *
   * At most 10 hits are returned (the limit passed to the search). The result
   * is built from the returned score docs rather than sized by `totalHits`,
   * because `totalHits` can exceed the number of documents actually returned;
   * sizing by it would leave trailing `null` entries in the array.
   *
   * @param what      term to match in the "name" field, or `null` to match every document
   * @param latitude  latitude of the search centre
   * @param longitude longitude of the search centre
   * @param radius    search radius passed to the `DistanceQueryBuilder`
   * @return one (document, latitude, longitude, distance) tuple per returned hit
   */
  def findNear(what: String, latitude: Double, longitude: Double, radius: Double): Array[(Document, Double, Double, Double)] = {
    val searcher = new IndexSearcher(directory)
    try {
      val dq = new DistanceQueryBuilder(latitude, longitude, radius, latField, lngField, tierPrefix, true)
      // Every document carries metafile=doc, so that term acts as "match all".
      val term = if (what == null) new Term("metafile", "doc") else new Term("name", what)
      val tq = new TermQuery(term)
      val dsort = new DistanceFieldComparatorSource(dq.getDistanceFilter)
      val sort = new Sort(new SortField("foo", dsort, false))
      val hits = searcher.search(tq, dq.getFilter, 10, sort)
      val distances = dq.getDistanceFilter.getDistances
      println("Number of results: " + hits.totalHits)

      // Array.map is strict, so all documents are loaded before the searcher is closed.
      hits.scoreDocs.map { scoreDoc =>
        val docID = scoreDoc.doc
        val d = searcher.doc(docID)
        val rsLat = NumericUtils.prefixCodedToDouble(d.get(latField))
        val rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField))
        val geoDistance = distances.get(docID)
        (d, rsLat, rsLng, geoDistance.asInstanceOf[Double])
      }
    } finally {
      searcher.close()
    }
  }
}
Method findNear
provides the search. It uses a distance sort and an Array
to return the found Document
, the location values and the distance.
The rest of the class SpatialLuceneExample
is:
class SpatialLuceneExample {
  // Names of the indexed latitude/longitude fields and the prefix of the tier fields.
  val latField = "lat"
  val lngField = "lon"
  val tierPrefix = "_localTier"

  // In-memory index and the writer used to populate it.
  val directory = new RAMDirectory
  val writer = new IndexWriter(directory, new WhitespaceAnalyzer(), MaxFieldLength.UNLIMITED)

  def fill(f: SpatialLuceneExample => Unit) = . . . // see above

  def +=(name: String, lat: Double, lng: Double): Unit = . . . // see above

  /**
   * Runs `findNear` with the given arguments and applies `f` to every hit.
   *
   * @param f callback receiving (document, latitude, longitude, distance) for each hit
   */
  def forEachFindNear(what: String, latitude: Double, longitude: Double, radius: Double)
                     (f: (Document, Double, Double, Double) => Unit): Unit =
    findNear(what, latitude, longitude, radius).foreach {
      // The distance is already a Double, so no cast is needed.
      case (doc, lat, lng, dst) => f(doc, lat, lng, dst)
    }

  def findNear(what: String, latitude: Double, longitude: Double, radius: Double) = . . . // see above
}
I think Lucene provides a rather simple API to do spatial distance search.
And didn't I say this already: Scala is awesome.
“… two distance queries, one using a clojure …”
I believe you meant cloSure 🙂
Grüsse aus München, Alex
Yep – sorry and thx.
I am never sure, where I can use the term “closure”.
The correct Scala sentence would be:
There is a function
forEachFindNear
which takes another function f and a value (Document, Double, Double, Double) and applies function f to (Document, Double, Double, Double).
I implemented an exact Java version with Lucene 2.9.3 and it’s not working. Any idea why? Here is the code:
import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
public class Main {

    /**
     * Builds an in-memory index with the sample locations, then prints the
     * locations found near a fixed point.
     *
     * The writer is closed before searching: the search opens its own reader
     * on the directory and only sees documents once the writer has committed
     * them, which closing does.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SpacialLuceneExample s = new SpacialLuceneExample();
        Directory dir = new RAMDirectory();
        try {
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(),
                    MaxFieldLength.UNLIMITED);
            try {
                s.fill(writer);
            } finally {
                // Commit and release the index even if filling fails.
                writer.close();
            }
            List<SearchResult> findNear = s.findNear(dir, null, 38.9956000, -77.5258000, 1000.0);
            for (SearchResult searchResult : findNear) {
                System.out.println(searchResult.toString());
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.spatial.tier.DistanceFieldComparatorSource;
import org.apache.lucene.spatial.tier.DistanceQueryBuilder;
import org.apache.lucene.spatial.tier.projections.CartesianTierPlotter;
import org.apache.lucene.spatial.tier.projections.IProjector;
import org.apache.lucene.spatial.tier.projections.SinusoidalProjector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.NumericUtils;
public class SpacialLuceneExample {
private final String latField = “lat”;
private final String lngField = “lon”;
private final String tierPrefix = “_localTier”;
public SpacialLuceneExample() {
super();
}
public void fill(IndexWriter writer) {
add(“Jimmy’s Old Town Tavern”, 38.9690000, -77.862000, writer);
add(“Ned Devine’s”, 38.9510000, -77.4107000, writer);
add(“Old Brogue Irish Pub”, 38.9955000, -77.2884000, writer);
add(“Alf Laylah Wa Laylah”, 38.8956000, -77.4258000, writer);
add(“Sully’s Restaurant & Supper”, 38.9003000, -77.4467000, writer);
add(“TGIFriday”, 38.8725000, -77.3829000, writer);
add(“Potomac Swing Dance Club”, 38.9027000, -77.2639000, writer);
add(“White Tiger Restaurant”, 38.9027000, -77.2638000, writer);
add(“Jammin’ Java”, 38.9039000, -77.2622000, writer);
add(“Potomac Swing Dance Club”, 38.9027000, -77.2639000, writer);
add(“WiseAcres Comedy Club”, 38.9248000, -77.2344000, writer);
add(“Glen Echo Spanish Ballroom”, 38.9691000, -77.1400000, writer);
add(“Whitlow’s on Wilson”, 38.8889000, -77.0926000, writer);
add(“Iota Club and Cafe”, 38.8890000, -77.0923000, writer);
add(“Hilton Washington Embassy Row”, 38.9103000, -77.0451000, writer);
add(“HorseFeathers, Bar & Grill”, 39.01220000000001, -77.3942, writer);
add(“Marshall Island Airfield”, 7.06, 171.2, writer);
add(“Wonga Wongue Reserve, Gabon”, -0.546562, 9.459229, writer);
add(“Midway Island”, 25.7, -171.7, writer);
add(“North Pole Way”, 55.0, 4.0, writer);
}
private void add(String name, double lat, double lng, IndexWriter writer) {
Document doc = new Document();
doc.add(new Field(“name”, name, Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),
Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),
Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(“metafile”, “doc”, Field.Store.YES,
Field.Index.ANALYZED));
IProjector projector = new SinusoidalProjector();
int maxMiles = 40000;
int minMiles = 1;
CartesianTierPlotter ctp = new CartesianTierPlotter(0, projector,
tierPrefix);
int startTier = ctp.bestFit(maxMiles);
int endTier = ctp.bestFit(minMiles);
for (int tier = startTier; tier <= endTier; tier++) {
CartesianTierPlotter ctp2 = new CartesianTierPlotter(tier,
projector, tierPrefix);
double boxId = ctp2.getTierBoxId(lat, lng);
doc.add(new Field(ctp2.getTierFieldName(), NumericUtils
.doubleToPrefixCoded(boxId), Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS));
}
try {
writer.addDocument(doc);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public List findNear(Directory dir, String what,
double latitude, double longitude, double radius) {
List result = new ArrayList();
try {
IndexSearcher searcher = new IndexSearcher(dir);
DistanceQueryBuilder dq = new DistanceQueryBuilder(latitude,
longitude, radius, latField, lngField, tierPrefix, true);
Query tq = null;
if(what !=null)
tq = new TermQuery(new Term(“name”,what));
else
tq = new TermQuery(new Term(“metafile”, “doc”));
DistanceFieldComparatorSource dsort = new DistanceFieldComparatorSource(dq.getDistanceFilter());
Sort sort = new Sort(new SortField(“foo”,dsort,false));
TopDocs hits = searcher.search(tq, dq.getFilter(),10,sort);
int numResults = hits.totalHits;
Map distances = dq.getDistanceFilter().getDistances();
for (int i = 0; i < numResults; i++) {
int docID = hits.scoreDocs[i].doc;
Document d = searcher.doc(docID);
double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
double geo_distance = distances.get(docID);
SearchResult sr = new SearchResult(d, rsLat, rsLng, geo_distance);
result.add(sr);
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return result;
}
}
Never mind, I found the problem: I forgot to close the writer before searching.