Apache Cassandra: Iterate over all columns in a row
Recently I have been using Cassandra for one of my projects, and one of the needs is to iterate over all columns of a row. Each column represents an individual piece of data, whose type is identified by the row id, and the columns keep changing, so I can’t simply use a set of known column names. Using the `setRange` call on a `SliceQuery` and setting a large `count` is also not an option, since Cassandra will try to load the entire set of columns into memory. Instead I’ve written this iterator, which takes a query on which the row key and column family have already been set, and loads columns as they are requested. By default it loads 100 columns at a time. You could make it take the count as a parameter and all, but this works for me for now.
// Made by Nikhil Marathe <nsm.nikhil@gmail.com>
// This code is in the public domain.
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import me.prettyprint.hector.api.beans.ColumnSlice;
import me.prettyprint.hector.api.beans.HColumn;
import me.prettyprint.hector.api.exceptions.HectorException;
import me.prettyprint.hector.api.query.SliceQuery;
public class AllColumnsIterator<N, V> implements Iterator<HColumn<N, V>> { | |
private N start; | |
private int count; | |
Iterator<HColumn<N, V>> columnsIterator; | |
SliceQuery<?, N, V> query; | |
private boolean isLastIteration; | |
public AllColumnsIterator(SliceQuery<?, N, V> query) { | |
start = null; | |
count = 100; | |
columnsIterator = null; | |
this.query = query; | |
isLastIteration = false; | |
} | |
public Iterator<HColumn<N, V>> iterator() { | |
return this; | |
} | |
public boolean hasNext() { | |
if (columnsIterator == null || !columnsIterator.hasNext()) { | |
if (isLastIteration) | |
return false; | |
if (!fetchMore()) | |
return false; | |
} | |
return true; | |
} | |
public HColumn<N, V> next() { | |
return columnsIterator.next(); | |
} | |
private boolean fetchMore() { | |
try { | |
query.setRange(start, null, false, count); | |
ColumnSlice<N, V> slice = query.execute().get(); | |
List<HColumn<N, V>> columns = slice.getColumns(); | |
int origSize = columns.size(); | |
if (origSize == 0) { | |
return false; | |
} | |
if (origSize >= count) | |
start = columns.remove(columns.size()-1).getName(); | |
columnsIterator = columns.iterator(); | |
if (origSize < count) | |
isLastIteration = true; | |
return true; | |
} catch (HectorException e) { | |
return false; | |
} | |
} | |
public void remove() { | |
throw new UnsupportedOperationException(); | |
} | |
} |
The one ‘problem’ with this is the removal of the last column to ensure that there are no duplicates, while still having a start point for the next query. This is because each column is independent, so you cannot ask a column who its next neighbour is and start the next query from there. If anybody has a tip to make it more elegant, I’d love to hear it.