|
MongoDB provides an interesting "multikey" feature that can automatically index arrays of an object's values. A good example is tagging. Suppose you have an article tagged with some category names:
$ dbshell
> db.articles.save( { name: "Warm Weather", author: "Steve",
tags: ['weather', 'hot', 'record', 'april'] } )
> db.articles.find()
{"name" : "Warm Weather" , "author" : "Steve" ,
"tags" : ["weather","hot","record","april"] ,
"_id" : "497ce4051ca9ca6d3efca323"}
We can easily perform a query looking for a particular value in the tags array:
> db.articles.find( { tags: 'april' } )
{"name" : "Warm Weather" , "author" : "Steve" ,
"tags" : ["weather","hot","record","april"] ,
"_id" : "497ce4051ca9ca6d3efca323"}
Further, we can index on the tags array. Creating an index on an array field results in the database indexing each element of the array:
> db.articles.ensureIndex( { tags : 1 } )
true
> db.articles.find( { tags: 'april' } )
{"name" : "Warm Weather" , "author" : "Steve" ,
"tags" : ["weather","hot","record","april"] ,
"_id" : "497ce4051ca9ca6d3efca323"}
> db.articles.find( { tags: 'april' } ).explain()
{"cursor" : "BtreeCursor tags_1" , "startKey" : {"tags" : "april"} ,
"endKey" : {"tags" : "april"} , "nscanned" : 1 , "n" : 1 , "millis" : 0 }
Incrementally adding and removing keys
You can use $addToSet to add a new key to the array, and $pull to remove one.
> db.articles.update({name: "Warm Weather"},{$addToSet:{tags:"northeast"}});
> db.articles.find();
...
> db.articles.update({name: "Warm Weather"},{$pull:{tags:"northeast"}});
Embedded object fields in an array
The same technique can be used to find fields within objects embedded in arrays:
> // find posts where julie commented
> db.posts.find( { "comments.author" : "julie" } )
{"title" : "How the west was won",
"comments" : [{"text" : "great!" , "author" : "sam"},
{"text" : "ok" , "author" : "julie"}],
"_id" : "497ce79f1ca9ca6d3efca325"}
Querying on all values in a given set
By using the $all query option, a set of values may be supplied, each of which must be present in a matching object field. For example:
> db.articles.find( { tags: { $all: [ 'april', 'record' ] } } )
{"name" : "Warm Weather" , "author" : "Steve" ,
"tags" : ["weather","hot","record","april"] ,
"_id" : "497ce4051ca9ca6d3efca323"}
> db.articles.find( { tags: { $all: [ 'april', 'june' ] } } )
> // no matches
Exact Array Matching with an Index
Since indexing an array (which creates the multikey index) only indexes each element in the array, it is not possible to do an exact array match using only an index on the array field. The index will be used to look up a subset of the values (currently the first one), and then each candidate document will be inspected for the exact match. This is also true for range queries, but the rules for types are a bit more complicated, since they follow the matching rules for single values.
> db.emails.find()
{ "_id" : ObjectId("4ef8ee415ca7c3e2a4a639a8"), "email_id" : [ 109, 97, 105, 108 ] }
{ "_id" : ObjectId("4ef8ee435ca7c3e2a4a639a9"), "email_id" : [ 109, 101, 64, 98 ] }
> db.emails.find({"email_id" : [ 109, 97, 105, 108 ]})
{ "_id" : ObjectId("4ef8ee415ca7c3e2a4a639a8"), "email_id" : [ 109, 97, 105, 108 ] }
> db.emails.find({"email_id" : [ 109, 97, 105, 108 ]}).explain()
{
"cursor" : "BtreeCursor email_id_1 multi",
"nscanned" : 2,
"nscannedObjects" : 2,
"n" : 1,
"millis" : 0,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : true,
"indexOnly" : false,
"indexBounds" : {
"email_id" : [
[
109,
109
],
[
[
109,
97,
105,
108
],
[
109,
97,
105,
108
]
]
]
}
}
//Note the nscanned, nscannedObjects and n values.
//Notice the array of arrays and the behavior below
> db.emails.insert({ "_id" : ObjectId("4ef8ee435ca7c3e2a4a639a7"), "email_id" : [[ 109, 101, 64, 98 ]] })
> db.emails.find()
{ "_id" : ObjectId("4ef8ee415ca7c3e2a4a639a8"), "email_id" : [ 109, 97, 105, 108 ] }
{ "_id" : ObjectId("4ef8ee435ca7c3e2a4a639a9"), "email_id" : [ 109, 101, 64, 98 ] }
{ "_id" : ObjectId("4ef8ee435ca7c3e2a4a639a7"), "email_id" : [ [ 109, 101, 64, 98 ] ] }
> db.emails.find({"email_id" : [ 109, 101, 64, 98 ]})
{ "_id" : ObjectId("4ef8ee435ca7c3e2a4a639a9"), "email_id" : [ 109, 101, 64, 98 ] }
{ "_id" : ObjectId("4ef8ee435ca7c3e2a4a639a7"), "email_id" : [ [ 109, 101, 64, 98 ] ] }
Caveats with Parallel Arrays
When using a compound index, at most one of the indexed values in any document can be an array. So if we have an index on {a: 1, b: 1}, the following documents are both fine:
{a: [1, 2], b: 1}
{a: 1, b: [1, 2]}
This document, however, will fail to be inserted, with an error message "cannot index parallel arrays":
{a: [1, 2], b: [1, 2]}
The problem with indexing parallel arrays is that each value in the cartesian product of the compound keys would have to be indexed, which can get out of hand very quickly.
See Also
|

PLEASE POST QUESTIONS IN THE USER GROUPS FORUM. Post non-question comments and helpful hints here.
blog comments powered by Disqus