daniel-s
6/30/2015 - 1:16 PM

Python driver for MongoDB: PyMongo. http://api.mongodb.org/python/current/

Python driver for MongoDB: PyMongo. http://api.mongodb.org/python/current/

# First option: fetch the document, change it in Python, then write it back.
# With save(), if there is no document with that _id, or the object we're
# passing does not have an _id field, MongoDB will create a new document for us.
# The filter uses 'Germany', the same country value as the update() examples.
city = db.cities.find_one({'name': 'Munchen', 'country': 'Germany'})
# find_one() returns None when nothing matches; guard so we don't index None.
if city is not None:
    city['isoCountryCode'] = 'DEU'
    db.cities.save(city)

# Second option: let the server modify the document through update operators.
# NOTE(review): `city` is rebound to whatever update() returns, which is
# presumably a write result rather than the city document - confirm.

# $set -> update the field, or add it if it doesn't exist
city = db.cities.update(
    {'name': 'Munchen', 'country': 'Germany'},
    {'$set': {'isoCountryCode': 'DEU'}}
)

# $unset -> remove the field
city = db.cities.update(
    {'name': 'Munchen', 'country': 'Germany'},
    {'$unset': {'isoCountryCode': ''}}
)

# multi update: multi=True is passed so the change is not limited to a single document
city = db.cities.update(
    {'name': 'Munchen', 'country': 'Germany'},
    {'$set': {'isoCountryCode': 'DEU'}},
    multi=True
)
# Deleting documents: remove() is given a query document, just like find().
# NOTE(review): `query` is not assigned until later in these notes - presumably
# a placeholder for any filter document; confirm before running this line.
db.cities.remove(query) # same functionality as find(): only the matching documents are removed
db.cities.remove() # with no query, removes all documents, one by one
db.cities.remove({'name': 'Chicago'}) # removes the documents whose 'name' is 'Chicago'
db.cities.remove({'name': {'$exists': 0}}) # removes all documents that don't have a 'name' field

# Removes the entire collection and any associated metadata (such as indexes)
db.cities.drop() 
# Filter document: every key/value pair listed here must match.
query = {
    'manufacturer': 'Toyota',
    'class': 'mid-size car',
}
collection.find(query)

# Projection: the fields we want to retrieve from the documents.
# _id is retrieved by default, so it is switched off explicitly with 0.
projection = {
    '_id': 0,
    'name': 1,
}
collection.find(query, projection)

# Query-operator examples. Each assignment replaces `query`; pass whichever one
# you need to find().
from datetime import datetime  # required by the foundingDate range query below

# range queries: $gt $lt $gte $lte $ne
# operators on the same field combine: population > 250000 and <= 500000
query = {'population': {'$gt': 250000, '$lte': 500000}}
# the range operators are used with datetime values as well
query = {'foundingDate': {'$gt': datetime(1837, 1, 1), '$lte': datetime(1837, 12, 31)}}

# documents that HAVE ($exists: 1) or DON'T HAVE ($exists: 0) a specific field (or fields)
query = {'governmentType': {'$exists': 1}}

# regular expressions
query = {'motto': {'$regex': '[Ff]riendship|[Hh]appiness'}}

# any of the values in the array
query = {'modelYears': {'$in': [1965, 1966, 1967]}}

# all the values in the array
query = {'modelYears': {'$all': [1965, 1966, 1967]}}

# dot notation: reach a field nested inside a sub-document
query = {'dimensions.weight': {'$gt': 5000}}
# Aggregation: count the documents per 'source' value, most frequent first.
pipeline = [
    # one output document per distinct 'source', counting 1 for each input document
    {
        '$group': {
            '_id': '$source',
            'count': {'$sum': 1},
        },
    },
    # largest count first
    {
        '$sort': {'count': -1},
    },
]

collection.aggregate(pipeline)

# Aggregation: among users in the 'Brasilia' time zone with at least 100
# statuses, keep the single entry with the most followers.
pipeline = [
    # filter on the nested user fields
    {
        '$match': {
            'user.time_zone': 'Brasilia',
            'user.statuses_count': {'$gte': 100},
        },
    },
    # expose the nested user fields under shorter names
    {
        '$project': {
            'followers': '$user.followers_count',
            'screen_name': '$user.screen_name',
            'tweets': '$user.statuses_count',
        },
    },
    # most followers first, then keep only the top entry
    {'$sort': {'followers': -1}},
    {'$limit': 1},
]

# Aggregation: for documents with country 'India', find the 'isPartOf' value
# that occurs most often.
pipeline = [
    {
        '$match': {'country': 'India'},
    },
    # one document per element of the 'isPartOf' array
    {
        '$unwind': '$isPartOf',
    },
    # count the documents per 'isPartOf' value
    {
        '$group': {
            '_id': '$isPartOf',
            'count': {'$sum': 1},
        },
    },
    # highest count first, then keep only the top entry
    {
        '$sort': {'count': -1},
    },
    {
        '$limit': 1,
    },
]
  
# Aggregation: the five screen names with the most documents, together with
# the texts collected for each of them.
pipeline = [
    {
        '$group': {
            '_id': '$user.screen_name',
            # number of documents in the group
            'count': {'$sum': 1},
            # $push gathers every 'text' value of the group into an array
            'tweet_texts': {'$push': '$text'},
        },
    },
    # highest count first, then keep the first five
    {'$sort': {'count': -1}},
    {'$limit': 5},
]
  
# Aggregation with two $group stages: average population per 'isPartOf' value
# for country 'India', then the average of those averages.
pipeline = [
    {
        '$match': {'country': 'India'},
    },
    # one document per element of the 'isPartOf' array
    {
        '$unwind': '$isPartOf',
    },
    # first grouping: average 'population' per 'isPartOf' value
    {
        '$group': {
            '_id': '$isPartOf',
            'avg': {'$avg': '$population'},
        },
    },
    # second grouping: a constant _id puts every result into a single group
    {
        '$group': {
            '_id': 'India Regional City Population Average',
            'avg': {'$avg': '$avg'},
        },
    },
]
"""
MongoDB database installed on ~/Development/MongoDB/
To run MongoDB just type:
mongod --dbpath ~/Development/MongoDB/db/

cd <mongodb installation dir>
./bin/mongod --dbpath ~/Development/MongoDB/db/
"""

from pymongo import MongoClient

# Connect using a MongoDB URI that points at localhost, port 27017.
client = MongoClient('mongodb://localhost:27017')

# Dictionary-style access returns the same database / collection objects as
# attribute-style access (client.my_database, db.my_collection).
db = client['my_database']
collection = db['my_collection']

# With pymongo, the document to insert in a collection has to be a dictionary
# (or a list of dictionaries).
document = {}
# insert() is valid for pymongo 2.8; for pymongo 3.0 it has to be
# insert_one(...) or insert_many(...).
# It returns the '_id' value (or list of '_id' values).
collection.insert(document)

n = 4
# For inserting several documents, they must have a valid JSON-like structure,
# that is, dicts inside a list. The comprehension builds a separate dict per
# document ([{}] * n would repeat one shared dict). range() is used instead of
# xrange() so the snippet also runs on Python 3.
documents = [{} for _ in range(n)]
collection.insert(documents)  # again, this is valid for pymongo 2.8