首先是安装

yum install -y mongodb-server mongodb

Installing for dependencies:  
 boost-filesystem                                      
 boost-iostreams                                       
 boost-program-options                                 
 boost-system                                          
 boost-thread                                          
 gperftools-libs                                       
 libicu                                                
 libunwind                                             
 v8

会装一堆依赖包,boost库,icu库,v8库,gperftools库,都是很厉害的库啊!

启动:

service mongod start  

导入海量数据:

[root@ovs-16-11-2 ~]# mongoimport -d mydb -c prj01 --type csv --file opendata_projects.csv --headerline
connected to: 127.0.0.1  
Thu Jul 28 09:50:47.002         Progress: 39118658/470754183    8%  
Thu Jul 28 09:50:47.002             74400   24800/second  
Thu Jul 28 09:50:50.033         Progress: 80042213/470754183    17%  
Thu Jul 28 09:50:50.033             150700  25116/second  
Thu Jul 28 09:50:53.145         Progress: 108143323/470754183   22%  
Thu Jul 28 09:50:53.145             202700  22522/second  
Thu Jul 28 09:50:56.004         Progress: 149781879/470754183   31%  
Thu Jul 28 09:50:56.004             280000  23333/second  
Thu Jul 28 09:50:59.001         Progress: 179705162/470754183   38%  
Thu Jul 28 09:50:59.001             336200  22413/second  
Thu Jul 28 09:51:03.385         Progress: 212197023/470754183   45%  
Thu Jul 28 09:51:03.385             396400  20863/second  
Thu Jul 28 09:51:06.015         Progress: 236552399/470754183   50%  
Thu Jul 28 09:51:06.015             441700  20077/second  
Thu Jul 28 09:51:09.299         Progress: 264365847/470754183   56%  
Thu Jul 28 09:51:09.299             493300  19732/second  
Thu Jul 28 09:51:12.001         Progress: 304790148/470754183   64%  
Thu Jul 28 09:51:12.001             568500  20303/second  
Thu Jul 28 09:51:15.033         Progress: 323508057/470754183   68%  
Thu Jul 28 09:51:15.033             603300  19461/second  
Thu Jul 28 09:51:18.607         Progress: 361610334/470754183   76%  
Thu Jul 28 09:51:18.607             674200  19829/second  
Thu Jul 28 09:51:21.000         Progress: 393748962/470754183   83%  
Thu Jul 28 09:51:21.000             733700  19829/second  
Thu Jul 28 09:51:24.007         Progress: 427667505/470754183   90%  
Thu Jul 28 09:51:24.007             796900  19922/second  
Thu Jul 28 09:51:27.001         Progress: 459658299/470754183   97%  
Thu Jul 28 09:51:27.001             857300  19937/second  
Thu Jul 28 09:51:27.793 check 9 878853  
Thu Jul 28 09:51:27.979 imported 878852 objects  

进入命令行,看看库的整体情况:

mongo  
use mydb  
show collections  
db.prj01.findOne()  

完整结果如下:

mongo  
MongoDB shell version: 2.4.14  
connecting to: test  
> use mydb
switched to db mydb  
> show collections
prj01  
system.indexes  
> db.prj01.findOne()
{
    "_id" : ObjectId("579964f41d36d69d1752f82b"),
    "_projectid" : "7342bd01a2a7725ce033a179d22e382d",
    "_teacher_acctid" : "5c43ef5eac0f5857c266baa1ccfa3d3f",
    "_schoolid" : "9e72d6f2f1e9367b578b6479aa5852b7",
    "school_ncesid" : NumberLong("360009702803"),
    "school_latitude" : 40.688454,
    "school_longitude" : -73.910432,
    "school_city" : "Brooklyn",
    "school_state" : "NY",
    "school_zip" : 11207,
    "school_metro" : "urban",
    "school_district" : "New York City Dept Of Ed",
    "school_county" : "Kings (Brooklyn)",
    "school_charter" : "f",
    "school_magnet" : "t",
    "school_year_round" : "f",
    "school_nlns" : "f",
    "school_kipp" : "f",
    "school_charter_ready_promise" : "f",
    "teacher_prefix" : "Mr.",
    "teacher_teach_for_america" : "f",
    "teacher_ny_teaching_fellow" : "f",
    "primary_focus_subject" : "Other",
    "primary_focus_area" : "Applied Learning",
    "secondary_focus_subject" : "",
    "secondary_focus_area" : "",
    "resource_type" : "Supplies",
    "poverty_level" : "highest poverty",
    "grade_level" : "Grades 6-8",
    "vendor_shipping_charges" : "",
    "sales_tax" : "",
    "payment_processing_charges" : "",
    "fulfillment_labor_materials" : "",
    "total_price_excluding_optional_support" : 229,
    "total_price_including_optional_support" : 279.27,
    "students_reached" : 0,
    "total_donations" : 251,
    "num_donors" : 1,
    "eligible_double_your_impact_match" : "f",
    "eligible_almost_home_match" : "f",
    "funding_status" : "completed",
    "date_posted" : "2002-09-13 00:00:00",
    "date_completed" : "2002-09-23 00:00:00",
    "date_thank_you_packet_mailed" : "2003-01-27 00:00:00",
    "date_expiration" : "2003-12-31 00:00:00"
}
>

太多字段了,如果我们就想要其中的6个字段:

> db.prj01.findOne({}, {school_state:1, resource_type:1, poverty_level:1, date_posted:1, total_donations:1, funding_status:1, _id:0})
{
    "school_state" : "NY",
    "resource_type" : "Supplies",
    "poverty_level" : "highest poverty",
    "total_donations" : 251,
    "funding_status" : "completed",
    "date_posted" : "2002-09-13 00:00:00"
}

装个pymongo

python -m pip install pymongo  

测一下,进入python命令行:

python  
...
from pymongo import MongoClient
# Where the mongod started above is listening, and which collection was imported.
MONGODB_HOST = 'localhost'
MONGODB_PORT = 27017
DBS_NAME = 'mydb'
COLLECTION_NAME = 'prj01'
# Projection: keep five fields of each document and suppress _id.
FIELDS = {'school_state': True, 'resource_type': True, 'poverty_level': True, 'date_posted': True, 'total_donations': True, '_id': False}
connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
collection = connection[DBS_NAME][COLLECTION_NAME]
# No filter is given, so the cursor walks every document in the collection.
projects = collection.find(projection=FIELDS)
for project in projects:
    # Call form of print: same output on Python 2, and valid syntax on Python 3.
    print(project)
...
{u'school_state': u'MO', u'date_posted': u'2015-08-18 00:00:00', u'poverty_level': u'highest poverty', u'resource_type': u'Books', u'total_donations': 0}
...

数据会疯狂显示一阵子,其实这就是一个完整的python访问mongodb的程序了。

下面我们来完成flask的部分

装个flask

python -m pip install flask  

建立个文件夹flask01,在里面再建立目录templates,结构如下:

flask01  
├── run.py
└── templates
    └── index.html

准备一个首页文件index.html放到目录templates下

cat index.html  
<h1>Hello World</h1>  

准备主程序run.py

from flask import Flask, render_template

app = Flask(__name__)


@app.route("/")
def index():
    """Serve the home page by rendering the index.html template."""
    return render_template("index.html")


if __name__ == "__main__":
    # Started directly (python run.py): listen on all interfaces, port 5000,
    # with Flask's debug mode switched on.
    app.run(
        host="0.0.0.0",
        port=5000,
        debug=True,
    )

分配个url来存取mongodb

from flask import Flask
from flask import render_template
from pymongo import MongoClient
import json
from bson import json_util
from bson.json_util import dumps

app = Flask(__name__)

# Connection settings for the local mongod and the collection to expose.
MONGODB_HOST = 'localhost'
MONGODB_PORT = 27017
DBS_NAME = 'mydb'
COLLECTION_NAME = 'prj01'
# Projection handed to find(): keep five fields per document and suppress _id.
FIELDS = {'school_state': True, 'resource_type': True, 'poverty_level': True, 'date_posted': True, 'total_donations': True, '_id': False}


@app.route("/")
def index():
    """Serve the home page by rendering the index.html template."""
    return render_template("index.html")


@app.route("/mydb/prj01")
def mydb_prj01():
    """Return all documents of mydb.prj01, limited to FIELDS, as a JSON array.

    A new MongoClient is opened for each request and is always closed again,
    including when the query fails. The whole result set is held in memory
    before it is serialised, because find() is called without a filter or limit.
    """
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    try:
        collection = connection[DBS_NAME][COLLECTION_NAME]
        # list() drains the cursor while the client is still open.
        projects = list(collection.find(projection=FIELDS))
    finally:
        # Release the client on every path, not only on success.
        connection.close()
    # json_util.default lets json.dumps handle BSON-specific value types.
    json_projects = json.dumps(projects, default=json_util.default)
    # Label the body as JSON; a bare string return is served with Flask's
    # default HTML content type.
    return app.response_class(json_projects, mimetype='application/json')


if __name__ == "__main__":
    # Development settings only: debug=True turns on the reloader and the
    # interactive debugger, which should not be exposed on an untrusted network.
    app.run(host='0.0.0.0', port=5000, debug=True)

运行一下,然后在浏览器打开 http://<服务器IP>:5000/mydb/prj01 这个url:

python run.py  
 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
 * Restarting with stat
 * Debugger is active!
 * Debugger pin code: 373-144-494
172.16.8.1 - - [28/Jul/2016 10:13:27] "GET /mydb/prj01 HTTP/1.1" 200 -  

结果很明显是个json

[{"school_state": "NY", "date_posted": "2002-09-13 00:00:00", "poverty_level": "highest poverty", "resource_type": "Supplies", "total_donations": 251},
 {"school_state": "NY", "date_posted": "2002-09-16 00:00:00", "poverty_level": "moderate poverty", "resource_type": "Supplies", "total_donations": 125}, 
...
]
comments powered by Disqus