这是一篇长文......

前面写了好几篇elasticsearch的文章,这跟linux系统管理看起来毫不沾边啊。理由很朴素,想给自己的Ghost blog加个搜索引擎。试了几个都不如意,于是干脆搭建一个elasticsearch自用。

鉴于生产环境的主程序和elasticsearch通常是分开的,主程序不可能直接调用elasticsearch的9200端口进行查询,通常会在中间加一层proxy代理,通过api查询。所以有了之前的flask代理api。

现在我们已经通过免费的elasticsearch服务有了自己的服务器,那么怎么在Ghost blog上建立Elasticsearch搜索引擎呢?

第一步:添加个博客的url,列出所有文章:

首先添加ghost的routes,使得能看到所有文章

# vi core/server/routes/frontend.js
...
    // redirect to /ghost and let that do the authentication to prevent redirects to /ghost//admin etc.
    router.get(/^\/((ghost-admin|admin|wp-admin|dashboard|signin|login)\/?)$/, function redirectToAdmin(req, res) {
        utils.redirect301(res, subdir + '/ghost/');
    });

    // 增加下面这行
    router.get('/all_posts/', frontend.all_posts);

然后编辑controllers,在preview前增加all_posts函数

# vi core/server/controllers/frontend/index.js
...
frontendControllers = {  
   // 增加
   all_posts: function all_posts(req, res, next) {
    api.posts.browse({
      include: 'title,markdown,slug,tags',
      page: 1,
      limit: 1000
    }).then(function (posts) {
      if (!posts || posts.length == 0) {
        res.end("[]");
      } else {
        var output = [];
        posts.posts.forEach(function (p) {
                console.log(p.status)
                output.push({
                    title: p.title,
                    content: p.markdown,
                    slug: p.slug,
                    tags: p.tags.map(function (tag) { return tag.name; }),
                    updated_at: new Date(p.updated_at)
                });
            });
        res.end(JSON.stringify(output));
      }
     }).catch(handleError(next));
    },

    //下面是原来的preview函数
    preview: function preview(req, res, next) {

ok,重启。这样我们打开 http://xxx.xxx.xxx.xxx/all_posts/ 就会看到自己博客的所有文章的json格式。

第二步:我们需要写个程序导出数据到bonsai.io的elasticsearch去

首先登陆bonsai.io,打开Interactive Console: 发个请求,建立新索引:

POST /posts  

显示true就ok了

然后打开Manage,记下来那个https打头的地址:

然后回到Ghost服务器上随便建个目录,当然不能放到Ghost的主程序下 ,例如/home/ex/ 生成一个run.js

# cat /home/ex/run.js
// Third-party HTTP client; installed separately with `npm install request`.
var httpRequest = require('request'),
  /**
   * Builds the body of the bulk request: for every post, a `create` action
   * header followed by the post document itself, both serialised as JSON.
   *
   * @param {Array<Object>} posts documents to index
   * @returns {string} concatenated action/document pairs
   */
  getMetaForPosts = function (posts) {
    var requestString = '';
    posts.forEach(function (post) {
      var meta = {
        create: {
          _index: "posts",
          _type: "post",
          // `~~` truncates the float to an integer in the range 0..999.
          // NOTE(review): ids are drawn from only 1000 values, so two posts
          // can receive the same id — consider a unique key such as the slug.
          _id: ~~(Math.random() * 1000)
        }
      };
      // NOTE(review): header and document are separated by six spaces and no
      // newline character is emitted at all. The Bulk API is documented as
      // newline-delimited; the revised run.js excerpt later in this article
      // adds the '\n' terminators.
      requestString += JSON.stringify(meta) + '      ' + JSON.stringify(post);
    });
    return requestString;
  },
  /**
   * Aborts the script unless the request succeeded with HTTP 200.
   *
   * @param {Error|null} err    transport error reported by `request`
   * @param {Object} response   response object; only `statusCode` is read
   * @param {*} body            response body, thrown as-is on a non-200 status
   */
  validate = function (err, response, body) {
    if (err) throw err;
    if (response.statusCode !== 200) throw body;
  };

// The five requests run strictly one after another. Each step is a named
// callback that validates its own response and then fires the next request,
// which keeps the sequence flat instead of nesting five levels deep.

// Step 5 callback: the bulk request has come back.
function onBulkIndexed(err, response, body) {
  validate(err, response, body);
  console.log("Done indexing")
}

// Step 4 callback: the blog returned its posts; push them to the index.
function onPostsFetched(err, response, body) {
  validate(err, response, body);
  var posts = JSON.parse(body);
  if (!posts) throw new Error("Could not fetch posts!");
  console.log("Fetched " + posts.length + " posts, Bulk Indexing posts...");
  httpRequest.post({
    uri: 'https://xxxxx.bonsai.io/posts/post/_bulk',
    body: getMetaForPosts(posts)
  }, onBulkIndexed);
}

// Step 3 callback: the empty index exists again; fetch the posts from Ghost.
function onIndexCreated(err, response, body) {
  validate(err, response, body);
  console.log("Fetching posts...");
  httpRequest({
    uri: 'http://127.0.0.1:2368/all_posts/'
  }, onPostsFetched);
}

// Step 2 callback: the old index is gone; create it anew.
function onIndexDeleted(err, response, body) {
  validate(err, response, body)
  console.log("Deleted index successfully, recreating 'posts' index...");
  httpRequest.put({
    uri: 'https://xxxxx.bonsai.io/posts/'
  }, onIndexCreated);
}

// Step 1 callback: the cluster answered; drop the existing index.
function onClusterReachable(err, response, body) {
  validate(err, response, body);
  console.log("Elastic Search running, deleting posts index...");
  httpRequest.del({
    uri: 'https://xxxxx.bonsai.io/posts/'
  }, onIndexDeleted);
}

// Kick off the chain by probing the cluster root.
httpRequest({
  uri: 'https://xxxxx.bonsai.io'
}, onClusterReachable);

注意上面四个地方的https地址,换成你自己的哦。

上面的程序用到了request库,所以我们需要装一下:

# cd /home/ex
# npm install request

然后运行这个js,把数据导入bonsai.io:

# cd /home/ex
# node run.js
Elastic Search running, deleting posts index...  
Deleted index successfully, recreating 'posts' index...  
Fetching posts...  
Fetched 115 posts, Bulk Indexing posts...  
Done indexing  

ok,建立成功,我们去bonsai的控制台发个请求看看:

GET /posts/_search  
{
  "fields": ["slug", "title", "tags", "updated_at"],
  "query": {
    "wildcard": {
      "_all": {
        "wildcard": "vpn*"
      }
    }
  },
  "highlight": {
    "fields": {
      "title": {},
      "tags": {},
      "content": {}
    }
  },
  "suggest": {
    "suggestions": {
      "text": "query_text",
      "term": {
        "field": "_all",
        "suggest_mode": "always"
      }
    }
  }
}

显示有数据就对了:

第三步:直接改造ghost,让它支持elasticsearch

本来是可以中间加个api层的,但是vps太弱,再加东西恐怕起不来,所以直接来,如果是正式大公司的生产环境,中间层是必须的。

首先是添加search路由,在all_posts之下再加个search路由:

# vi core/server/routes/frontend.js
...
    router.get('/all_posts/', frontend.all_posts);
    router.get('/search/', frontend.search_results);
...

然后编辑controllers,在all_posts前增加search_results函数

# vi core/server/controllers/frontend/index.js
...
search_results: function search_results(req, res, next) {

  // Build up the search request
  var request_data = {
    "fields": ["slug", "title", "tags", "updated_at"],
    "query": {
      "wildcard": {
        "_all": {
          "wildcard": req.query.q + "*"
        }
      }
    },
    "highlight": {
      "fields": {
        "title": {},
        "tags": {},
        "content": {}
      }
    },
    "suggest": {
      "suggestions": {
        "text": req.query.q,
        "term": {
          "field": "_all",
          "suggest_mode": "always"
        }
      }
    }
  };

  // create the elastic search request
  request_data = JSON.stringify(request_data);
  var esRequest = require('https').request({
    host: 'xxxxxx.bonsai.io',
    path: '/posts/_search',
    port: 443,
    auth: 'xxxxxx:xxxxxx',
    method: "POST",
    headers: {
      'Content-Type': 'application/json',
      'Content-Length': request_data.length
    }
  }, function (esRes) {
    var result = '';
    esRes.on('data', function (chunk) {
      result += chunk;
    });
    esRes.on('end', function () {
      var response = JSON.parse(result);

      // render the results
      res.render('results', {
        results: response,
        resultsJSON: JSON.stringify(response, null, 2),
        query: req.query.q
      });
    });
  });

  // search!
  esRequest.write(request_data);
  esRequest.end();
},
...

注意上面填写bonsai.io的request方式,其实bonsai的url是分成了好几部分,比如 [https://aaa:bbb@ccc.bonsai.io],那么auth就是aaa:bbb,host就是ccc.bonsai.io,port就是默认的443,要注意。

大家看到res.render('results',是渲染到了results的模板去,所以我们再在Ghost的主题目录下,建立一个results.hbs模板:

{{!< default}}

{{> "header"}}

<main id="content" class="content" role="main">

<div id="article" class="box">  
  <div class="category-all-page">
    <div class="category-all-title">
        搜索结果:{{query}}
    </div>
    <ul class="category-list">    
        <li class="category-item">
          <h3 class="category-name" id="category1"><i class="fa fa-coffee"></i>{{query}}</h3>
          <ul class="post-list">
             {{#foreach results.hits.hits}}
                     <li><a href="/{{fields.slug}}/">{{fields.title}}</a></li>

         {{/foreach}}
          </ul>
        </li>
    </ul>
  </div>
</div>

</main>  

然后重启ghost, 发个链接

http://xxx.xxx.xxx.xxx/search/?q=vpn  

网页显示如下就ok了:

最后再在模板上加上搜索框就可以了。但是啊,这个还有问题,没有中文分词,哈哈。

增加与修改:

真的给自己的博客加上了elasticsearch,中间又遇到了麻烦。

用的是Elasticsearch 2.3.3 版本,这个跟bonsai.io是不一样的。

所以从博客导入elasticsearch出了麻烦:

以上的程序实际是用POST bulk提交了数据:

POST /posts/post/_bulk  
{"create":{"_index":"posts","_type":"post","_id":634}}      {"title":"","content":"","slug"......}

以上的语法往elasticsearch 2.3.3提交就会报错,正确的如下:

POST /posts/post/_bulk  
{"index":{"_index":"posts","_type":"post","_id":634}}      {"title":"","content":"","slug"......}

所以run.js需要做如下改动:

...
// Third-party HTTP client; installed separately with `npm install request`.
var httpRequest = require('request'),
  /**
   * Builds the newline-delimited body of the bulk request: for every post,
   * an `index` action line followed by the post document line, each
   * terminated by '\n'.
   *
   * @param {Array<Object>} posts documents to index
   * @returns {string} bulk request body
   */
  getMetaForPosts = function (posts) {
    var requestString = '';
    posts.forEach(function (post) {
      var meta = {
        index: {
          _index: "posts",
          _type: "post",
          // `~~` truncates the float to an integer in the range 0..999.
          // NOTE(review): ids are drawn from only 1000 values, so two posts
          // can receive the same id — consider a unique key such as the slug.
          _id: ~~(Math.random() * 1000)
        }
      };
      // One JSON object per line; the final line needs its '\n' as well.
      requestString += JSON.stringify(meta) + '\n' + JSON.stringify(post) + '\n';
    });
    return requestString;
  },
  /**
   * Aborts the script unless the request succeeded with HTTP 200.
   *
   * @param {Error|null} err    transport error reported by `request`
   * @param {Object} response   response object; only `statusCode` is read
   * @param {*} body            response body, thrown as-is on a non-200 status
   */
  validate = function (err, response, body) {
    if (err) throw err;
    if (response.statusCode !== 200) throw body;
  };
...

改动了三处,一是meta里的create改成了index,二是JSON.stringify(meta)后加了换行符'\n',三是JSON.stringify(post)后加了换行符'\n'。

这才是bulk在2.3.3版本中的正确用法。

`

comments powered by Disqus