<?php
/**
* Created by PhpStorm.
* User: Administrator
* Date: 13/05/2015
* Time: 9:18 SA
*/
namespace App\Services;
use Carbon\Carbon;
use \Config;
use App\Post;
use Elasticsearch\Client;
use Elasticsearch\Common\Exceptions\Missing404Exception;
/**
 * Class ElasticPostSearchService
 *
 * Copies Post rows from the SQL database into an Elasticsearch index and
 * runs full-text searches against that index.
 *
 * @package App\Services
 */
class ElasticPostSearchService
{
    /**
     * Lazily created Elasticsearch client (see getElastic()).
     *
     * @var Client|null
     */
    private $elastic;

    /**
     * Name of the Elasticsearch index holding the posts.
     *
     * @var string
     */
    private $indexName = 'cms-tintuc';

    /**
     * Document type used for posts inside the index.
     *
     * @var string
     */
    private $indexType = 'post';

    /**
     * Column of a post row that is used as the Elasticsearch document id.
     *
     * @var string
     */
    private $primaryKey = 'id';

    /**
     * Fields that search() is allowed to query and highlight.
     *
     * @var array
     */
    private $matches = ['title', 'content', 'excerpt'];

    /**
     * Post columns that are selected from SQL and copied into each document.
     *
     * @var array
     */
    private $columns = ['id', 'title', 'content', 'excerpt', 'slug', 'scores'];

    /**
     * Get the Elasticsearch client, creating it on first use and making sure
     * the index exists before it is handed out.
     *
     * @return \Elasticsearch\Client
     */
    private function getElastic()
    {
        if ($this->elastic === null) {
            $this->elastic = new Client(Config::get('elasticsearch.connection'));
        }

        // The existence check runs on every call, so an index that was
        // dropped while the service is alive gets recreated.
        if (!$this->elastic->indices()->exists(['index' => $this->indexName])) {
            $this->initIndex($this->elastic);
        }

        return $this->elastic;
    }

    /**
     * Create the index with its analysis settings and the post mapping, then
     * refresh it.
     *
     * @param Client $elastic
     */
    private function initIndex($elastic)
    {
        $settings = [
            'analysis' => [
                'filter' => [
                    // Folds accented characters to ASCII while also keeping
                    // the original accented token in the index.
                    'ascii_folding_keep_original' => [
                        'type' => 'asciifolding',
                        'preserve_original' => true
                    ],
                    'synonym' => [
                        'type' => 'synonym',
                        'synonyms_path' => 'analysis/synonym.txt'
                    ]
                ],
                'analyzer' => [
                    'default' => [
                        'tokenizer' => 'standard',
                        'filter' => ['standard', 'ascii_folding_keep_original', 'lowercase']
                    ],
                    'synonym' => [
                        'tokenizer' => 'whitespace',
                        'filter' => ['synonym']
                    ]
                ]
            ]
        ];

        // Explicit mappings for the fields that need non-default handling.
        $mapping = [
            'properties' => [
                'title' => [
                    'type' => 'string',
                    'term_vector' => 'yes'
                ],
                'content' => [
                    'type' => 'string',
                    'term_vector' => 'yes'
                ],
                'slug' => [
                    'type' => 'string',
                    'index' => 'not_analyzed',
                    'term_vector' => 'yes'
                ]
            ]
        ];

        $indices = $elastic->indices();
        // The mapping is keyed by the type name inside the body; "type" is
        // not passed as a request parameter to the create-index call.
        $indices->create([
            'index' => $this->indexName,
            'body' => [
                'settings' => $settings,
                'mappings' => [
                    $this->indexType => $mapping
                ]
            ]
        ]);
        $indices->refresh(['index' => $this->indexName]);
    }

    /**
     * Delete one indexed post, or every indexed post when no id is given.
     *
     * Deleting a post that is not in the index is treated as a no-op.
     *
     * @param int|null $id Post id; a falsy value deletes all posts.
     */
    private function deletePost($id = null)
    {
        $params = [
            'index' => $this->indexName,
            'type' => $this->indexType
        ];

        if ($id) {
            $params['id'] = $id;
            try {
                $this->getElastic()->delete($params);
            } catch (Missing404Exception $ex) {
                // The document was never indexed, so there is nothing to remove.
            }
            return;
        }

        $params['body']['query']['bool']['must']['match_all'] = new \stdClass();
        $this->getElastic()->deleteByQuery($params);
    }

    /**
     * Turn a date value into the string stored in the indexing metadata.
     *
     * @param string|\DateTime $date
     * @return string|mixed 'Y-m-d H:i:s' for date objects; any other value is returned as is.
     */
    private function formatMetaDate($date)
    {
        // instanceof also matches DateTime subclasses and is safe to apply to
        // plain strings, unlike comparing get_class() with 'DateTime'.
        if ($date instanceof \DateTime || $date instanceof \DateTimeInterface) {
            return $date->format('Y-m-d H:i:s'); // 24-hour clock
        }

        return $date;
    }

    /**
     * Store the time of the last completed migration.
     *
     * @param string|\DateTime $date
     */
    private function setLastUpdateTimeMeta($date)
    {
        $this->setIndexingMeta(['updated_at' => $this->formatMetaDate($date)]);
    }

    /**
     * Read the time of the last completed migration.
     *
     * @return string|Carbon|null The stored value; 1970-01-01 when the meta
     *                            document exists without a usable value; null
     *                            when there is no meta document.
     */
    private function getLastUpdateTimeMeta()
    {
        $meta = $this->getIndexingMeta();
        if (!$meta) {
            return null;
        }

        if (!empty($meta['updated_at'])) {
            return $meta['updated_at'];
        }

        return Carbon::create(1970, 1, 1);
    }

    /**
     * Upsert the indexing-metadata document.
     *
     * @param array $indexMeta Fields to merge into the metadata document.
     */
    private function setIndexingMeta($indexMeta = array())
    {
        $routing = sprintf('%s_%s_meta', $this->indexName, $this->indexType);
        $metaType = sprintf('%s_meta', $this->indexType);

        $this->getElastic()->update([
            'index' => $this->indexName,
            'type' => $metaType,
            'id' => $routing,
            'body' => [
                'doc' => $indexMeta,
                'doc_as_upsert' => true // create the document when it does not exist
            ]
        ]);
    }

    /**
     * Fetch the indexing-metadata document.
     *
     * @return array|null The document source, or null when it does not exist.
     */
    private function getIndexingMeta()
    {
        $routing = sprintf('%s_%s_meta', $this->indexName, $this->indexType);
        $metaType = sprintf('%s_meta', $this->indexType);

        try {
            return $this->getElastic()->getSource([
                'index' => $this->indexName,
                'type' => $metaType,
                'id' => $routing
            ]);
        } catch (Missing404Exception $ex) {
            return null;
        }
    }

    /**
     * Build the body of a bulk request that upserts the given posts.
     *
     * Each post contributes two entries: the "update" action line and the
     * partial document. String values have HTML tags stripped and entities
     * decoded; other values are copied unchanged.
     *
     * @param array $posts List of post rows as associative arrays.
     * @return array
     */
    private function getBulkUpdatePostRequestBody($posts)
    {
        $body = [];

        foreach ($posts as $post) {
            $doc = [];
            foreach ($this->columns as $column) {
                $value = isset($post[$column]) ? $post[$column] : null;
                $doc[$column] = is_string($value)
                    ? html_entity_decode(strip_tags($value))
                    : $value;
            }

            $body[] = [
                'update' => [
                    '_id' => $post[$this->primaryKey]
                ]
            ];
            $body[] = [
                'doc_as_upsert' => true, // create the document when it does not exist
                'doc' => $doc
            ];
        }

        return $body;
    }

    /**
     * Migrate data from the SQL database into Elasticsearch.
     *
     * With a positive $postId only that post is synchronised (it is removed
     * from the index when it no longer exists in SQL). Otherwise every post
     * updated since the last recorded migration is synchronised.
     *
     * @param int $postId
     */
    public function migrate($postId = -1)
    {
        $params = [
            'index' => $this->indexName,
            'type' => $this->indexType
        ];
        $columns = $this->columns;

        // Single-post synchronisation.
        if ($postId > 0) {
            $post = Post::find($postId, $columns);
            if (is_null($post)) {
                $this->deletePost($postId);
                return;
            }

            $params['body'] = $this->getBulkUpdatePostRequestBody([$post->toArray()]);
            $this->getElastic()->bulk($params);
            return;
        }

        // Incremental synchronisation of all posts. The secondary sort on the
        // primary key keeps offset-based paging stable when several rows
        // share the same updated_at value.
        $builder = Post::orderBy('updated_at', 'desc')->orderBy($this->primaryKey);

        // Only rows changed after the previous migration are picked up.
        $lastUpdateTime = $this->getLastUpdateTimeMeta();
        if ($lastUpdateTime) {
            $builder = $builder->where('updated_at', '>', $lastUpdateTime);
        }

        // Fix the upper bound at the start of the run.
        if ($newest = $builder->first()) {
            $thisupdateTime = $newest->updated_at;
        } else {
            // Nothing to migrate: use the database clock, not the system clock.
            $thisupdateTime = \DB::selectOne('select now() as now')->now;
        }
        $builder = $builder->where('updated_at', '<=', $thisupdateTime);

        $pageSize = 500; // rows per bulk request
        $records = $builder->count();
        $totalPage = (int) ceil($records / $pageSize);

        for ($page = 0; $page < $totalPage; $page++) {
            $posts = $builder
                ->skip($page * $pageSize)
                ->take($pageSize)
                ->select($columns)
                ->get()
                ->toArray();

            $params['body'] = $this->getBulkUpdatePostRequestBody($posts);
            if ($params['body']) {
                $this->getElastic()->bulk($params);
            }
        }

        $this->setLastUpdateTimeMeta($thisupdateTime);
    }

    /**
     * Build the search request body: a bool query with one "match" clause per
     * field (at least one must match) plus highlighting for those fields.
     *
     * @param string $query
     * @param array $fields Requested fields; only searchable fields are kept.
     *                      When none remain, all searchable fields are used.
     * @return array
     */
    private function buildSearchBody($query, $fields)
    {
        $searchFields = array_values(array_intersect($this->matches, (array) $fields));
        if (!$searchFields) {
            $searchFields = $this->matches;
        }

        $clauses = [];
        $highlightFields = [];
        foreach ($searchFields as $field) {
            // Append one clause per field instead of overwriting a single key.
            $clauses[] = ['match' => [$field => $query]];
            $highlightFields[$field] = ['force_source' => true];
        }

        return [
            'query' => [
                'bool' => [
                    'should' => $clauses,
                    'minimum_should_match' => 1
                ]
            ],
            'highlight' => [
                'fields' => $highlightFields
            ]
        ];
    }

    /**
     * Search the indexed posts.
     *
     * @param string $query Text to search for.
     * @param array $fields Fields to search; empty means all searchable fields.
     * @return array Response of the Elasticsearch client.
     */
    public function search($query = '', $fields = array())
    {
        return $this->getElastic()->search([
            'index' => $this->indexName,
            'type' => $this->indexType,
            'body' => $this->buildSearchBody($query, $fields)
        ]);
    }
}