首页 > Web开发 > 详细

phpcms全文检索功能实现(集成sphinx)

时间:2020-04-08 15:02:41      阅读:129      评论:0      收藏:0      [点我收藏+]

sphinx配置

Sphinx是俄罗斯人开发的一个搜索引擎,基于C++编写,具有强大的检索能力。它本身只支持中文单个字符的检索,中文分词需要额外的插件Coreseek,但该插件已很久未更新。github用户eric1688基于sphinx 2.2.9版本改写了可以较好支持中文分词的sphinx for chinese,本文配置均基于此版本,下载地址为 https://github.com/eric1688/sphinx ,向作者致敬。 另:如果是别的系统,建议使用Elasticsearch开发,它对中文分词的支持更完善一些,社区也更为活跃;但需求不复杂的话,sphinx已足够,而且配置相比更方便些。

安装sphinx

本文基于ubuntu 18.04安装sphinx,从上述地址下载压缩包,以下示例大部分基于作者的说明

下载解压

$ git clone https://github.com/eric1688/sphinx 
$ cd sphinx

编译(假设安装到/usr/local/sphinx目录,下同)

安装gcc、mysql,以及编译sphinx的mysql支持所需的mysql客户端开发库(头文件)

sudo apt update
sudo apt install build-essential
sudo apt install mysql-server
sudo apt install libmysqlclient-dev

编译安装sphinx

$ ./configure --prefix=/usr/local/sphinx --with-mysql
# 常用选项说明:
#   --prefix      指定安装路径
#   --with-mysql  编译mysql支持
#   --with-pgsql  编译pgsql支持(需要时再添加)
$ make 
$ make install

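安装完成后,安装目录/usr/local/sphinx下应有bin、etc、share三个子目录。还需要在该目录下新建data和log两个子目录(后文sphinx.conf中的索引文件和日志路径会用到),例如:sudo mkdir -p /usr/local/sphinx/data /usr/local/sphinx/log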

配置中文支持

$ tar -xvf xdict_1.1.tar.gz 
$ /usr/local/sphinx/bin/mkdict xdict_1.1.txt xdict

从xdict_1.1.txt生成xdict文件,xdict_1.1.txt文件可以根据需要进行修改

$ cp xdict /usr/local/sphinx/etc/

配置sphinx.conf

sphinx.conf是最关键的一部分,sphinx的运作主要基于该文件。以下是本人的配置文件,结合phpcms数据库进行配置,该文件存放于bin子目录中

# Data source for the main index: reads the phpcms search table over MySQL.
source main
{
	type               = mysql

	# Connection to the database used by phpcms.
	sql_host           = 192.168.91.1
	sql_port           = 3306
	sql_user           = sphinx
	sql_pass           = sphinx
	sql_db             = phpcms_v9

	# Pre-queries, in the original order: set the connection charset, then
	# store the current MAX(searchid) in row 1 of v9_sphinx_counter.
	sql_query_pre      = SET NAMES utf8
	sql_query_pre      = REPLACE INTO v9_sphinx_counter SELECT 1, MAX(searchid) FROM v9_search

	# Ranged fetch: ids from 1 up to the stored max_doc_id, 5000 ids per step.
	sql_query_range    = SELECT 1,max_doc_id FROM v9_sphinx_counter WHERE counter_id=1
	sql_range_step     = 5000
	sql_query          = SELECT searchid, adddate, siteid, typeid, id as news_id, data FROM v9_search WHERE searchid>=$start AND searchid<=$end

	# Columns kept as attributes.
	sql_attr_uint      = typeid
	sql_attr_uint      = siteid
	sql_attr_uint      = news_id
	sql_attr_timestamp = adddate

	# Per-document lookup query.
	sql_query_info     = SELECT * FROM v9_search WHERE searchid=$id
}

# Data source for the delta (incremental) index.
#
# Fetches only the rows added since source "main" was last indexed, i.e. rows
# whose searchid is above the max_doc_id that main's pre-query stored in
# v9_sphinx_counter (counter_id=1).
#
# Unlike source "main", this source must NOT run the
# "REPLACE INTO v9_sphinx_counter ..." pre-query: doing so would move the
# stored maximum forward and, combined with the same ranged query, make the
# delta index a full copy of the main index instead of an increment.
# NOTE(review): if the delta is later merged into main, the counter presumably
# needs to be advanced at that point - confirm against the indexing schedule.
source delta
{
	type = mysql
	sql_host = 192.168.91.1 # address of the database used by phpcms
	sql_user = sphinx # user name
	sql_pass = sphinx # password
	sql_db = phpcms_v9 # name of the phpcms database
	sql_port = 3306 # database port

	# Only the charset pre-query; the counter is left untouched on purpose.
	sql_query_pre = SET NAMES utf8

	# Rows newer than the main index. No sql_query_range / sql_range_step here:
	# the query has no $start/$end macros and the delta set is expected to be small.
	sql_query = SELECT searchid, adddate, siteid, typeid, id as news_id, data FROM v9_search WHERE searchid > (SELECT max_doc_id FROM v9_sphinx_counter WHERE counter_id=1)

	# Same attribute set as source "main".
	sql_attr_uint  = typeid
	sql_attr_uint  = siteid
	sql_attr_uint  = news_id
	sql_attr_timestamp  = adddate
	sql_query_info  = SELECT * FROM v9_search WHERE searchid=$id
}

# Main index, built from source "main".
index main
{
	source             = main
	path               = /usr/local/sphinx/data/main
	charset_type       = utf-8
	# Dictionary file copied into etc/ after being generated with mkdict.
	chinese_dictionary = /usr/local/sphinx/etc/xdict
}

# Delta (incremental) index, built from source "delta".
index delta
{
	source             = delta
	path               = /usr/local/sphinx/data/delta
	charset_type       = utf-8
	# Same dictionary file as the main index.
	chinese_dictionary = /usr/local/sphinx/etc/xdict
}

# Settings section for the indexer program.
indexer
{
	# Memory limit setting, 128M here.
	mem_limit = 128M
}

# Settings section for the searchd daemon.
searchd
{
	# Listening ports. 9306 uses the mysql41 protocol; once configured it can
	# be used for testing (e.g. with a mysql client).
	listen          = 9312
	listen          = 9306:mysql41

	# Log, pid and binlog locations under the install directory.
	log             = /usr/local/sphinx/log/searchd.log
	query_log       = /usr/local/sphinx/log/query.log
	pid_file        = /usr/local/sphinx/log/searchd.pid
	binlog_path     = /usr/local/sphinx/data

	# Limits.
	read_timeout    = 5
	max_children    = 30

	# Index rotation / file-handling switches.
	seamless_rotate = 1
	preopen_indexes = 1
	unlink_old      = 1
}

建立索引文件,并开启服务

# 以下命令执行的当前目录均为/usr/local/sphinx/bin(indexer、searchd以及上文的sphinx.conf都在该目录下)
$ ./indexer --all
$ ./searchd

测试

# 假设sphinx所在服务器ip为192.168.91.130
$ mysql -h192.168.91.130 -P9306
mysql> select * from main;
+------+------------+--------+--------+---------+
| id   | adddate    | siteid | typeid | news_id |
+------+------------+--------+--------+---------+
|  941 | 1585707168 |      1 |      1 |     231 |
|  942 | 1585707168 |      1 |      1 |     232 |
|  943 | 1585707168 |      1 |      1 |     233 |
|  944 | 1585707168 |      1 |      1 |     234 |
|  945 | 1585707168 |      1 |      1 |     235 |
|  946 | 1585707168 |      1 |      1 |     236 |
|  947 | 1585707168 |      1 |      1 |     237 |
|  948 | 1585707168 |      1 |      1 |     238 |
|  949 | 1585707168 |      1 |      1 |     239 |
|  950 | 1585707168 |      1 |      1 |     240 |
|  951 | 1585707168 |      1 |      1 |     241 |
|  952 | 1585707168 |      1 |      1 |     242 |
|  953 | 1585707168 |      1 |      1 |     243 |
|  954 | 1585707168 |      1 |      1 |     244 |
|  955 | 1585707168 |      1 |      1 |     245 |
|  956 | 1585707168 |      1 |      1 |     246 |
|  957 | 1585707168 |      1 |      1 |     247 |
|  958 | 1585707168 |      1 |      1 |     248 |
+------+------------+--------+--------+---------+
20 rows in set (0.00 sec)

phpcms相关配置和部分文件修改(todo)

phpcms全文检索功能实现(集成sphinx)

原文:https://www.cnblogs.com/captainmoore/p/12659782.html

(1)
(1)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!