堕落不振功业废,勤耕不辍日月新

coreseek搜索中文的配置

中间件 hailen 413℃

关于中文搜索,如果大家想直接用sphinx来实现,还是算了,因为sphinx本身并不支持中文搜索。虽然coreseek公司有针对sphinx提供补丁文件,但目前为止最新的版本只针对0.9.8,建议不要这样做:之前我也尝试过打补丁,但事实证明不可行,因为sphinx低级的版本不支持关于中文配置的选项。coreseek其实是sphinx的升级版,说白了,就是sphinx 加上 mmseg,mmseg就是中文分词的工具,coreseek就是使得sphinx也能对中文进行索引。接下来我们就来配置coreseek,coreseek3.2.14 下载地址 http://www.coreseek.cn/uploads/csft/3.2/coreseek-3.2.14.tar.gz

#tar -zxvf coreseek-3.2.14.tar.gz   //解压
#cd coreseek-3.2.14       // 进入源文件
//安装mmseg start
#cd mmseg-3.2.14       //  进入mmseg,先安装mmseg  (中文分词插件)
#./configure --prefix=/usr/local/coreseek   //配置   报错 config.status: error: cannot find input file: src/Makefile.in  
//解决方法  依次执行
#yum -y install autoconf automake libtool 
#aclocal
#libtoolize --force
#automake --add-missing
#autoconf
#autoheader
#./configure --prefix=/usr/local/coreseek
#make
#make install
//mmseg安装 end
//安装coreseek  start
#cd /usr/local/src/coreseek-3.2.14/csft-3.2.14/
#./configure --prefix=/usr/local/coreseek --with-mmseg-libs=/usr/local/mmseg/lib --with-mmseg-includes=/usr/local/mmseg/include/mmseg
#make
#make install
//安装coreseek  end

接下来就是配置配置文件了,配置文件的选项去看sphinx的官方文档

#cd /usr/local/coreseek/etc   // the config must live in etc/: every later command uses /usr/local/coreseek/etc/sphinx.conf
#cp sphinx.conf.dist sphinx.conf

以下为我的sphinx配置文件

# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#

#############################################################################
## data source definition
#############################################################################

# Main data source: feeds the full "product" index from pre_cosmetics_product.
# It also records the highest indexed pid in sph_counter so that the "delta"
# source can pick up only rows added after the last full rebuild.
source product
{
    # data source type. mandatory, no default value
    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    type = mysql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    sql_host = localhost
    sql_user = root
    sql_pass = ********
    sql_db = sphinx
    sql_port = 3306 # optional, default is 3306

    # UNIX socket name
    # optional, default is empty (reuse client library defaults)
    # usually '/var/lib/mysql/mysql.sock' on Linux
    # usually '/tmp/mysql.sock' on FreeBSD
    # The value below is the BSD path; on Linux use /var/lib/mysql/mysql.sock
    sql_sock = /tmp/mysql.sock
    mysql_connect_flags = 32 # enable compression

    # Pre-queries run once, in order, before the main fetch query.
    sql_query_pre = SET NAMES utf8
    sql_query_pre = SET SESSION query_cache_type=OFF

    # Main+delta ("live update") support: remember the current maximum pid.
    # The sph_counter table must already exist with columns (counter_id, max_doc_id).
    sql_query_pre = REPLACE INTO sph_counter select 1, MAX(pid) FROM pre_cosmetics_product

    # Main fetch query. A value that spans several lines MUST end every line
    # but the last with a backslash; otherwise the value is empty and the
    # following lines are rejected by the config parser.
    sql_query = \
        SELECT pid, cname \
        FROM pre_cosmetics_product \
        WHERE pid >= $start AND pid <= $end

    # Ranged fetching: $start/$end above are stepped between these bounds,
    # which helps avoid long MyISAM table locks.
    sql_query_range = select MIN(pid),MAX(pid) FROM pre_cosmetics_product
    sql_ranged_throttle = 0

    # Per-document lookup used by the command-line "search" tool; reads from
    # the same table that is indexed.
    sql_query_info = select * FROM pre_cosmetics_product WHERE pid=$id
}

# Delta data source: inherits from "product" and fetches only the rows whose
# pid is at or above the max_doc_id stored in sph_counter by the last full
# rebuild of the main index.
source delta : product
{
    sql_host = localhost
    sql_user = root
    # Never publish a real password; use a placeholder as in the parent source.
    sql_pass = ********
    sql_db = sphinx
    sql_port = 3306 # optional, default is 3306
    sql_sock = /tmp/mysql.sock
    mysql_connect_flags = 32 # enable compression

    # Declaring sql_query_pre here replaces the whole list inherited from
    # "product", so the REPLACE INTO sph_counter query is NOT run for the
    # delta source (running it would reset the counter and empty the delta).
    sql_query_pre = SET NAMES utf8
    sql_query_pre = SET SESSION query_cache_type=OFF

    # Multi-line value: every line but the last ends with a backslash.
    sql_query = \
        SELECT pid, cname \
        FROM pre_cosmetics_product \
        WHERE pid >= $start AND pid <= $end

    # Range runs from the stored counter up to the current maximum pid of the
    # indexed table.
    # NOTE(review): the lower bound is max_doc_id itself, so that one row is
    # present in both main and delta - confirm this overlap is acceptable.
    sql_query_range = select (SELECT max_doc_id FROM sph_counter WHERE counter_id=1),MAX(pid) FROM pre_cosmetics_product
    #sql_attr_uint = Purchase
    #sql_attr_float = Member_price

    sql_ranged_throttle = 0

    sql_query_info = select * FROM pre_cosmetics_product WHERE pid=$id
}
# Main full-text index, built from the "product" source.
index product
{
    # where the documents come from and where the index files are written
    source = product
    path = /usr/local/coreseek/var/data/product

    # storage / memory behaviour
    docinfo = extern
    mlock = 0

    # tokenizing: coreseek UTF-8 Chinese charset with the mmseg dictionary
    # directory; n-gram indexing is switched off (ngram_len = 0)
    charset_type = zh_cn.utf-8
    charset_dictpath = /usr/local/mmseg/etc/
    ngram_len = 0
    min_word_len = 1
    html_strip = 0
}
# Delta full-text index: inherits from "product" but is built from the
# "delta" source and stored in its own set of files.
index delta : product
{
    # overrides of the inherited source and path
    source = delta
    path = /usr/local/coreseek/var/data/delta

    # storage / memory behaviour
    docinfo = extern
    mlock = 0

    # tokenizing: same Chinese charset and mmseg dictionary settings as the
    # main index
    charset_type = zh_cn.utf-8
    charset_dictpath = /usr/local/mmseg/etc/
    ngram_len = 0
    morphology = none
    min_word_len = 1
    html_strip = 0
}
# Settings for the "indexer" tool.
indexer
{
    # maximum I/O operation size, in bytes
    max_iosize = 1048576

    # memory limit for an indexing run
    mem_limit = 256M
}
# Settings for the "searchd" search daemon.
searchd
{
    # listeners: plain port 9312, plus port 9306 speaking the mysql41 protocol
    listen = 9312
    listen = 9306:mysql41

    # log and pid files
    log = /usr/local/coreseek/var/log/searchd.log
    query_log = /usr/local/coreseek/var/log/query.log
    pid_file = /usr/local/coreseek/var/log/searchd.pid

    # network timeouts and concurrency
    read_timeout = 5
    client_timeout = 300
    max_children = 10

    # per-query limits
    max_matches = 1000
    max_packet_size = 8M
    max_filters = 256
    max_filter_values = 4096

    # index rotation and file handling
    seamless_rotate = 1
    preopen_indexes = 0
    unlink_old = 1

    # attribute update pool
    mva_updates_pool = 1M
}

关于中文词典的构造请访问 http://www.coreseek.cn/opensource/mmseg/
//详细查看官方文档中关于source的各个配置选项
接着就是建立索引

#cd /usr/local/coreseek/
#bin/indexer --config /usr/local/coreseek/etc/sphinx.conf product
//启动searchd
#bin/searchd --config /usr/local/coreseek/etc/sphinx.conf
#bin/search 要搜索的字符串

关于php 端的调用,coreseek源码中有api。
这篇博客只是帮助初学者了解配置sphinx,更多请访问coreseek官方网站http://www.coreseek.cn

转载请注明:我是IT » coreseek搜索中文的配置

喜欢 (0)or分享 (0)