大约从07年起,本博客就不转载了; 这篇算是以译文发的,原文在:http://www.contentwithstyle.co.uk/content/searching-with-xapian-and-php ========邪恶的分割线============ 有的时候呢,嗯,mysql 就是不够快;尤其是在做全文检索的时候.各个字段都得正确地检索才行,而当我们的各个字段带有不同的权重时,事情就马上变得特别复杂了,这时你就需要xapian来救急了. Xapian是什么东东 xapian是一个全文检索库,就和lucene和sphinx一样;它需要从c++代码编译,比较底层;现在已经有php,perl,python的绑定可以直接使用了.目前提供了redhat和ubuntu的包;你可以在Mac OS上编译,还可以通过cygwin来在windows下运行. 示例脚本 我不想去解释why和how,我只想展示一个简单的脚本;我封装的php文件有点大,读者可以从原文页面下载; db.sql CREATE DATABASE `demo`; CREATE TABLE `demo`.`demo` ( `id` INT( 10 ) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY , `unique_key` VARCHAR( 255 ) NOT NULL , `name` VARCHAR( 255 ) NULL DEFAULT NULL , `summary` TEXT NULL DEFAULT NULL , `date` DATETIME NULL DEFAULT NULL , UNIQUE (`unique_key`)); INSERT INTO `demo`.`demo` (`id`, `unique_key`, `name`, `summary`, `date`) VALUES (NULL, ‘foo’, ‘foo’, ‘foo bar test’, ‘2008-11-05 00:00:00’), (NULL , ‘bar’, ‘bar’, ‘test foo bar’, ‘2009-11-05 00:00:00’); XapianWrapper.php xapian_read_db = new XapianDatabase(self::SETTINGS_XAPIAN_DB); $this->xapian_stemmer = new XapianStem(“english”); $this->xapian_enquire = new XapianEnquire($this->xapian_read_db); } catch(Exception $e) { throw new Exception(‘Could initialize Xapian: ‘ . $e->getMessage()); } } private function xapian_init_writable() { try{ $this->xapian_write_db = new XapianWritableDatabase(self::SETTINGS_XAPIAN_DB, Xapian::DB_CREATE_OR_OPEN); $this->xapian_indexer = new XapianTermGenerator(); $this->xapian_stemmer = new XapianStem(“english”); $this->xapian_indexer->set_stemmer($this->xapian_stemmer); } catch(Exception $e) { throw new Exception(‘Could initialize Xapian: ‘ . $e->getMessage()); } } private function mysql_init() { $this->mysql_link = mysql_connect(self::SETTINGS_MYSQL_HOST, self::SETTINGS_MYSQL_USER, self::SETTINGS_MYSQL_PASS); if (!$this->mysql_link) { throw new Exception(‘Could not connect: ‘ . 
mysql_error()); } $db_selected = mysql_select_db(self::SETTINGS_MYSQL_DB, $this->mysql_link); if (!$db_selected) { throw new Exception(‘Can’t use db : ‘ . mysql_error()); } } /** * Index method * */ public function index($params) { $this->xapian_init_writable(); $this->mysql_init(); $start = microtime(true); $response = new stdClass(); $response->indexed = array(); $offset = (isset($params[‘offset’])) ? intval($params[‘offset’]) : 0; $count = (isset($params[‘count’])) ? intval($params[‘count’]) : self::DEFAULT_COUNT; $sql = ‘SELECT * FROM ‘.self::SETTINGS_MYSQL_TABLE.’ LIMIT ‘ . $offset . ‘, ‘ . $count . ‘;’; $result = mysql_query($sql); if (!$result) { throw new Exception(‘Invalid query: ‘ . mysql_error()); } $this->xapian_write_db->begin_transaction(); while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) { $response->indexed[] = $this->index_row($row); } $this->xapian_write_db->commit_transaction(); mysql_free_result($result); mysql_close($this->mysql_link); return $response; } private function index_row($row) { $doc = new XapianDocument(); $this->xapian_indexer->set_document($doc); $this->xapian_indexer->index_text($row[‘name’],50); $this->xapian_indexer->index_text($row[‘summary’], 1); $GUID = self::XAPIAN_PREFIX_UID . 
$row[‘unique_key’]; $doc->add_term($GUID); $doc->add_value(self::XAPIAN_FIELD_URL, $row[‘url’]); $doc->add_value(self::XAPIAN_FIELD_DATE, date(‘Ymd’, strtotime($row[‘date’]))); $doc->add_value(self::XAPIAN_FIELD_UID, $row[‘unique_key’]); $doc->add_value(self::XAPIAN_FIELD_NAME, $row[‘name’]); $doc->add_value(self::XAPIAN_FIELD_SUMMARY, $row[‘summary’]); $this->xapian_write_db->replace_document(strval($GUID), $doc); $row_response = array(); $row_response[‘name’] = $row[‘name’]; $row_response[‘guid’] = $row[‘unique_key’]; $row_response[‘url’] = $row[‘url’]; return $row_response; } /** * Delete method * */ public function delete($params) { $this->xapian_init_writable(); $this->xapian_write_db->begin_transaction(); $response = array(); foreach($params[‘items’] as $param_guid) { $GUID = self::XAPIAN_PREFIX_UID . $param_guid; $this->xapian_write_db->delete_document(strval($GUID)); $response[] = $param_guid; } $this->xapian_write_db->commit_transaction(); return $response; } /** * Search method * */ public function search($params) { $this->xapian_init_readonly(); $start = microtime(true); // queries array to later construct full query $arr_queries = array(); // from date if(!empty($params[‘date_from’])) { $arr_queries[] = new XapianQuery(XapianQuery::OP_VALUE_GE, 6, date(‘Ymd’, strtotime($params[‘date_from’]))); } // to date if(!empty($params[‘date_to’])) { $arr_queries[] = new XapianQuery(XapianQuery::OP_VALUE_LE, 6, date(‘Ymd’, strtotime($params[‘date_to’]))); } // unique key if(!empty($params[‘unique_key’])) { $arr_queries[] = new XapianQuery(self::XAPIAN_PREFIX_UID . $params[‘unique_key’]); } // normal search query parsed if(!empty($params[‘search’])) { $qp = new XapianQueryParser(); $qp->set_stemmer($this->xapian_stemmer); $qp->set_database($this->xapian_read_db); $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME); $arr_queries[] = $qp->parse_query($params[‘search’]); } // Find the results for the query. 
// construct final query $query = array_pop($arr_queries); foreach($arr_queries as $sq) { $query = new XapianQuery(XapianQuery::OP_AND, $query, $sq); } $this->xapian_enquire->set_query($query); // set the count to the specified params $offset = (isset($params[‘offset’])) ? intval($params[‘offset’]) : 0; $count = (isset($params[‘count’])) ? intval($params[‘count’]) : self::DEFAULT_COUNT; $matches = $this->xapian_enquire->get_mset($offset, $count); $response = new stdClass(); $response->result_count = $matches->get_matches_estimated(); $results = array(); $i = $matches->begin(); while (!$i->equals($matches->end())) { $m = array(); $n = $i->get_rank() + 1; $doc = $i->get_document(); $m[‘position’] = $n; $m[‘url’] = $doc->get_value(self::XAPIAN_FIELD_URL); $m[‘name’] = $doc->get_value(self::XAPIAN_FIELD_NAME); $m[‘summary’] = $doc->get_value(self::XAPIAN_FIELD_SUMMARY); $m[‘date’] = $doc->get_value(self::XAPIAN_FIELD_DATE); $m[‘unique_key’] = $doc->get_value(self::XAPIAN_FIELD_UID); $m[‘percent’] = $i->get_percent(); $results[count($results)] = $m; $i->next(); } $response->results = $results; $end = microtime(true); // runtime info $response->execute = new stdClass(); $response->execute->call = ‘search’; $response->execute->offset = $offset; $response->execute->count = $count; $response->execute->start = $start; $response->execute->end = $end; $response->execute->time = $end - $start; // debug stuff $response->execute->debug = $query->get_description(); return $response; } } index.php index(array()); print_r($res); search.php ‘foo’); $res = $x->search($params); print_r($res); delete.php array(‘foo’), ); $res = $x->delete($params); print_r($res); 使用示例: 您下载刚才的源码包后,就可以导入db.sql,并在命令行里运行程序; bash$ php index.php stdClass Object ( [indexed] => Array ( [0] => Array ( [name] => foo [guid] => foo [url] => ) [1] => Array ( [name] => bar [guid] => bar [url] => ) ) ) bash$ php search.php stdClass Object ( [result_count] => 2 [results] => Array ( [0] => Array ( [position] => 1 [url] 
=> [name] => foo [summary] => foo bar test [date] => 20081105 [unique_key] => foo [percent] => 100 ) [1] => Array ( [position] => 2 [url] => [name] => bar [summary] => test foo bar [date] => 20091105 [unique_key] => bar [percent] => 50 ) ) [execute] => stdClass Object ( [call] => search [offset] => 0 [count] => 10 [start] => 1256674866.79 [end] => 1256674866.79 [time] => 0.000944852828979 [debug] => Xapian::Query(Zfoo:(pos=1)) ) ) bash$ php delete.php Array ( [0] => foo ) bash$ php search.php stdClass Object ( [result_count] => 1 [results] => Array ( [0] => Array ( [position] => 1 [url] => [name] => bar [summary] => test foo bar [date] => 20091105 [unique_key] => bar [percent] => 100 ) ) [execute] => stdClass Object ( [call] => search [offset] => 0 [count] => 10 [start] => 1256674876.02 [end] => 1256674876.02 [time] => 0.000872850418091 [debug] => Xapian::Query(Zfoo:(pos=1)) ) ) 接下来,扩展您自己的程序来满足您的各种需求吧,欢迎反馈.好好地享受检索的乐趣吧.