4 // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
8 // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
16 /////////////////////////////////////////////////////////////////////////////
17 // PHP version of Sphinx searchd client (PHP API)
18 /////////////////////////////////////////////////////////////////////////////
20 /// known searchd commands
21 define ( "SEARCHD_COMMAND_SEARCH", 0 );
22 define ( "SEARCHD_COMMAND_EXCERPT", 1 );
23 define ( "SEARCHD_COMMAND_UPDATE", 2 );
24 define ( "SEARCHD_COMMAND_KEYWORDS",3 );
25 define ( "SEARCHD_COMMAND_PERSIST", 4 );
26 define ( "SEARCHD_COMMAND_STATUS", 5 );
27 define ( "SEARCHD_COMMAND_QUERY", 6 );
29 /// current client-side command implementation versions
30 define ( "VER_COMMAND_SEARCH", 0x116 );
31 define ( "VER_COMMAND_EXCERPT", 0x100 );
32 define ( "VER_COMMAND_UPDATE", 0x102 );
33 define ( "VER_COMMAND_KEYWORDS", 0x100 );
34 define ( "VER_COMMAND_STATUS", 0x100 );
35 define ( "VER_COMMAND_QUERY", 0x100 );
37 /// known searchd status codes
38 define ( "SEARCHD_OK", 0 );
39 define ( "SEARCHD_ERROR", 1 );
40 define ( "SEARCHD_RETRY", 2 );
41 define ( "SEARCHD_WARNING", 3 );
/// known match modes (the values SetMatchMode() accepts)
44 define ( "SPH_MATCH_ALL", 0 );
45 define ( "SPH_MATCH_ANY", 1 );
46 define ( "SPH_MATCH_PHRASE", 2 );
47 define ( "SPH_MATCH_BOOLEAN", 3 );
48 define ( "SPH_MATCH_EXTENDED", 4 );
49 define ( "SPH_MATCH_FULLSCAN", 5 );
50 define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)
52 /// known ranking modes (ext2 only)
53 define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
54 define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
55 define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1
56 define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
57 define ( "SPH_RANK_PROXIMITY", 4 );
58 define ( "SPH_RANK_MATCHANY", 5 );
59 define ( "SPH_RANK_FIELDMASK", 6 );
/// known sort modes (the values SetSortMode() accepts)
62 define ( "SPH_SORT_RELEVANCE", 0 );
63 define ( "SPH_SORT_ATTR_DESC", 1 );
64 define ( "SPH_SORT_ATTR_ASC", 2 );
65 define ( "SPH_SORT_TIME_SEGMENTS", 3 );
66 define ( "SPH_SORT_EXTENDED", 4 );
67 define ( "SPH_SORT_EXPR", 5 );
69 /// known filter types
70 define ( "SPH_FILTER_VALUES", 0 );
71 define ( "SPH_FILTER_RANGE", 1 );
72 define ( "SPH_FILTER_FLOATRANGE", 2 );
74 /// known attribute types
75 define ( "SPH_ATTR_INTEGER", 1 );
76 define ( "SPH_ATTR_TIMESTAMP", 2 );
77 define ( "SPH_ATTR_ORDINAL", 3 );
78 define ( "SPH_ATTR_BOOL", 4 );
79 define ( "SPH_ATTR_FLOAT", 5 );
80 define ( "SPH_ATTR_BIGINT", 6 );
81 define ( "SPH_ATTR_MULTI", 0x40000000 );
83 /// known grouping functions
84 define ( "SPH_GROUPBY_DAY", 0 );
85 define ( "SPH_GROUPBY_WEEK", 1 );
86 define ( "SPH_GROUPBY_MONTH", 2 );
87 define ( "SPH_GROUPBY_YEAR", 3 );
88 define ( "SPH_GROUPBY_ATTR", 4 );
89 define ( "SPH_GROUPBY_ATTRPAIR", 5 );
91 // important properties of PHP's integers:
92 // - always signed (one bit short of PHP_INT_SIZE)
93 // - conversion from string to int is saturated
95 // - div converts arguments to floats
96 // - mod converts arguments to ints
98 // the packing code below works as follows:
99 // - when we got an int, just pack it
100 // if performance is a problem, this is the branch users should aim for
102 // - otherwise, we got a number in string form
103 // this might be due to different reasons, but we assume that this is
104 // because it didn't fit into PHP int
106 // - factor the string into high and low ints for packing
107 // - if we have bcmath, then it is used
108 // - if we don't, we have to do it manually (this is the fun part)
110 // - x64 branch does factoring using ints
111 // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
113 // unpacking routines are pretty much the same.
114 // - return ints if we can
115 // - otherwise format number into a string
117 /// pack 64-bit signed
// Serialises $v (an int or a numeric string) as two 32-bit big-endian words
// via pack("NN", high, low).
// NOTE(review): the embedded line numbers skip in several places below, so
// braces and some guards/statements are not visible in this extract; the
// branch structure cannot be read off these lines alone.
118 function sphPackI64 ( $v )
120 assert ( is_numeric($v) );
// 64-bit PHP build: split the value with a plain shift and mask
123 if ( PHP_INT_SIZE
>=8 )
126 return pack ( "NN", $v>>32, $v&0xFFFFFFFF );
// NOTE(review): the condition guarding this return is not visible (numbering
// jumps 126 -> 131); the high word is set to all ones for negative $v, else 0
131 return pack ( "NN", $v < 0 ?
-1 : 0, $v );
// bcmath path: a negative value is first mapped to its unsigned 64-bit
// two's-complement form by adding 2^64, then divided/reduced by 2^32
134 if ( function_exists("bcmul") )
136 if ( bccomp ( $v, 0 ) == -1 )
137 $v = bcadd ( "18446744073709551616", $v );
138 $h = bcdiv ( $v, "4294967296", 0 );
139 $l = bcmod ( $v, "4294967296" );
140 return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
// float path: split the decimal string into its last 13 digits ($lo) and the
// remaining leading digits ($hi), both as absolute values, then recombine
// them into high/low 32-bit words
144 $p = max(0, strlen($v) - 13);
145 $lo = abs((float)substr($v, $p));
146 $hi = abs((float)substr($v, 0, $p));
148 $m = $lo +
$hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
149 $q = floor($m/4294967296.0);
150 $l = $m - ($q*4294967296.0);
151 $h = $hi*2328.0 +
$q; // (10 ^ 13) / (1 << 32) = 2328
// NOTE(review): the conditions selecting between the next three assignments
// are not visible (numbering jumps 151 -> 156 -> 159); presumably they negate
// the magnitude for negative input - confirm against the full file
156 $h = 4294967296.0 - $h;
159 $h = 4294967295.0 - $h;
160 $l = 4294967296.0 - $l;
163 return pack ( "NN", $h, $l );
166 /// pack 64-bit unsigned
// Serialises $v (an int or a numeric string) as two 32-bit big-endian words
// via pack("NN", high, low).
// NOTE(review): the embedded line numbers skip in several places below, so
// braces and some guards are not visible in this extract; in particular the
// conditions that choose between the returns are incomplete as shown.
167 function sphPackU64 ( $v )
169 assert ( is_numeric($v) );
// everything down to the "return" numbered 197 sits after this 64-bit check
172 if ( PHP_INT_SIZE
>=8 )
// NOTE(review): the guard for this shift/mask return is not visible
// (numbering jumps 172 -> 178)
178 return pack ( "NN", $v>>32, $v&0xFFFFFFFF );
// bcmath variant using integer divisor literals
181 if ( function_exists("bcmul") )
183 $h = bcdiv ( $v, 4294967296, 0 );
184 $l = bcmod ( $v, 4294967296 );
185 return pack ( "NN", $h, $l );
// integer variant: last 13 decimal digits ($lo) and the leading rest ($hi)
// are recombined into high/low 32-bit words with integer arithmetic
189 $p = max ( 0, strlen($v) - 13 );
190 $lo = (int)substr ( $v, $p );
191 $hi = (int)substr ( $v, 0, $p );
193 $m = $lo +
$hi*1316134912;
194 $l = $m %
4294967296;
195 $h = $hi*2328 +
(int)($m/4294967296);
197 return pack ( "NN", $h, $l );
// NOTE(review): the guard for this return is not visible (numbering jumps
// 197 -> 202); it packs $v into the low word with a zero high word
202 return pack ( "NN", 0, $v );
// bcmath variant using string divisors and float conversion
205 if ( function_exists("bcmul") )
207 $h = bcdiv ( $v, "4294967296", 0 );
208 $l = bcmod ( $v, "4294967296" );
209 return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
// float variant of the 13-digit split used above
213 $p = max(0, strlen($v) - 13);
214 $lo = (float)substr($v, $p);
215 $hi = (float)substr($v, 0, $p);
217 $m = $lo +
$hi*1316134912.0;
218 $q = floor($m / 4294967296.0);
219 $l = $m - ($q * 4294967296.0);
220 $h = $hi*2328.0 +
$q;
222 return pack ( "NN", $h, $l );
225 // unpack 64-bit unsigned
// Reads two 32-bit big-endian words from $v and combines them into one
// unsigned 64-bit value, returned as an int where it fits and as a decimal
// string otherwise.
// NOTE(review): the embedded line numbers skip in several places below, so
// braces, some guards and some assignments are not visible in this extract.
226 function sphUnpackU64 ( $v )
228 list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );
230 if ( PHP_INT_SIZE
>=8 )
232 if ( $hi<0 ) $hi +
= (1<<32); // unpack() on PHP 5.2.2 to 5.2.5 can yield negative values here; wrap back to unsigned
233 if ( $lo<0 ) $lo +
= (1<<32);
// high word fits in 31 bits: the whole value fits a signed 64-bit int
236 if ( $hi<=2147483647 )
237 return ($hi<<32) +
$lo;
// otherwise build a decimal string, with bcmath when available
240 if ( function_exists("bcmul") )
241 return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );
// NOTE(review): $C is used below but its assignment is not visible in this
// extract (numbering jumps 241 -> 245) - confirm its value in the full file
245 $h = ((int)($hi / $C) << 32) +
(int)($lo / $C);
246 $l = (($hi %
$C) << 32) +
($lo %
$C);
249 $h +
= (int)($l / $C);
255 return sprintf ( "%d%05d", $h, $l );
// NOTE(review): the guard for this return is not visible (numbering jumps
// 255 -> 263); it formats the low word alone as an unsigned decimal
263 return sprintf ( "%u", $lo );
266 $hi = sprintf ( "%u", $hi );
267 $lo = sprintf ( "%u", $lo );
270 if ( function_exists("bcmul") )
271 return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );
// float path: splits $hi around 10^7 and carries the pieces of
// $hi * 4294967296 + $lo into an upper part ($h) and a lower 7-digit part ($l)
277 $q = floor($hi/10000000.0);
278 $r = $hi - $q*10000000.0;
279 $m = $lo +
$r*4967296.0;
280 $mq = floor($m/10000000.0);
281 $l = $m - $mq*10000000.0;
282 $h = $q*4294967296.0 +
$r*429.0 +
$mq;
284 $h = sprintf ( "%.0f", $h );
285 $l = sprintf ( "%07.0f", $l );
// NOTE(review): the guard for this return and the final return of the
// function are not visible in this extract
287 return sprintf( "%.0f", (float)$l );
291 // unpack 64-bit signed
// Reads two 32-bit big-endian words from $v and combines them into one signed
// 64-bit value, returned as an int on 64-bit PHP and as a decimal string
// (with a $neg prefix) on the other paths.
// NOTE(review): the embedded line numbers skip in several places below, so
// braces, guards and the assignments of $neg and $c are not visible in this
// extract.
292 function sphUnpackI64 ( $v )
294 list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );
297 if ( PHP_INT_SIZE
>=8 )
299 if ( $hi<0 ) $hi +
= (1<<32); // unpack() on PHP 5.2.2 to 5.2.5 can yield negative values here; wrap back to unsigned
300 if ( $lo<0 ) $lo +
= (1<<32);
302 return ($hi<<32) +
$lo;
// NOTE(review): the guards for the next two returns are not visible
// (numbering jumps 302 -> 310 -> 317); the first formats the low word as an
// unsigned decimal, the second subtracts 2^32 from it
310 return sprintf ( "%u", $lo );
317 return sprintf ( "%.0f", $lo - 4294967296.0 );
// NOTE(review): $neg and $c are used below but never assigned in the visible
// lines (numbering jumps 317 -> 330) - confirm their values in the full file
330 $hi = sprintf ( "%u", $hi );
331 $lo = sprintf ( "%u", $lo );
334 if ( function_exists("bcmul") )
335 return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $c );
// float path: same 10^7 split as in sphUnpackU64(), with $c added to the
// lower part
341 $q = floor($hi/10000000.0);
342 $r = $hi - $q*10000000.0;
343 $m = $lo +
$r*4967296.0;
344 $mq = floor($m/10000000.0);
345 $l = $m - $mq*10000000.0 +
$c;
346 $h = $q*4294967296.0 +
$r*429.0 +
$mq;
353 $h = sprintf ( "%.0f", $h );
354 $l = sprintf ( "%07.0f", $l );
// NOTE(review): the guard for the first of these two returns is not visible
356 return $neg . sprintf( "%.0f", (float)$l );
357 return $neg . $h . $l;
/// Normalise a 32-bit value produced by unpack() to its unsigned form.
/// On 64-bit PHP the result is an int; on 32-bit PHP, where an unsigned
/// 32-bit number does not fit into an int, it is a decimal string.
function sphFixUint ( $value )
{
	// x32 route, workaround php signed/unsigned braindamage
	if ( PHP_INT_SIZE<8 )
		return sprintf ( "%u", $value );

	// x64 route, workaround broken unpack() in 5.2.2+
	return ( $value<0 ) ? $value + (1<<32) : $value;
}
377 /// sphinx searchd client class
380 var $_host; ///< searchd host (default is "localhost")
381 var $_port; ///< searchd port (default is 9312)
382 var $_offset; ///< how many records to seek from result-set start (default is 0)
383 var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
384 var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
385 var $_weights; ///< per-field weights (default is 1 for all fields)
386 var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
387 var $_sortby; ///< attribute to sort by (default is "")
388 var $_min_id; ///< min ID to match (default is 0, which means no limit)
389 var $_max_id; ///< max ID to match (default is 0, which means no limit)
390 var $_filters; ///< search filters
391 var $_groupby; ///< group-by attribute name
392 var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
393 var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
394 var $_groupdistinct;///< group-by count-distinct attribute
395 var $_maxmatches; ///< max matches to retrieve
396 var $_cutoff; ///< cutoff to stop searching at (default is 0)
397 var $_retrycount; ///< distributed retries count
398 var $_retrydelay; ///< distributed retries delay
399 var $_anchor; ///< geographical anchor point
400 var $_indexweights; ///< per-index weights
401 var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
402 var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
403 var $_fieldweights; ///< per-field-name weights
404 var $_overrides; ///< per-query attribute values overrides
405 var $_select; ///< select-list (attributes or expressions, with optional aliases)
407 var $_error; ///< last error message
408 var $_warning; ///< last warning message
409 var $_connerror; ///< connection error vs remote error flag
411 var $_reqs; ///< requests array for multi-query
412 var $_mbenc; ///< stored mbstring encoding
413 var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
414 var $_timeout; ///< connect timeout
416 /////////////////////////////////////////////////////////////////////////////
418 /////////////////////////////////////////////////////////////////////////////
/// create a new client object and fill defaults
///
/// Initialisation lives in __construct() because PHP 7 deprecated, and PHP 8
/// removed, constructors named after the class. It is declared before the
/// legacy SphinxClient() method on purpose: PHP 5 raises a strict-standards
/// notice when __construct() follows an old-style constructor.
function __construct ()
{
	// per-client-object settings
	$this->_host = "localhost";
	$this->_port = 9312;
	$this->_path = false;
	$this->_socket = false;

	// per-query settings
	$this->_offset = 0;
	$this->_limit = 20;
	$this->_mode = SPH_MATCH_ALL;
	$this->_weights = array ();
	$this->_sort = SPH_SORT_RELEVANCE;
	$this->_sortby = "";
	$this->_min_id = 0;
	$this->_max_id = 0;
	$this->_filters = array ();
	$this->_groupby = "";
	$this->_groupfunc = SPH_GROUPBY_DAY;
	$this->_groupsort = "@group desc";
	$this->_groupdistinct = "";
	$this->_maxmatches = 1000;
	$this->_cutoff = 0;
	$this->_retrycount = 0;
	$this->_retrydelay = 0;
	$this->_anchor = array ();
	$this->_indexweights = array ();
	$this->_ranker = SPH_RANK_PROXIMITY_BM25;
	$this->_maxquerytime = 0;
	$this->_fieldweights = array();
	$this->_overrides = array();
	$this->_select = "*";

	// per-reply fields (for single-query case)
	$this->_error = "";
	$this->_warning = "";
	$this->_connerror = false;

	$this->_reqs = array (); // requests storage (for multi-query case)
	$this->_mbenc = ""; // no mbstring encoding stored yet
	$this->_arrayresult = false;
	$this->_timeout = 0; // 0 means "no explicit connect timeout"
}

/// legacy (PHP 4 style) constructor name; kept so that explicit calls and
/// subclasses invoking it keep working
function SphinxClient ()
{
	$this->__construct ();
}
/// release the socket when the client object goes away, if one is still held
function __destruct()
{
	if ( $this->_socket===false )
		return;

	fclose ( $this->_socket );
}
/// return the last error message recorded by this client (string)
function GetLastError ()
{
	$message = $this->_error;
	return $message;
}
/// return the last warning message recorded by this client (string)
function GetLastWarning ()
{
	$message = $this->_warning;
	return $message;
}
/// return the connection-error flag, which tells network connection errors
/// apart from searchd errors or broken responses
function IsConnectError()
{
	$flag = $this->_connerror;
	return $flag;
}
/// set searchd host name (string) and port (integer)
///
/// $host is either a TCP host name, an absolute UNIX socket path (leading
/// "/"), or an explicit "unix://" URL; for the two socket forms $port is
/// ignored. A $port of 0 (the default) selects the standard searchd port,
/// 9312.
function SetServer ( $host, $port = 0 )
{
	assert ( is_string($host) );

	// absolute filesystem path: use it as a UNIX-domain socket
	// (strncmp is safe on an empty string, unlike $host[0])
	if ( strncmp ( $host, "/", 1 )===0 )
	{
		$this->_path = 'unix://' . $host;
		return;
	}

	// already a socket URL: store it untouched
	if ( substr ( $host, 0, 7 )=="unix://" )
	{
		$this->_path = $host;
		return;
	}

	assert ( is_int($port) );
	$this->_host = $host;
	$this->_port = ( $port==0 ) ? 9312 : $port;

	// drop any socket path set by an earlier call so that the TCP endpoint wins
	$this->_path = false;
}
/// set the connect timeout used when opening the searchd connection
/// (numeric; 0 removes the timeout)
function SetConnectTimeout ( $timeout )
{
	$numeric = is_numeric ( $timeout );
	assert ( $numeric );

	$this->_timeout = $timeout;
}
/// write $length bytes of $data to the stream $handle
///
/// Returns true when the whole buffer was written. If the stream is already
/// at EOF or fwrite() reports anything other than $length bytes, the error
/// message and the connection-error flag are set and false is returned;
/// callers test the result with "!$this->_Send ( ... )".
function _Send ( $handle, $data, $length )
{
	if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length )
	{
		$this->_error = 'connection unexpectedly closed (timed out?)';
		$this->_connerror = true;
		return false;
	}
	return true;
}
530 /////////////////////////////////////////////////////////////////////////////
532 /// enter mbstring workaround mode
536 if ( ini_get ( "mbstring.func_overload" ) & 2 )
538 $this->_mbenc
= mb_internal_encoding();
539 mb_internal_encoding ( "latin1" );
543 /// leave mbstring workaround mode
547 mb_internal_encoding ( $this->_mbenc
);
550 /// connect to searchd server
553 if ( $this->_socket
!==false )
555 // we are in persistent connection mode, so we have a socket
556 // however, need to check whether it's still alive
557 if ( !@feof
( $this->_socket
) )
558 return $this->_socket
;
561 $this->_socket
= false;
566 $this->_connerror
= false;
570 $host = $this->_path
;
575 $host = $this->_host
;
576 $port = $this->_port
;
579 if ( $this->_timeout
<=0 )
580 $fp = @fsockopen
( $host, $port, $errno, $errstr );
582 $fp = @fsockopen
( $host, $port, $errno, $errstr, $this->_timeout
);
587 $location = $this->_path
;
589 $location = "{$this->_host}:{$this->_port}";
591 $errstr = trim ( $errstr );
592 $this->_error
= "connection to $location failed (errno=$errno, msg=$errstr)";
593 $this->_connerror
= true;
598 // this is a subtle part. we must do it before (!) reading back from searchd.
599 // because otherwise under some conditions (reported on FreeBSD for instance)
600 // TCP stack could throttle write-write-read pattern because of Nagle.
601 if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
604 $this->_error
= "failed to send client protocol version";
609 list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
614 $this->_error
= "expected searchd protocol version 1+, got version '$v'";
621 /// get and check response packet from searchd server
// Reads an 8-byte header (status, version, body length) from $fp, then the
// body, and interprets the status code. $client_ver is the client-side
// version of the command that was sent; it is compared with the server's.
// NOTE(review): the embedded line numbers skip in many places below, so
// braces, the initialisation of $left/$response, the return statements of the
// error branches and the final return are not visible in this extract.
622 function _GetResponse ( $fp, $client_ver )
627 $header = fread ( $fp, 8 );
628 if ( strlen($header)==8 )
// header layout: two 16-bit words (status, version) and one 32-bit length
630 list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
// keep reading until the announced number of bytes has arrived or the
// stream ends
632 while ( $left>0 && !feof($fp) )
634 $chunk = fread ( $fp, $left );
638 $left -= strlen($chunk);
// NOTE(review): the statement guarded by this check is not visible; it runs
// only when no persistent socket is held
642 if ( $this->_socket
=== false )
// a missing or short body is reported as an error
646 $read = strlen ( $response );
647 if ( !$response ||
$read!=$len )
650 ?
"failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
651 : "received zero-sized searchd response";
// warning: the body starts with a length-prefixed warning string; it is
// stored and the remainder of the body is returned
656 if ( $status==SEARCHD_WARNING
)
658 list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
659 $this->_warning
= substr ( $response, 4, $wlen );
660 return substr ( $response, 4+
$wlen );
// error statuses: the message follows a 4-byte prefix in the body
662 if ( $status==SEARCHD_ERROR
)
664 $this->_error
= "searchd error: " . substr ( $response, 4 );
667 if ( $status==SEARCHD_RETRY
)
669 $this->_error
= "temporary searchd error: " . substr ( $response, 4 );
672 if ( $status!=SEARCHD_OK
)
674 $this->_error
= "unknown status code '$status'";
// server implements an older version of the command than this client
679 if ( $ver<$client_ver )
681 $this->_warning
= sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
682 $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
688 /////////////////////////////////////////////////////////////////////////////
690 /////////////////////////////////////////////////////////////////////////////
/// set offset and count into result set,
/// and optionally set max-matches and cutoff limits
///
/// $offset is how many records to skip, $limit how many to return.
/// $max and $cutoff are optional: when left at 0 the current max-matches and
/// cutoff settings are kept instead of being overwritten with 0.
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
	assert ( is_int($offset) );
	assert ( is_int($limit) );
	assert ( $offset>=0 );
	assert ( $limit>0 );
	assert ( $max>=0 );
	assert ( $cutoff>=0 );

	$this->_offset = $offset;
	$this->_limit = $limit;

	if ( $max>0 )
		$this->_maxmatches = $max;

	if ( $cutoff>0 )
		$this->_cutoff = $cutoff;
}
/// set maximum query time, in milliseconds, per-index
/// integer, 0 means "do not limit"
///
/// The value is later packed into the request as an unsigned 32-bit number,
/// so negative values are rejected here.
function SetMaxQueryTime ( $max )
{
	assert ( is_int($max) );
	assert ( $max>=0 );

	$this->_maxquerytime = $max;
}
/// set matching mode
/// $mode must be one of the SPH_MATCH_* constants
function SetMatchMode ( $mode )
{
	$known = array (
		SPH_MATCH_ALL,
		SPH_MATCH_ANY,
		SPH_MATCH_PHRASE,
		SPH_MATCH_BOOLEAN,
		SPH_MATCH_EXTENDED,
		SPH_MATCH_FULLSCAN,
		SPH_MATCH_EXTENDED2 );
	assert ( in_array ( $mode, $known ) );

	$this->_mode = $mode;
}
/// set ranking mode
/// $ranker must be one of the SPH_RANK_* constants; every ranker this file
/// defines is accepted, including SPH_RANK_MATCHANY and SPH_RANK_FIELDMASK
function SetRankingMode ( $ranker )
{
	assert ( $ranker==SPH_RANK_PROXIMITY_BM25
		|| $ranker==SPH_RANK_BM25
		|| $ranker==SPH_RANK_NONE
		|| $ranker==SPH_RANK_WORDCOUNT
		|| $ranker==SPH_RANK_PROXIMITY
		|| $ranker==SPH_RANK_MATCHANY
		|| $ranker==SPH_RANK_FIELDMASK );
	$this->_ranker = $ranker;
}
/// set matches sorting mode
/// $mode must be one of the SPH_SORT_* constants; every mode other than
/// SPH_SORT_RELEVANCE needs a non-empty $sortby clause
function SetSortMode ( $mode, $sortby="" )
{
	$known = array (
		SPH_SORT_RELEVANCE,
		SPH_SORT_ATTR_DESC,
		SPH_SORT_ATTR_ASC,
		SPH_SORT_TIME_SEGMENTS,
		SPH_SORT_EXTENDED,
		SPH_SORT_EXPR );
	assert ( in_array ( $mode, $known ) );
	assert ( is_string($sortby) );
	assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

	$this->_sort = $mode;
	$this->_sortby = $sortby;
}
/// bind per-field weights by order
/// DEPRECATED; use SetFieldWeights() instead
/// $weights is an array of integer weights
function SetWeights ( $weights )
{
	assert ( is_array($weights) );
	foreach ( $weights as $w )
	{
		assert ( is_int($w) );
	}

	$this->_weights = $weights;
}
/// bind per-field weights by name
/// $weights maps field names (strings) to integer weights
function SetFieldWeights ( $weights )
{
	assert ( is_array($weights) );
	foreach ( $weights as $field=>$w )
	{
		assert ( is_string($field) );
		assert ( is_int($w) );
	}

	$this->_fieldweights = $weights;
}
/// bind per-index weights by name
/// $weights maps index names (strings) to integer weights
function SetIndexWeights ( $weights )
{
	assert ( is_array($weights) );
	foreach ( $weights as $name=>$w )
	{
		assert ( is_string($name) );
		assert ( is_int($w) );
	}

	$this->_indexweights = $weights;
}
/// set IDs range to match
/// only match records if document ID is between $min and $max (inclusive)
function SetIDRange ( $min, $max )
{
	// both bounds must be numeric and ordered
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$this->_min_id = $min;
	$this->_max_id = $max;
}
/// set values set filter
/// only match records where $attribute value is in given set
/// $values is a non-empty array of numeric values; $exclude inverts the match
function SetFilter ( $attribute, $values, $exclude=false )
{
	assert ( is_string($attribute) );
	assert ( is_array($values) );
	assert ( count($values) );

	// with assertions disabled, silently ignore an unusable value set
	if ( !is_array($values) || !count($values) )
		return;

	foreach ( $values as $v )
	{
		assert ( is_numeric($v) );
	}

	$filter = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
	$this->_filters[] = $filter;
}
/// set range filter
/// only match records if $attribute value is between $min and $max (inclusive)
/// $exclude inverts the match
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
	assert ( is_string($attribute) );
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$filter = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
	$this->_filters[] = $filter;
}
/// set float range filter
/// only match records if $attribute value is between $min and $max (inclusive)
/// both bounds must be floats; $exclude inverts the match
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
	assert ( is_string($attribute) );
	assert ( is_float($min) );
	assert ( is_float($max) );
	assert ( $min<=$max );

	$filter = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
	$this->_filters[] = $filter;
}
/// setup anchor point for geosphere distance calculations
/// required to use @geodist in filters and sorting
/// latitude and longitude must be in radians
/// $attrlat/$attrlong name the attributes holding each document's position
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
	assert ( is_string($attrlat) );
	assert ( is_string($attrlong) );
	assert ( is_float($lat) );
	assert ( is_float($long) );

	$anchor = array ();
	$anchor["attrlat"] = $attrlat;
	$anchor["attrlong"] = $attrlong;
	$anchor["lat"] = $lat;
	$anchor["long"] = $long;
	$this->_anchor = $anchor;
}
/// set grouping attribute and function
/// $func must be one of the SPH_GROUPBY_* constants; $groupsort is the
/// clause used to order the groups in the result set
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
	assert ( is_string($attribute) );
	assert ( is_string($groupsort) );

	$known = array (
		SPH_GROUPBY_DAY,
		SPH_GROUPBY_WEEK,
		SPH_GROUPBY_MONTH,
		SPH_GROUPBY_YEAR,
		SPH_GROUPBY_ATTR,
		SPH_GROUPBY_ATTRPAIR );
	assert ( in_array ( $func, $known ) );

	$this->_groupby = $attribute;
	$this->_groupfunc = $func;
	$this->_groupsort = $groupsort;
}
/// set count-distinct attribute for group-by queries
/// $attribute is the attribute name (string)
function SetGroupDistinct ( $attribute )
{
	$ok = is_string ( $attribute );
	assert ( $ok );

	$this->_groupdistinct = $attribute;
}
/// set distributed retries count and delay
/// both values must be non-negative integers
function SetRetries ( $count, $delay=0 )
{
	$count_ok = is_int($count) && $count>=0;
	assert ( $count_ok );
	$delay_ok = is_int($delay) && $delay>=0;
	assert ( $delay_ok );

	$this->_retrycount = $count;
	$this->_retrydelay = $delay;
}
/// set result set format (hash or array; hash by default)
/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
/// $arrayresult is a boolean: true selects the array format
function SetArrayResult ( $arrayresult )
{
	$ok = is_bool ( $arrayresult );
	assert ( $ok );

	$this->_arrayresult = $arrayresult;
}
/// set attribute values override
/// there can be only one override per attribute
/// $values must be a hash that maps document IDs to attribute values
/// $attrtype must be one of the integer, timestamp, bool, float or bigint
/// SPH_ATTR_* constants
function SetOverride ( $attrname, $attrtype, $values )
{
	$allowed = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

	assert ( is_string ( $attrname ) );
	assert ( in_array ( $attrtype, $allowed ) );
	assert ( is_array ( $values ) );

	// keyed by attribute name, so a later call replaces an earlier override
	$override = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values );
	$this->_overrides[$attrname] = $override;
}
/// set select-list (attributes or expressions), SQL-like syntax
/// $select is the list as a single string
function SetSelect ( $select )
{
	$ok = is_string ( $select );
	assert ( $ok );

	$this->_select = $select;
}
919 //////////////////////////////////////////////////////////////////////////////
/// clear all filters (for multi-queries)
/// the geo anchor is cleared together with the filters
function ResetFilters ()
{
	$empty = array();
	$this->_filters = $empty;
	$this->_anchor = $empty;
}
/// clear groupby settings (for multi-queries)
/// restores the same group-by defaults the constructor sets
function ResetGroupBy ()
{
	$defaults = array (
		"_groupby"			=> "",
		"_groupfunc"		=> SPH_GROUPBY_DAY,
		"_groupsort"		=> "@group desc",
		"_groupdistinct"	=> "" );

	foreach ( $defaults as $property=>$value )
		$this->$property = $value;
}
/// clear all attribute value overrides (for multi-queries)
function ResetOverrides ()
{
	$none = array ();
	$this->_overrides = $none;
}
943 //////////////////////////////////////////////////////////////////////////////
/// connect to searchd server, run given search query through given indexes,
/// and return the search results
///
/// Returns the result set of the single query, or false on failure; the last
/// error and warning messages are updated from the result set when there is
/// one.
function Query ( $query, $index="*", $comment="" )
{
	assert ( empty($this->_reqs) );

	$this->AddQuery ( $query, $index, $comment );
	$results = $this->RunQueries ();
	$this->_reqs = array (); // just in case it failed too early

	// probably network error; error message should be already filled.
	// also covers a reply that did not contain even one result set, which
	// would otherwise be dereferenced below
	if ( !is_array($results) || !isset($results[0]) )
		return false;

	$first = $results[0];
	$this->_error = $first["error"];
	$this->_warning = $first["warning"];

	if ( $first["status"]==SEARCHD_ERROR )
		return false;

	return $first;
}
/// helper to pack floats in network byte order
/// returns the 4 bytes of $f as a 32-bit float, most significant byte first
function _PackFloat ( $f )
{
	// reinterpret the machine-order float bytes as a machine-order 32-bit int...
	$words = unpack ( "L*", pack ( "f", $f ) );

	// ...and write that int out again in big-endian order
	return pack ( "N", $words[1] );
}
974 /// add query to multi-query batch
975 /// returns index into results array from RunQueries() call
// Serialises the current per-query settings together with $query, $index and
// $comment into one binary request string, appends it to $this->_reqs and
// returns its position in that array. Strings are written as a 32-bit
// big-endian length followed by the bytes; integers as 32-bit big-endian.
// NOTE(review): the embedded line numbers skip in several places below, so
// braces, "break"/"else"/"default" lines and the statements under the two
// "mbstring workaround" comments are not visible in this extract.
976 function AddQuery ( $query, $index="*", $comment="" )
978 // mbstring workaround
982 $req = pack ( "NNNNN", $this->_offset
, $this->_limit
, $this->_mode
, $this->_ranker
, $this->_sort
); // mode and limits
983 $req .= pack ( "N", strlen($this->_sortby
) ) . $this->_sortby
;
984 $req .= pack ( "N", strlen($query) ) . $query; // query itself
985 $req .= pack ( "N", count($this->_weights
) ); // weights
986 foreach ( $this->_weights
as $weight )
987 $req .= pack ( "N", (int)$weight );
988 $req .= pack ( "N", strlen($index) ) . $index; // indexes
989 $req .= pack ( "N", 1 ); // id64 range marker
990 $req .= sphPackU64 ( $this->_min_id
) . sphPackU64 ( $this->_max_id
); // id64 range
// filters: count, then for each filter its attribute name, type, a
// type-specific payload and the exclude flag
993 $req .= pack ( "N", count($this->_filters
) );
994 foreach ( $this->_filters
as $filter )
996 $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
997 $req .= pack ( "N", $filter["type"] );
998 switch ( $filter["type"] )
// value set: count followed by each value as a signed 64-bit number
1000 case SPH_FILTER_VALUES
:
1001 $req .= pack ( "N", count($filter["values"]) );
1002 foreach ( $filter["values"] as $value )
1003 $req .= sphPackI64 ( $value );
// integer range: min and max as signed 64-bit numbers
1006 case SPH_FILTER_RANGE
:
1007 $req .= sphPackI64 ( $filter["min"] ) . sphPackI64 ( $filter["max"] );
// float range: min and max as 32-bit floats
1010 case SPH_FILTER_FLOATRANGE
:
1011 $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
// NOTE(review): presumably the "default:" arm of the switch - its label is
// not visible in this extract
1015 assert ( 0 && "internal error: unhandled filter type" );
1017 $req .= pack ( "N", $filter["exclude"] );
1020 // group-by clause, max-matches count, group-sort clause, cutoff count
1021 $req .= pack ( "NN", $this->_groupfunc
, strlen($this->_groupby
) ) . $this->_groupby
;
1022 $req .= pack ( "N", $this->_maxmatches
);
1023 $req .= pack ( "N", strlen($this->_groupsort
) ) . $this->_groupsort
;
1024 $req .= pack ( "NNN", $this->_cutoff
, $this->_retrycount
, $this->_retrydelay
);
1025 $req .= pack ( "N", strlen($this->_groupdistinct
) ) . $this->_groupdistinct
;
// geo anchor point: a 0 marker when no anchor is set; the lines after it
// write a 1 marker, the two attribute names and the lat/long floats
// NOTE(review): the "else" separating the two cases is not visible
1028 if ( empty($this->_anchor
) )
1030 $req .= pack ( "N", 0 );
1033 $a =& $this->_anchor
;
1034 $req .= pack ( "N", 1 );
1035 $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"];
1036 $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"];
1037 $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] );
1040 // per-index weights
1041 $req .= pack ( "N", count($this->_indexweights
) );
1042 foreach ( $this->_indexweights
as $idx=>$weight )
1043 $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );
// max query time
1046 $req .= pack ( "N", $this->_maxquerytime
);
1048 // per-field weights
1049 $req .= pack ( "N", count($this->_fieldweights
) );
1050 foreach ( $this->_fieldweights
as $field=>$weight )
1051 $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );
// query comment
1054 $req .= pack ( "N", strlen($comment) ) . $comment;
1056 // attribute overrides
1057 $req .= pack ( "N", count($this->_overrides
) );
1058 foreach ( $this->_overrides
as $key => $entry )
1060 $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"];
1061 $req .= pack ( "NN", $entry["type"], count($entry["values"]) );
1062 foreach ( $entry["values"] as $id=>$val )
1064 assert ( is_numeric($id) );
1065 assert ( is_numeric($val) );
// document id as unsigned 64-bit, then the value encoded per attribute type
1067 $req .= sphPackU64 ( $id );
1068 switch ( $entry["type"] )
1070 case SPH_ATTR_FLOAT
: $req .= $this->_PackFloat ( $val ); break;
1071 case SPH_ATTR_BIGINT
: $req .= sphPackI64 ( $val ); break;
1072 default: $req .= pack ( "N", $val ); break;
// select-list
1078 $req .= pack ( "N", strlen($this->_select
) ) . $this->_select
;
1080 // mbstring workaround
1083 // store request to requests array
1084 $this->_reqs
[] = $req;
1085 return count($this->_reqs
)-1;
1088 /// connect to searchd, run queries batch, and return an array of result sets
// Sends every request queued in $this->_reqs as one SEARCHD_COMMAND_SEARCH
// packet, clears the queue once the exchange succeeded and hands the reply
// to _ParseSearchResponse().
// NOTE(review): the embedded line numbers skip in several places below, so
// braces, the return statements of the failure branches and the statements
// under the "mbstring workaround" comment are not visible in this extract.
1089 function RunQueries ()
// nothing queued: record an error
1091 if ( empty($this->_reqs
) )
1093 $this->_error
= "no queries defined, issue AddQuery() first";
1097 // mbstring workaround
// NOTE(review): the body of this connection-failure branch is not visible
1100 if (!( $fp = $this->_Connect() ))
1106 // send query, get response
1107 $nreqs = count($this->_reqs
);
1108 $req = join ( "", $this->_reqs
);
// the announced length covers the 4-byte query count plus the joined requests
1109 $len = 4+
strlen($req);
1110 $req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH
, VER_COMMAND_SEARCH
, $len, $nreqs ) . $req; // add header
// the bytes on the wire are $len plus the 8 header bytes that precede the
// query count (command, version, length)
// NOTE(review): the body of this failure branch is not visible
1112 if ( !( $this->_Send ( $fp, $req, $len+
8 ) ) ||
1113 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH
) ) )
1119 // query sent ok; we can reset reqs now
1120 $this->_reqs
= array ();
1122 // parse and return response
1123 return $this->_ParseSearchResponse ( $response, $nreqs );
1126 /// parse and return search query (or queries) response
// Walks the binary $response with the cursor $p and builds one result array
// per request (at most $nreqs of them). Every loop also checks $p against
// the response length so that a truncated reply cannot run past the end.
// NOTE(review): the embedded line numbers skip in many places below, so
// braces, "else"/"continue" lines, the initialisation of $fields, $attrs,
// $idx and $nvalues, and the final return are not visible in this extract.
1127 function _ParseSearchResponse ( $response, $nreqs )
1129 $p = 0; // current position
1130 $max = strlen($response); // max position for checks, to protect against broken responses
1132 $results = array ();
1133 for ( $ires=0; $ires<$nreqs && $p<$max; $ires++
)
1135 $results[] = array();
1136 $result =& $results[$ires];
1138 $result["error"] = "";
1139 $result["warning"] = "";
// per-result status; any status other than OK is followed by a
// length-prefixed message
1142 list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1143 $result["status"] = $status;
1144 if ( $status!=SEARCHD_OK
)
1146 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1147 $message = substr ( $response, $p, $len ); $p +
= $len;
// NOTE(review): the "else" between the warning and error assignments is not
// visible in this extract
1149 if ( $status==SEARCHD_WARNING
)
1151 $result["warning"] = $message;
1154 $result["error"] = $message;
// schema, part 1: field names
1163 list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1164 while ( $nfields-->0 && $p<$max )
1166 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1167 $fields[] = substr ( $response, $p, $len ); $p +
= $len;
1169 $result["fields"] = $fields;
// schema, part 2: attribute names mapped to their type codes
1171 list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1172 while ( $nattrs-->0 && $p<$max )
1174 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1175 $attr = substr ( $response, $p, $len ); $p +
= $len;
1176 list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1177 $attrs[$attr] = $type;
1179 $result["attrs"] = $attrs;
// match count and the flag telling whether document ids are 64-bit
1182 list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1183 list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1187 while ( $count-->0 && $p<$max )
1189 // index into result array
1192 // parse document id and weight
// NOTE(review): the condition choosing between the 64-bit id form (next two
// lines) and the 32-bit form (the unpack that follows) is not visible;
// presumably it tests $id64 - confirm
1195 $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p +
= 8;
1196 list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1200 list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
1201 substr ( $response, $p, 8 ) ) );
1203 $doc = sphFixUint($doc);
1205 $weight = sprintf ( "%u", $weight );
1207 // create match entry
// array format keys matches by position ($idx), hash format by document id
1208 if ( $this->_arrayresult
)
1209 $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
1211 $result["matches"][$doc]["weight"] = $weight;
1213 // parse and create attributes
1214 $attrvals = array ();
1215 foreach ( $attrs as $attr=>$type )
1217 // handle 64bit ints
1218 if ( $type==SPH_ATTR_BIGINT
)
1220 $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p +
= 8;
// floats arrive as a big-endian 32-bit word; repack it in machine order and
// reinterpret the bytes as a float
1225 if ( $type==SPH_ATTR_FLOAT
)
1227 list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1228 list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
1229 $attrvals[$attr] = $fval;
1233 // handle everything else as unsigned ints
1234 list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
// multi-value attribute: a list of unsigned ints follows
// NOTE(review): the assignment of $nvalues is not visible in this extract
1235 if ( $type & SPH_ATTR_MULTI
)
1237 $attrvals[$attr] = array ();
1239 while ( $nvalues-->0 && $p<$max )
1241 list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1242 $attrvals[$attr][] = sphFixUint($val);
1246 $attrvals[$attr] = sphFixUint($val);
1250 if ( $this->_arrayresult
)
1251 $result["matches"][$idx]["attrs"] = $attrvals;
1253 $result["matches"][$doc]["attrs"] = $attrvals;
// totals: matches returned, matches found, query time in milliseconds
// (stored in seconds), and the number of per-word statistics entries
1256 list ( $total, $total_found, $msecs, $words ) =
1257 array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
1258 $result["total"] = sprintf ( "%u", $total );
1259 $result["total_found"] = sprintf ( "%u", $total_found );
1260 $result["time"] = sprintf ( "%.3f", $msecs/1000 );
// per-word statistics: the word, then its document and hit counts
1263 while ( $words-->0 && $p<$max )
1265 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1266 $word = substr ( $response, $p, $len ); $p +
= $len;
1267 list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p +
= 8;
1268 $result["words"][$word] = array (
1269 "docs"=>sprintf ( "%u", $docs ),
1270 "hits"=>sprintf ( "%u", $hits ) );
1278 /////////////////////////////////////////////////////////////////////////////
1279 // excerpts generation
1280 /////////////////////////////////////////////////////////////////////////////
1282 /// connect to searchd server, and generate excerpts (snippets)
1283 /// of given documents for given query. returns false on failure,
1284 /// an array of snippets on success
/// Parameters:
///   $docs  - array of document bodies; every element is asserted to be a string
///   $index - index name (string), sent verbatim in the request
///   $words - query words (string), sent verbatim in the request
///   $opts  - optional array of settings; keys that are not set get the defaults below
1285 function BuildExcerpts ( $docs, $index, $words, $opts=array() )
1287 assert ( is_array($docs) );
1288 assert ( is_string($index) );
1289 assert ( is_string($words) );
1290 assert ( is_array($opts) );
// obtain a connection handle from _Connect()
// NOTE(review): the body of the failure branch is not visible in this excerpt
1294 if (!( $fp = $this->_Connect() ))
// fill in a default for every option the caller did not supply
1304 if ( !isset($opts["before_match"]) ) $opts["before_match"] = "<b>";
1305 if ( !isset($opts["after_match"]) ) $opts["after_match"] = "</b>";
1306 if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... ";
1307 if ( !isset($opts["limit"]) ) $opts["limit"] = 256;
1308 if ( !isset($opts["around"]) ) $opts["around"] = 5;
1309 if ( !isset($opts["exact_phrase"]) ) $opts["exact_phrase"] = false;
1310 if ( !isset($opts["single_passage"]) ) $opts["single_passage"] = false;
1311 if ( !isset($opts["use_boundaries"]) ) $opts["use_boundaries"] = false;
1312 if ( !isset($opts["weight_order"]) ) $opts["weight_order"] = false;
// flag word: bit 1 is always set; bits 2, 4, 8 and 16 are OR-ed in for
// exact_phrase, single_passage, use_boundaries and weight_order respectively
1319 $flags = 1; // remove spaces
1320 if ( $opts["exact_phrase"] ) $flags |
= 2;
1321 if ( $opts["single_passage"] ) $flags |
= 4;
1322 if ( $opts["use_boundaries"] ) $flags |
= 8;
1323 if ( $opts["weight_order"] ) $flags |
= 16;
// request body; every string is written as a 32-bit big-endian length ("N")
// followed by the raw bytes
1324 $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
1325 $req .= pack ( "N", strlen($index) ) . $index; // req index
1326 $req .= pack ( "N", strlen($words) ) . $words; // req words
// option strings, then limit and around cast to int
1329 $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
1330 $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
1331 $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
1332 $req .= pack ( "N", (int)$opts["limit"] );
1333 $req .= pack ( "N", (int)$opts["around"] );
// document count, followed by each document as a length-prefixed string
1336 $req .= pack ( "N", count($docs) );
1337 foreach ( $docs as $doc )
1339 assert ( is_string($doc) );
1340 $req .= pack ( "N", strlen($doc) ) . $doc;
1343 ////////////////////////////
1344 // send query, get response
1345 ////////////////////////////
// the header is two 16-bit values (command, version) plus a 32-bit body length,
// i.e. 8 bytes, which is why $len+8 bytes are sent
1347 $len = strlen($req);
1348 $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT
, VER_COMMAND_EXCERPT
, $len ) . $req; // add header
1349 if ( !( $this->_Send ( $fp, $req, $len+
8 ) ) ||
1350 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT
) ) )
// parse the reply: one length-prefixed snippet is read per input document
// NOTE(review): the initialisation and advancement of $pos and $res are not
// visible in this excerpt
1362 $rlen = strlen($response);
1363 for ( $i=0; $i<count($docs); $i++
)
1365 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
// a length that points past the end of the response is recorded as an error
1368 if ( $pos+
$len > $rlen )
1370 $this->_error
= "incomplete reply";
// a zero length yields an empty string instead of calling substr()
1374 $res[] = $len ?
substr ( $response, $pos, $len ) : "";
1383 /////////////////////////////////////////////////////////////////////////////
1384 // keyword generation
1385 /////////////////////////////////////////////////////////////////////////////
1387 /// connect to searchd server, and generate keyword list for a given query
1388 /// returns false on failure,
1389 /// an array of words on success
/// Parameters:
///   $query - query text (string), sent verbatim in the request
///   $index - index name (string), sent verbatim in the request
///   $hits  - boolean, sent to the server as the integer 0 or 1
1390 function BuildKeywords ( $query, $index, $hits )
1392 assert ( is_string($query) );
1393 assert ( is_string($index) );
1394 assert ( is_bool($hits) );
// obtain a connection handle from _Connect()
// NOTE(review): the body of the failure branch is not visible in this excerpt
1398 if (!( $fp = $this->_Connect() ))
// request body: two length-prefixed strings followed by the hits flag,
// each integer written as 32-bit big-endian ("N")
1409 $req = pack ( "N", strlen($query) ) . $query; // req query
1410 $req .= pack ( "N", strlen($index) ) . $index; // req index
1411 $req .= pack ( "N", (int)$hits );
1413 ////////////////////////////
1414 // send query, get response
1415 ////////////////////////////
// the header is two 16-bit values (command, version) plus a 32-bit body length,
// i.e. 8 bytes, which is why $len+8 bytes are sent
1417 $len = strlen($req);
1418 $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS
, VER_COMMAND_KEYWORDS
, $len ) . $req; // add header
1419 if ( !( $this->_Send ( $fp, $req, $len+
8 ) ) ||
1420 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS
) ) )
// parse the reply: a 32-bit keyword count, then one record per keyword
// NOTE(review): the initialisation of $pos and $res is not visible in this excerpt
1432 $rlen = strlen($response);
1433 list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
1435 for ( $i=0; $i<$nwords; $i++
)
// each record starts with two length-prefixed strings: tokenized, then normalized
1437 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos +
= 4;
1438 $tokenized = $len ?
substr ( $response, $pos, $len ) : "";
1441 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos +
= 4;
1442 $normalized = $len ?
substr ( $response, $pos, $len ) : "";
1445 $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized );
// two further 32-bit counters are stored under "docs" and "hits"
// NOTE(review): presumably only read when $hits is true; the guarding
// condition is not visible in this excerpt
1449 list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
1451 $res [$i]["docs"] = $ndocs;
1452 $res [$i]["hits"] = $nhits;
// NOTE(review): the condition that leads to this error is not visible in this excerpt
1457 $this->_error
= "incomplete reply";
/// return a copy of $string in which every character listed in the map below
/// is prefixed with a backslash (the backslash itself is doubled)
function EscapeString ( $string )
{
	// order matters for str_replace(): the backslash is handled first, so the
	// backslashes inserted by the later replacements are not escaped again
	$escapes = array (
		'\\' => '\\\\',
		'('  => '\(',
		')'  => '\)',
		'|'  => '\|',
		'-'  => '\-',
		'!'  => '\!',
		'@'  => '\@',
		'~'  => '\~',
		'"'  => '\"',
		'&'  => '\&',
		'/'  => '\/',
		'^'  => '\^',
		'$'  => '\$',
		'='  => '\='
	);

	$search  = array_keys ( $escapes );
	$replace = array_values ( $escapes );

	return str_replace ( $search, $replace, $string );
}
1475 /////////////////////////////////////////////////////////////////////////////
1476 // attribute updates
1477 /////////////////////////////////////////////////////////////////////////////
1479 /// batch update given attributes in given rows in given indexes
1480 /// returns amount of updated documents (0 or more) on success, or -1 on failure
/// Parameters:
///   $index  - index name (string)
///   $attrs  - array of attribute names; every element is asserted to be a string
///   $values - array keyed by numeric document id; each entry is an array with
///             exactly one new value per attribute in $attrs
///   $mva    - boolean; when true each value is packed as a count followed by
///             its elements, otherwise each value is packed as a single integer
1481 function UpdateAttributes ( $index, $attrs, $values, $mva=false )
1483 // verify everything
1484 assert ( is_string($index) );
1485 assert ( is_bool($mva) );
1487 assert ( is_array($attrs) );
1488 foreach ( $attrs as $attr )
1489 assert ( is_string($attr) );
1491 assert ( is_array($values) );
1492 foreach ( $values as $id=>$entry )
1494 assert ( is_numeric($id) );
1495 assert ( is_array($entry) );
1496 assert ( count($entry)==count($attrs) );
// NOTE(review): the array-of-ints and plain-int assertions below are presumably
// alternatives selected by $mva; the selecting condition is not visible in this excerpt
1497 foreach ( $entry as $v )
1501 assert ( is_array($v) );
1502 foreach ( $v as $vv )
1503 assert ( is_int($vv) );
1505 assert ( is_int($v) );
// request body: length-prefixed index name
1510 $req = pack ( "N", strlen($index) ) . $index;
// attribute count, then each attribute name followed by a 1/0 MVA flag
1512 $req .= pack ( "N", count($attrs) );
1513 foreach ( $attrs as $attr )
1515 $req .= pack ( "N", strlen($attr) ) . $attr;
1516 $req .= pack ( "N", $mva ?
1 : 0 );
// row count, then for each row the document id (packed by sphPackU64)
// followed by its values
1519 $req .= pack ( "N", count($values) );
1520 foreach ( $values as $id=>$entry )
1522 $req .= sphPackU64 ( $id );
1523 foreach ( $entry as $v )
1525 $req .= pack ( "N", $mva ?
count($v) : $v );
// NOTE(review): iterating $v only makes sense for array values; the guard
// around this loop is not visible in this excerpt
1527 foreach ( $v as $vv )
1528 $req .= pack ( "N", $vv );
1532 // connect, send query, get response
1533 if (!( $fp = $this->_Connect() ))
// the header is two 16-bit values (command, version) plus a 32-bit body length,
// i.e. 8 bytes, which is why $len+8 bytes are sent
1536 $len = strlen($req);
1537 $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE
, VER_COMMAND_UPDATE
, $len ) . $req; // add header
1538 if ( !$this->_Send ( $fp, $req, $len+
8 ) )
1541 if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE
) ))
// the first 4 bytes of the reply are decoded as a 32-bit big-endian integer
1545 list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
1549 /////////////////////////////////////////////////////////////////////////////
1550 // persistent connections
1551 /////////////////////////////////////////////////////////////////////////////
// NOTE(review): the enclosing function header and the branch bodies' return
// statements are not visible in this excerpt.
// a socket already stored on the object is reported as an error
1555 if ( $this->_socket
!== false )
1557 $this->_error
= 'already connected';
// obtain a connection handle from _Connect()
1560 if ( !$fp = $this->_Connect() )
1563 // command, command version = 0, body length = 4, body = 1
1564 $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST
, 0, 4, 1 );
// 12 bytes = 2 + 2 + 4 + 4, the packed size of the request above
1565 if ( !$this->_Send ( $fp, $req, 12 ) )
// store the connection handle on the object
1568 $this->_socket
= $fp;
// NOTE(review): the enclosing function header and the return statements are
// not visible in this excerpt.
// with no socket stored on the object there is nothing to close: record an error
1574 if ( $this->_socket
=== false )
1576 $this->_error
= 'not connected';
// close the stored socket and reset the property to false
1580 fclose ( $this->_socket
);
1581 $this->_socket
= false;
1586 //////////////////////////////////////////////////////////////////////////
1588 //////////////////////////////////////////////////////////////////////////
// NOTE(review): the enclosing function header and the failure-branch bodies
// are not visible in this excerpt.
// obtain a connection handle from _Connect()
1593 if (!( $fp = $this->_Connect() ))
// fixed 12-byte request: 16-bit command, 16-bit version, 32-bit length, 32-bit body
1599 $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS
, VER_COMMAND_STATUS
, 4, 1 ); // len=4, body=1
1600 if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
1601 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS
) ) )
1607 $res = substr ( $response, 4 ); // just ignore length, error handling, etc
// read the row and column counts as two 32-bit big-endian integers
// NOTE(review): the initialisation of $p is not visible in this excerpt, and
// $res is presumably reset to an array before the loop below - confirm
1609 list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p +
= 8;
// every cell is a length-prefixed string appended to its row in $res
1612 for ( $i=0; $i<$rows; $i++
)
1613 for ( $j=0; $j<$cols; $j++
)
1615 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p +
= 4;
1616 $res[$i][] = substr ( $response, $p, $len ); $p +
= $len;
1625 // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $