]> git.wh0rd.org - tt-rss.git/blob - lib/sphinxapi.php.08_04_11_183927
Update lib/sphinxapi.php to V2.0.1
[tt-rss.git] / lib / sphinxapi.php.08_04_11_183927
1 <?php
2
3 //
4 // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
5 //
6
7 //
8 // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15
16 /////////////////////////////////////////////////////////////////////////////
17 // PHP version of Sphinx searchd client (PHP API)
18 /////////////////////////////////////////////////////////////////////////////
19
/// known searchd commands (sent as the first 16-bit word of a request header)
define ( "SEARCHD_COMMAND_SEARCH", 0 );
define ( "SEARCHD_COMMAND_EXCERPT", 1 );
define ( "SEARCHD_COMMAND_UPDATE", 2 );
define ( "SEARCHD_COMMAND_KEYWORDS",3 );
define ( "SEARCHD_COMMAND_PERSIST", 4 );
define ( "SEARCHD_COMMAND_STATUS", 5 );
define ( "SEARCHD_COMMAND_QUERY", 6 );

/// current client-side command implementation versions
/// (major version in the high byte, minor version in the low byte)
define ( "VER_COMMAND_SEARCH", 0x116 );
define ( "VER_COMMAND_EXCERPT", 0x100 );
define ( "VER_COMMAND_UPDATE", 0x102 );
define ( "VER_COMMAND_KEYWORDS", 0x100 );
define ( "VER_COMMAND_STATUS", 0x100 );
define ( "VER_COMMAND_QUERY", 0x100 );

/// known searchd status codes (as returned in response headers)
define ( "SEARCHD_OK", 0 );
define ( "SEARCHD_ERROR", 1 );
define ( "SEARCHD_RETRY", 2 );
define ( "SEARCHD_WARNING", 3 );

/// known match modes
define ( "SPH_MATCH_ALL", 0 );
define ( "SPH_MATCH_ANY", 1 );
define ( "SPH_MATCH_PHRASE", 2 );
define ( "SPH_MATCH_BOOLEAN", 3 );
define ( "SPH_MATCH_EXTENDED", 4 );
define ( "SPH_MATCH_FULLSCAN", 5 );
define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// known ranking modes (ext2 only)
define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1
define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define ( "SPH_RANK_PROXIMITY", 4 );
define ( "SPH_RANK_MATCHANY", 5 );
define ( "SPH_RANK_FIELDMASK", 6 );

/// known sort modes
define ( "SPH_SORT_RELEVANCE", 0 );
define ( "SPH_SORT_ATTR_DESC", 1 );
define ( "SPH_SORT_ATTR_ASC", 2 );
define ( "SPH_SORT_TIME_SEGMENTS", 3 );
define ( "SPH_SORT_EXTENDED", 4 );
define ( "SPH_SORT_EXPR", 5 );

/// known filter types
define ( "SPH_FILTER_VALUES", 0 );
define ( "SPH_FILTER_RANGE", 1 );
define ( "SPH_FILTER_FLOATRANGE", 2 );

/// known attribute types
define ( "SPH_ATTR_INTEGER", 1 );
define ( "SPH_ATTR_TIMESTAMP", 2 );
define ( "SPH_ATTR_ORDINAL", 3 );
define ( "SPH_ATTR_BOOL", 4 );
define ( "SPH_ATTR_FLOAT", 5 );
define ( "SPH_ATTR_BIGINT", 6 );
define ( "SPH_ATTR_MULTI", 0x40000000 );

/// known grouping functions
define ( "SPH_GROUPBY_DAY", 0 );
define ( "SPH_GROUPBY_WEEK", 1 );
define ( "SPH_GROUPBY_MONTH", 2 );
define ( "SPH_GROUPBY_YEAR", 3 );
define ( "SPH_GROUPBY_ATTR", 4 );
define ( "SPH_GROUPBY_ATTRPAIR", 5 );
90
91 // important properties of PHP's integers:
92 // - always signed (one bit short of PHP_INT_SIZE)
93 // - conversion from string to int is saturated
94 // - float is double
95 // - div converts arguments to floats
96 // - mod converts arguments to ints
97
98 // the packing code below works as follows:
99 // - when we got an int, just pack it
100 // if performance is a problem, this is the branch users should aim for
101 //
102 // - otherwise, we got a number in string form
103 // this might be due to different reasons, but we assume that this is
104 // because it didn't fit into PHP int
105 //
106 // - factor the string into high and low ints for packing
107 // - if we have bcmath, then it is used
108 // - if we don't, we have to do it manually (this is the fun part)
109 //
110 // - x64 branch does factoring using ints
111 // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
112 //
113 // unpacking routines are pretty much the same.
114 // - return ints if we can
115 // - otherwise format number into a string
116
117 /// pack 64-bit signed
function sphPackI64 ( $v )
{
	// Packs a signed 64-bit value (native int or numeric string) into
	// 8 bytes: two big-endian 32-bit words, high word first.
	assert ( is_numeric($v) );

	// 64-bit PHP: a native int holds the whole value
	if ( PHP_INT_SIZE>=8 )
	{
		$n = (int)$v;
		$high = $n>>32;
		$low = $n&0xFFFFFFFF;
		return pack ( "NN", $high, $low );
	}

	// 32-bit PHP, native int: the high word is just the sign extension
	if ( is_int($v) )
	{
		$high = ( $v<0 ) ? -1 : 0;
		return pack ( "NN", $high, $v );
	}

	// 32-bit PHP, numeric string, bcmath available
	if ( function_exists("bcmul") )
	{
		$num = $v;
		if ( bccomp ( $num, 0 )==-1 )
			$num = bcadd ( "18446744073709551616", $num ); // negative: add 2^64
		$high = bcdiv ( $num, "4294967296", 0 );
		$low = bcmod ( $num, "4294967296" );
		// floats on purpose; an int cast would lose the 31st bit
		return pack ( "NN", (float)$high, (float)$low );
	}

	// 32-bit PHP, numeric string, no bcmath:
	// split the decimal string at 13 digits and recombine using floats
	$split = max ( 0, strlen($v)-13 );
	$tail = abs ( (float)substr ( $v, $split ) );
	$head = abs ( (float)substr ( $v, 0, $split ) );

	$mix = $tail + $head*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
	$carry = floor ( $mix/4294967296.0 );
	$low = $mix - ( $carry*4294967296.0 );
	$high = $head*2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

	// negative input: convert the magnitude to two's complement
	if ( $v<0 )
	{
		if ( $low==0 )
		{
			$high = 4294967296.0 - $high;
		}
		else
		{
			$high = 4294967295.0 - $high;
			$low = 4294967296.0 - $low;
		}
	}
	return pack ( "NN", $high, $low );
}
165
166 /// pack 64-bit unsigned
function sphPackU64 ( $v )
{
	// Packs an unsigned 64-bit value (native int or numeric string) into
	// 8 bytes: two big-endian 32-bit words, high word first.
	assert ( is_numeric($v) );

	if ( PHP_INT_SIZE>=8 )
	{
		// 64-bit PHP
		assert ( $v>=0 );

		// native int: plain shift and mask
		if ( is_int($v) )
		{
			$high = $v>>32;
			$low = $v&0xFFFFFFFF;
			return pack ( "NN", $high, $low );
		}

		// numeric string, bcmath available
		if ( function_exists("bcmul") )
		{
			$high = bcdiv ( $v, 4294967296, 0 );
			$low = bcmod ( $v, 4294967296 );
			return pack ( "NN", $high, $low );
		}

		// numeric string, no bcmath: split at 13 decimal digits, recombine with ints
		$split = max ( 0, strlen($v)-13 );
		$tail = (int)substr ( $v, $split );
		$head = (int)substr ( $v, 0, $split );

		$mix = $tail + $head*1316134912; // (10 ^ 13) % (1 << 32)
		$low = $mix % 4294967296;
		$high = $head*2328 + (int)( $mix/4294967296 ); // (10 ^ 13) / (1 << 32)

		return pack ( "NN", $high, $low );
	}

	// 32-bit PHP, native int: the high word is always zero
	if ( is_int($v) )
		return pack ( "NN", 0, $v );

	// 32-bit PHP, numeric string, bcmath available
	if ( function_exists("bcmul") )
	{
		$high = bcdiv ( $v, "4294967296", 0 );
		$low = bcmod ( $v, "4294967296" );
		// floats on purpose; an int cast would lose the 31st bit
		return pack ( "NN", (float)$high, (float)$low );
	}

	// 32-bit PHP, numeric string, no bcmath: same split, done in floats
	$split = max ( 0, strlen($v)-13 );
	$tail = (float)substr ( $v, $split );
	$head = (float)substr ( $v, 0, $split );

	$mix = $tail + $head*1316134912.0;
	$carry = floor ( $mix/4294967296.0 );
	$low = $mix - ( $carry*4294967296.0 );
	$high = $head*2328.0 + $carry;

	return pack ( "NN", $high, $low );
}
224
225 // unpack 64-bit unsigned
function sphUnpackU64 ( $v )
{
	// Decodes 8 bytes (two big-endian 32-bit words, high first) as an
	// unsigned 64-bit number. Returns a native int when the value fits,
	// otherwise a decimal string.
	$words = array_values ( unpack ( "N*N*", $v ) );
	$hi = $words[0];
	$lo = $words[1];

	if ( PHP_INT_SIZE>=8 )
	{
		// some PHP builds (5.2.2 to 5.2.5) return negative words from unpack()
		if ( $hi<0 ) $hi += (1<<32);
		if ( $lo<0 ) $lo += (1<<32);

		// fits into a signed 64-bit int
		if ( $hi<=2147483647 )
			return ($hi<<32) + $lo;

		// too large for an int, bcmath available
		if ( function_exists("bcmul") )
			return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

		// too large for an int, no bcmath: work in base 100000
		$base = 100000;
		$upper = ((int)($hi / $base) << 32) + (int)($lo / $base);
		$lower = (($hi % $base) << 32) + ($lo % $base);
		if ( $lower>$base )
		{
			$upper += (int)($lower / $base);
			$lower = $lower % $base;
		}

		if ( $upper==0 )
			return $lower;
		return sprintf ( "%d%05d", $upper, $lower );
	}

	// 32-bit PHP, value fits into the low word
	if ( $hi==0 )
	{
		if ( $lo>0 )
			return $lo;
		return sprintf ( "%u", $lo );
	}

	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );

	// 32-bit PHP, bcmath available
	if ( function_exists("bcmul") )
		return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

	// 32-bit PHP, no bcmath: float arithmetic in base 10^7
	$hi = (float)$hi;
	$lo = (float)$lo;

	$quot = floor ( $hi/10000000.0 );
	$rem = $hi - $quot*10000000.0;
	$mix = $lo + $rem*4967296.0;
	$mixq = floor ( $mix/10000000.0 );
	$lower = $mix - $mixq*10000000.0;
	$upper = $quot*4294967296.0 + $rem*429.0 + $mixq;

	$upper = sprintf ( "%.0f", $upper );
	$lower = sprintf ( "%07.0f", $lower );
	if ( $upper=="0" )
		return sprintf( "%.0f", (float)$lower );
	return $upper . $lower;
}
290
291 // unpack 64-bit signed
function sphUnpackI64 ( $v )
{
	// Decodes 8 bytes (two big-endian 32-bit words, high first) as a
	// signed 64-bit number. Returns a native int when possible,
	// otherwise a decimal string.
	$words = array_values ( unpack ( "N*N*", $v ) );
	$hi = $words[0];
	$lo = $words[1];

	// 64-bit PHP: recombine directly
	if ( PHP_INT_SIZE>=8 )
	{
		// some PHP builds (5.2.2 to 5.2.5) return negative words from unpack()
		if ( $hi<0 ) $hi += (1<<32);
		if ( $lo<0 ) $lo += (1<<32);

		return ($hi<<32) + $lo;
	}

	if ( $hi==0 )
	{
		// 32-bit PHP, small non-negative value
		if ( $lo>0 )
			return $lo;
		return sprintf ( "%u", $lo );
	}
	elseif ( $hi==-1 )
	{
		// 32-bit PHP, small negative value
		if ( $lo<0 )
			return $lo;
		return sprintf ( "%.0f", $lo - 4294967296.0 );
	}

	// general case: take the magnitude of negative values
	// (bitwise complement now, +1 carry added later)
	$sign = "";
	$carry = 0;
	if ( $hi<0 )
	{
		$hi = ~$hi;
		$lo = ~$lo;
		$carry = 1;
		$sign = "-";
	}

	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );

	// 32-bit PHP, bcmath available
	if ( function_exists("bcmul") )
		return $sign . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $carry );

	// 32-bit PHP, no bcmath: float arithmetic in base 10^7
	$hi = (float)$hi;
	$lo = (float)$lo;

	$quot = floor ( $hi/10000000.0 );
	$rem = $hi - $quot*10000000.0;
	$mix = $lo + $rem*4967296.0;
	$mixq = floor ( $mix/10000000.0 );
	$lower = $mix - $mixq*10000000.0 + $carry;
	$upper = $quot*4294967296.0 + $rem*429.0 + $mixq;
	if ( $lower==10000000 )
	{
		// the +1 carry overflowed the low part
		$lower = 0;
		$upper += 1;
	}

	$upper = sprintf ( "%.0f", $upper );
	$lower = sprintf ( "%07.0f", $lower );
	if ( $upper=="0" )
		return $sign . sprintf( "%.0f", (float)$lower );
	return $sign . $upper . $lower;
}
359
360
function sphFixUint ( $value )
{
	// Normalises a 32-bit word produced by unpack() to its unsigned value.
	// 32-bit PHP cannot hold it in an int, so a decimal string is returned.
	if ( PHP_INT_SIZE<8 )
		return sprintf ( "%u", $value );

	// 64-bit PHP: some unpack() implementations (5.2.2+) yield a negative int
	return ( $value<0 ) ? $value + (1<<32) : $value;
}
375
376
377 /// sphinx searchd client class
378 class SphinxClient
379 {
var $_host; ///< searchd host (default is "localhost")
var $_port; ///< searchd port (default is 9312)
var $_path; ///< searchd UNIX socket path ("unix://..."), or false/"" when host:port is used
var $_socket; ///< persistent connection stream, or false when there is none
var $_offset; ///< how many records to seek from result-set start (default is 0)
var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
var $_weights; ///< per-field weights (default is 1 for all fields)
var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
var $_sortby; ///< attribute to sort by (default is "")
var $_min_id; ///< min ID to match (default is 0, which means no limit)
var $_max_id; ///< max ID to match (default is 0, which means no limit)
var $_filters; ///< search filters
var $_groupby; ///< group-by attribute name
var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
var $_groupdistinct;///< group-by count-distinct attribute
var $_maxmatches; ///< max matches to retrieve
var $_cutoff; ///< cutoff to stop searching at (default is 0)
var $_retrycount; ///< distributed retries count
var $_retrydelay; ///< distributed retries delay
var $_anchor; ///< geographical anchor point
var $_indexweights; ///< per-index weights
var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
var $_fieldweights; ///< per-field-name weights
var $_overrides; ///< per-query attribute values overrides
var $_select; ///< select-list (attributes or expressions, with optional aliases)

var $_error; ///< last error message
var $_warning; ///< last warning message
var $_connerror; ///< connection error vs remote error flag

var $_reqs; ///< requests array for multi-query
var $_mbenc; ///< stored mbstring encoding
var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
var $_timeout; ///< connect timeout
415
416 /////////////////////////////////////////////////////////////////////////////
417 // common stuff
418 /////////////////////////////////////////////////////////////////////////////
419
420 /// create a new client object and fill defaults
function __construct ()
{
	// Fills every client setting with its default value.
	// Declared as __construct because class-named constructors are no
	// longer invoked by `new` on PHP 8.

	// per-client-object settings
	$this->_host = "localhost";
	$this->_port = 9312;
	$this->_path = false;
	$this->_socket = false;

	// per-query settings
	$this->_offset = 0;
	$this->_limit = 20;
	$this->_mode = SPH_MATCH_ALL;
	$this->_weights = array ();
	$this->_sort = SPH_SORT_RELEVANCE;
	$this->_sortby = "";
	$this->_min_id = 0;
	$this->_max_id = 0;
	$this->_filters = array ();
	$this->_groupby = "";
	$this->_groupfunc = SPH_GROUPBY_DAY;
	$this->_groupsort = "@group desc";
	$this->_groupdistinct= "";
	$this->_maxmatches = 1000;
	$this->_cutoff = 0;
	$this->_retrycount = 0;
	$this->_retrydelay = 0;
	$this->_anchor = array ();
	$this->_indexweights= array ();
	$this->_ranker = SPH_RANK_PROXIMITY_BM25;
	$this->_maxquerytime= 0;
	$this->_fieldweights= array();
	$this->_overrides = array();
	$this->_select = "*";

	$this->_error = ""; // per-reply fields (for single-query case)
	$this->_warning = "";
	$this->_connerror = false;

	$this->_reqs = array (); // requests storage (for multi-query case)
	$this->_mbenc = "";
	$this->_arrayresult = false;
	$this->_timeout = 0;
}

/// legacy class-named constructor, kept so that existing code calling
/// parent::SphinxClient() or $client->SphinxClient() keeps working
function SphinxClient ()
{
	$this->__construct ();
}
464
function __destruct()
{
	// nothing to release unless a persistent socket is currently held
	if ( $this->_socket === false )
		return;

	fclose ( $this->_socket );
}
470
471 /// get last error message (string)
function GetLastError ()
{
	// message stored by the most recent failing operation ("" when none)
	$message = $this->_error;
	return $message;
}
476
477 /// get last warning message (string)
function GetLastWarning ()
{
	// warning text stored by the most recent operation ("" when none)
	$message = $this->_warning;
	return $message;
}
482
483 /// get last error flag (to tell network connection errors from searchd errors or broken responses)
function IsConnectError()
{
	// true when the last failure was flagged as a connection-level error
	$flag = $this->_connerror;
	return $flag;
}
488
489 /// set searchd host name (string) and port (integer)
function SetServer ( $host, $port = 0 )
{
	// Selects the searchd endpoint.
	//  $host - host name, an absolute UNIX socket path ("/..."),
	//          or a "unix://..." URL
	//  $port - TCP port; 0 (the default) selects the standard port 9312
	assert ( is_string($host) );

	// absolute filesystem path: UNIX domain socket
	// (the emptiness check avoids an "uninitialized string offset" warning)
	if ( $host!=="" && $host[0]=='/' )
	{
		$this->_path = 'unix://' . $host;
		return;
	}

	// already a unix:// URL
	if ( substr ( $host, 0, 7 )=="unix://" )
	{
		$this->_path = $host;
		return;
	}

	assert ( is_int($port) );
	$this->_host = $host;
	// an omitted port must not overwrite the default with the unusable port 0
	$this->_port = ( $port==0 ) ? 9312 : $port;
	$this->_path = '';
}
510
511 /// set server connection timeout (0 to remove)
function SetConnectTimeout ( $timeout )
{
	// stored as given; _Connect() passes no timeout to fsockopen() when it is <= 0
	assert ( is_numeric($timeout) );

	$this->_timeout = $timeout;
}
517
518
function _Send ( $handle, $data, $length )
{
	// Writes exactly $length bytes of $data to $handle.
	// Returns true on success; on failure sets _error / _connerror and
	// returns false.
	//
	// A single fwrite() on a network stream may write only part of the
	// buffer, so keep writing until everything is out or the stream stalls.
	$sent = 0;
	$ok = !feof($handle);

	while ( $ok && $sent<$length )
	{
		$written = fwrite ( $handle, substr ( $data, $sent, $length-$sent ) );
		if ( $written===false || $written===0 || feof($handle) && $sent+$written<$length )
			$ok = false;
		else
			$sent += $written;
	}

	if ( !$ok )
	{
		$this->_error = 'connection unexpectedly closed (timed out?)';
		$this->_connerror = true;
		return false;
	}
	return true;
}
529
530 /////////////////////////////////////////////////////////////////////////////
531
532 /// enter mbstring workaround mode
function _MBPush ()
{
	// Remembers the current mbstring internal encoding and switches to
	// latin1, but only when bit 2 of mbstring.func_overload is set.
	$this->_mbenc = "";

	$overload = ini_get ( "mbstring.func_overload" );
	if ( !( $overload & 2 ) )
		return;

	$this->_mbenc = mb_internal_encoding();
	mb_internal_encoding ( "latin1" );
}
542
543 /// leave mbstring workaround mode
function _MBPop ()
{
	// nothing was saved by _MBPush(), so there is nothing to restore
	if ( !$this->_mbenc )
		return;

	mb_internal_encoding ( $this->_mbenc );
}
549
550 /// connect to searchd server
function _Connect ()
{
	// Returns a connected stream that has completed the protocol version
	// handshake, or false on failure (with _error set).
	// A live persistent socket is reused as is.
	if ( $this->_socket!==false )
	{
		// we are in persistent connection mode, so we have a socket
		// however, need to check whether it's still alive
		if ( !@feof ( $this->_socket ) )
			return $this->_socket;

		// dead connection: release the stream before forcing a reopen
		if ( is_resource ( $this->_socket ) )
			fclose ( $this->_socket );
		$this->_socket = false;
	}

	$errno = 0;
	$errstr = "";
	$this->_connerror = false;

	if ( $this->_path )
	{
		$host = $this->_path;
		$port = 0;
	}
	else
	{
		$host = $this->_host;
		$port = $this->_port;
	}

	if ( $this->_timeout<=0 )
		$fp = @fsockopen ( $host, $port, $errno, $errstr );
	else
		$fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

	if ( !$fp )
	{
		if ( $this->_path )
			$location = $this->_path;
		else
			$location = "{$this->_host}:{$this->_port}";

		$errstr = trim ( $errstr );
		$this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
		$this->_connerror = true;
		return false;
	}

	// send my version
	// this is a subtle part. we must do it before (!) reading back from searchd.
	// because otherwise under some conditions (reported on FreeBSD for instance)
	// TCP stack could throttle write-write-read pattern because of Nagle.
	if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
	{
		fclose ( $fp );
		$this->_error = "failed to send client protocol version";
		return false;
	}

	// check version; a short or failed read counts as version 0 instead of
	// being handed to unpack(), which would emit warnings
	$v = 0;
	$greeting = fread ( $fp, 4 );
	if ( is_string($greeting) && strlen($greeting)==4 )
	{
		list(,$v) = unpack ( "N*", $greeting );
		$v = (int)$v;
	}
	if ( $v<1 )
	{
		fclose ( $fp );
		$this->_error = "expected searchd protocol version 1+, got version '$v'";
		return false;
	}

	return $fp;
}
620
621 /// get and check response packet from searchd server
function _GetResponse ( $fp, $client_ver )
{
	// Reads one response packet (8-byte header: status, version, length;
	// then the body) from $fp and validates it.
	// Returns the body on success; on a warning status stores the warning
	// and returns the body that follows it; returns false on any failure
	// (with _error set).
	// Closes $fp unless a persistent socket is held.
	$response = "";
	$len = 0;

	$header = fread ( $fp, 8 );
	if ( strlen($header)==8 )
	{
		list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
		$left = $len;
		while ( $left>0 && !feof($fp) )
		{
			$chunk = fread ( $fp, $left );
			if ( $chunk===false )
				break; // read error; retrying could spin forever

			// strict check: a chunk consisting of "0" is falsy but is valid data
			if ( $chunk!=="" )
			{
				$response .= $chunk;
				$left -= strlen($chunk);
			}
		}
	}
	if ( $this->_socket === false )
		fclose ( $fp );

	// check response
	$read = strlen ( $response );
	if ( $read==0 || $read!=$len )
	{
		$this->_error = $len
			? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
			: "received zero-sized searchd response";
		return false;
	}

	// check status
	if ( $status==SEARCHD_WARNING )
	{
		// body layout: 4-byte warning length, warning text, then the real payload
		list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
		$this->_warning = substr ( $response, 4, $wlen );
		return substr ( $response, 4+$wlen );
	}
	if ( $status==SEARCHD_ERROR )
	{
		$this->_error = "searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status==SEARCHD_RETRY )
	{
		$this->_error = "temporary searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status!=SEARCHD_OK )
	{
		$this->_error = "unknown status code '$status'";
		return false;
	}

	// check version
	if ( $ver<$client_ver )
	{
		$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
			$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
	}

	return $response;
}
687
688 /////////////////////////////////////////////////////////////////////////////
689 // searching
690 /////////////////////////////////////////////////////////////////////////////
691
692 /// set offset and count into result set,
693 /// and optionally set max-matches and cutoff limits
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
	// $offset/$limit select the result window;
	// $max and $cutoff are applied only when positive, otherwise left unchanged
	assert ( is_int($offset) );
	assert ( is_int($limit) );
	assert ( $offset>=0 );
	assert ( $limit>0 );
	assert ( $max>=0 );

	$this->_offset = $offset;
	$this->_limit = $limit;

	if ( $max>0 )
	{
		$this->_maxmatches = $max;
	}
	if ( $cutoff>0 )
	{
		$this->_cutoff = $cutoff;
	}
}
708
709 /// set maximum query time, in milliseconds, per-index
710 /// integer, 0 means "do not limit"
function SetMaxQueryTime ( $max )
{
	// non-negative integer number of milliseconds
	assert ( is_int($max) );
	assert ( $max>=0 );

	$this->_maxquerytime = $max;
}
717
718 /// set matching mode
function SetMatchMode ( $mode )
{
	// only the SPH_MATCH_* constants are accepted (loose comparison)
	assert ( in_array ( $mode, array (
		SPH_MATCH_ALL,
		SPH_MATCH_ANY,
		SPH_MATCH_PHRASE,
		SPH_MATCH_BOOLEAN,
		SPH_MATCH_EXTENDED,
		SPH_MATCH_FULLSCAN,
		SPH_MATCH_EXTENDED2 ) ) );

	$this->_mode = $mode;
}
730
731 /// set ranking mode
function SetRankingMode ( $ranker )
{
	// Accepts every SPH_RANK_* mode defined by this file.
	// SPH_RANK_MATCHANY and SPH_RANK_FIELDMASK used to be rejected by the
	// assertion although they are declared as known ranking modes.
	assert ( $ranker==SPH_RANK_PROXIMITY_BM25
		|| $ranker==SPH_RANK_BM25
		|| $ranker==SPH_RANK_NONE
		|| $ranker==SPH_RANK_WORDCOUNT
		|| $ranker==SPH_RANK_PROXIMITY
		|| $ranker==SPH_RANK_MATCHANY
		|| $ranker==SPH_RANK_FIELDMASK );
	$this->_ranker = $ranker;
}
741
742 /// set matches sorting mode
function SetSortMode ( $mode, $sortby="" )
{
	// $mode must be one of the SPH_SORT_* constants;
	// every mode except SPH_SORT_RELEVANCE needs a non-empty $sortby
	assert ( in_array ( $mode, array (
		SPH_SORT_RELEVANCE,
		SPH_SORT_ATTR_DESC,
		SPH_SORT_ATTR_ASC,
		SPH_SORT_TIME_SEGMENTS,
		SPH_SORT_EXTENDED,
		SPH_SORT_EXPR ) ) );
	assert ( is_string($sortby) );
	assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

	$this->_sortby = $sortby;
	$this->_sort = $mode;
}
758
759 /// bind per-field weights by order
760 /// DEPRECATED; use SetFieldWeights() instead
function SetWeights ( $weights )
{
	// array of integer weights, bound to fields by position
	assert ( is_array($weights) );
	foreach ( $weights as $w )
	{
		assert ( is_int($w) );
	}

	$this->_weights = $weights;
}
769
770 /// bind per-field weights by name
function SetFieldWeights ( $weights )
{
	// hash of field name (string) => integer weight
	assert ( is_array($weights) );
	foreach ( $weights as $field=>$w )
	{
		assert ( is_string($field) );
		assert ( is_int($w) );
	}

	$this->_fieldweights = $weights;
}
781
782 /// bind per-index weights by name
function SetIndexWeights ( $weights )
{
	// hash of index name (string) => integer weight
	assert ( is_array($weights) );
	foreach ( $weights as $name=>$w )
	{
		assert ( is_string($name) );
		assert ( is_int($w) );
	}

	$this->_indexweights = $weights;
}
793
794 /// set IDs range to match
795 /// only match records if document ID is between $min and $max (inclusive)
function SetIDRange ( $min, $max )
{
	// both bounds must be numeric, with $min not above $max
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$this->_max_id = $max;
	$this->_min_id = $min;
}
804
805 /// set values set filter
806 /// only match records where $attribute value is in given set
function SetFilter ( $attribute, $values, $exclude=false )
{
	// $values must be a non-empty array of numeric values
	assert ( is_string($attribute) );
	assert ( is_array($values) );
	assert ( count($values) );

	// with assertions disabled, silently ignore unusable input
	if ( !is_array($values) || !count($values) )
		return;

	foreach ( $values as $item )
	{
		assert ( is_numeric($item) );
	}

	$filter = array (
		"type"=>SPH_FILTER_VALUES,
		"attr"=>$attribute,
		"exclude"=>$exclude,
		"values"=>$values );
	$this->_filters[] = $filter;
}
821
822 /// set range filter
823 /// only match records if $attribute value is between $min and $max (inclusive)
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
	// numeric bounds, with $min not above $max
	assert ( is_string($attribute) );
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$filter = array (
		"type"=>SPH_FILTER_RANGE,
		"attr"=>$attribute,
		"exclude"=>$exclude,
		"min"=>$min,
		"max"=>$max );
	$this->_filters[] = $filter;
}
833
834 /// set float range filter
835 /// only match records if $attribute value is between $min and $max (inclusive)
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
	// bounds must be PHP floats, with $min not above $max
	assert ( is_string($attribute) );
	assert ( is_float($min) );
	assert ( is_float($max) );
	assert ( $min<=$max );

	$filter = array (
		"type"=>SPH_FILTER_FLOATRANGE,
		"attr"=>$attribute,
		"exclude"=>$exclude,
		"min"=>$min,
		"max"=>$max );
	$this->_filters[] = $filter;
}
845
846 /// setup anchor point for geosphere distance calculations
847 /// required to use @geodist in filters and sorting
848 /// latitude and longitude must be in radians
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
	// attribute names are strings, coordinates are PHP floats
	assert ( is_string($attrlat) );
	assert ( is_string($attrlong) );
	assert ( is_float($lat) );
	assert ( is_float($long) );

	$anchor = array ();
	$anchor["attrlat"] = $attrlat;
	$anchor["attrlong"] = $attrlong;
	$anchor["lat"] = $lat;
	$anchor["long"] = $long;
	$this->_anchor = $anchor;
}
858
859 /// set grouping attribute and function
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
	// $func must be one of the SPH_GROUPBY_* constants
	assert ( is_string($attribute) );
	assert ( is_string($groupsort) );
	assert ( in_array ( $func, array (
		SPH_GROUPBY_DAY,
		SPH_GROUPBY_WEEK,
		SPH_GROUPBY_MONTH,
		SPH_GROUPBY_YEAR,
		SPH_GROUPBY_ATTR,
		SPH_GROUPBY_ATTRPAIR ) ) );

	$this->_groupsort = $groupsort;
	$this->_groupfunc = $func;
	$this->_groupby = $attribute;
}
875
876 /// set count-distinct attribute for group-by queries
function SetGroupDistinct ( $attribute )
{
	// attribute name, stored for the count-distinct part of the request
	assert ( is_string($attribute) );

	$this->_groupdistinct = $attribute;
}
882
883 /// set distributed retries count and delay
function SetRetries ( $count, $delay=0 )
{
	// both values must be non-negative integers
	assert ( is_int($count) && $count>=0 );
	assert ( is_int($delay) && $delay>=0 );

	$this->_retrydelay = $delay;
	$this->_retrycount = $count;
}
891
892 /// set result set format (hash or array; hash by default)
893 /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
function SetArrayResult ( $arrayresult )
{
	// boolean flag: whether $result["matches"] is an array instead of a hash
	assert ( is_bool($arrayresult) );

	$this->_arrayresult = $arrayresult;
}
899
900 /// set attribute values override
901 /// there can be only one override per attribute
902 /// $values must be a hash that maps document IDs to attribute values
function SetOverride ( $attrname, $attrtype, $values )
{
	// attribute types that may be overridden
	$allowed = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

	assert ( is_string ( $attrname ) );
	assert ( in_array ( $attrtype, $allowed ) );
	assert ( is_array ( $values ) );

	// keyed by attribute name, so a later call for the same attribute replaces the earlier one
	$override = array (
		"attr"=>$attrname,
		"type"=>$attrtype,
		"values"=>$values );
	$this->_overrides[$attrname] = $override;
}
911
912 /// set select-list (attributes or expressions), SQL-like syntax
function SetSelect ( $select )
{
	// select-list string, sent verbatim with each query
	assert ( is_string ( $select ) );

	$this->_select = $select;
}
918
919 //////////////////////////////////////////////////////////////////////////////
920
921 /// clear all filters (for multi-queries)
function ResetFilters ()
{
	// drops the geo anchor together with all filters
	$this->_anchor = array();
	$this->_filters = array();
}
927
928 /// clear groupby settings (for multi-queries)
function ResetGroupBy ()
{
	// back to the same group-by defaults the constructor sets
	$this->_groupdistinct= "";
	$this->_groupsort = "@group desc";
	$this->_groupfunc = SPH_GROUPBY_DAY;
	$this->_groupby = "";
}
936
937 /// clear all attribute value overrides (for multi-queries)
function ResetOverrides ()
{
	// forget every override registered through SetOverride()
	$empty = array ();
	$this->_overrides = $empty;
}
942
943 //////////////////////////////////////////////////////////////////////////////
944
945 /// connect to searchd server, run given search query through given indexes,
946 /// and return the search results
function Query ( $query, $index="*", $comment="" )
{
	// Runs a single query as a batch of one.
	// Returns the result set, or false on failure (see GetLastError()).
	// Must not be called while a multi-query batch is pending.
	assert ( empty($this->_reqs) );

	$this->AddQuery ( $query, $index, $comment );
	$results = $this->RunQueries ();
	$this->_reqs = array (); // just in case it failed too early

	// probably network error; error message should be already filled
	if ( !is_array($results) )
		return false;

	// copy the per-result error and warning onto the client
	$first = $results[0];
	$this->_error = $first["error"];
	$this->_warning = $first["warning"];

	return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
}
965
966 /// helper to pack floats in network byte order
function _PackFloat ( $f )
{
	// reinterpret the machine-order 32-bit float as an unsigned integer,
	// then emit that integer in network (big-endian) byte order
	$bits = unpack ( "L*", pack ( "f", $f ) );
	return pack ( "N", $bits[1] );
}
973
974 /// add query to multi-query batch
975 /// returns index into results array from RunQueries() call
function AddQuery ( $query, $index="*", $comment="" )
{
	// Serialises the current per-query settings together with $query into
	// one binary request, appends it to the pending batch, and returns the
	// position of that request within the batch.
	// Strings are written as a 32-bit big-endian length followed by the bytes.

	// mbstring workaround
	$this->_MBPush ();

	$out = array ();

	// mode and limits
	$out[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
	$out[] = pack ( "N", strlen($this->_sortby) );
	$out[] = $this->_sortby;

	// query itself
	$out[] = pack ( "N", strlen($query) );
	$out[] = $query;

	// weights
	$out[] = pack ( "N", count($this->_weights) );
	foreach ( $this->_weights as $w )
	{
		$out[] = pack ( "N", (int)$w );
	}

	// indexes
	$out[] = pack ( "N", strlen($index) );
	$out[] = $index;

	// id64 range marker, then the id64 range
	$out[] = pack ( "N", 1 );
	$out[] = sphPackU64 ( $this->_min_id );
	$out[] = sphPackU64 ( $this->_max_id );

	// filters
	$out[] = pack ( "N", count($this->_filters) );
	foreach ( $this->_filters as $f )
	{
		$type = $f["type"];

		$out[] = pack ( "N", strlen($f["attr"]) );
		$out[] = $f["attr"];
		$out[] = pack ( "N", $type );

		if ( $type==SPH_FILTER_VALUES )
		{
			$out[] = pack ( "N", count($f["values"]) );
			foreach ( $f["values"] as $item )
				$out[] = sphPackI64 ( $item );
		}
		elseif ( $type==SPH_FILTER_RANGE )
		{
			$out[] = sphPackI64 ( $f["min"] );
			$out[] = sphPackI64 ( $f["max"] );
		}
		elseif ( $type==SPH_FILTER_FLOATRANGE )
		{
			$out[] = $this->_PackFloat ( $f["min"] );
			$out[] = $this->_PackFloat ( $f["max"] );
		}
		else
		{
			assert ( 0 && "internal error: unhandled filter type" );
		}

		$out[] = pack ( "N", $f["exclude"] );
	}

	// group-by clause, max-matches count, group-sort clause, cutoff count
	$out[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) );
	$out[] = $this->_groupby;
	$out[] = pack ( "N", $this->_maxmatches );
	$out[] = pack ( "N", strlen($this->_groupsort) );
	$out[] = $this->_groupsort;
	$out[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
	$out[] = pack ( "N", strlen($this->_groupdistinct) );
	$out[] = $this->_groupdistinct;

	// anchor point: a 0/1 presence flag, then the anchor data when present
	if ( !empty($this->_anchor) )
	{
		$out[] = pack ( "N", 1 );
		$out[] = pack ( "N", strlen($this->_anchor["attrlat"]) );
		$out[] = $this->_anchor["attrlat"];
		$out[] = pack ( "N", strlen($this->_anchor["attrlong"]) );
		$out[] = $this->_anchor["attrlong"];
		$out[] = $this->_PackFloat ( $this->_anchor["lat"] );
		$out[] = $this->_PackFloat ( $this->_anchor["long"] );
	}
	else
	{
		$out[] = pack ( "N", 0 );
	}

	// per-index weights
	$out[] = pack ( "N", count($this->_indexweights) );
	foreach ( $this->_indexweights as $name=>$w )
	{
		$out[] = pack ( "N", strlen($name) );
		$out[] = $name;
		$out[] = pack ( "N", $w );
	}

	// max query time
	$out[] = pack ( "N", $this->_maxquerytime );

	// per-field weights
	$out[] = pack ( "N", count($this->_fieldweights) );
	foreach ( $this->_fieldweights as $name=>$w )
	{
		$out[] = pack ( "N", strlen($name) );
		$out[] = $name;
		$out[] = pack ( "N", $w );
	}

	// comment
	$out[] = pack ( "N", strlen($comment) );
	$out[] = $comment;

	// attribute overrides
	$out[] = pack ( "N", count($this->_overrides) );
	foreach ( $this->_overrides as $override )
	{
		$type = $override["type"];

		$out[] = pack ( "N", strlen($override["attr"]) );
		$out[] = $override["attr"];
		$out[] = pack ( "NN", $type, count($override["values"]) );

		foreach ( $override["values"] as $docid=>$val )
		{
			assert ( is_numeric($docid) );
			assert ( is_numeric($val) );

			$out[] = sphPackU64 ( $docid );
			if ( $type==SPH_ATTR_FLOAT )
				$out[] = $this->_PackFloat ( $val );
			elseif ( $type==SPH_ATTR_BIGINT )
				$out[] = sphPackI64 ( $val );
			else
				$out[] = pack ( "N", $val );
		}
	}

	// select-list
	$out[] = pack ( "N", strlen($this->_select) );
	$out[] = $this->_select;

	$req = join ( "", $out );

	// mbstring workaround
	$this->_MBPop ();

	// store request to requests array
	$this->_reqs[] = $req;
	return count($this->_reqs)-1;
}
1087
/// connect to searchd, send every queued request as a single batch,
/// and return an array of result sets (one per queued query);
/// returns false on failure (with no queued requests, the reason is stored in $this->_error)
function RunQueries ()
{
	// nothing queued, nothing to send
	if ( empty($this->_reqs) )
	{
		$this->_error = "no queries defined, issue AddQuery() first";
		return false;
	}

	// mbstring workaround; undone on every early exit below,
	// and by _ParseSearchResponse() on the success path
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop ();
		return false;
	}

	// glue the queued requests together; the body is a 4-byte query count followed by them
	$nreqs = count($this->_reqs);
	$payload = join ( "", $this->_reqs );
	$len = 4+strlen($payload);

	// command id, command version, body length, query count
	$packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $payload;

	// 8 more bytes for the command/version/length header
	if ( !$this->_Send ( $fp, $packet, $len+8 ) )
	{
		$this->_MBPop ();
		return false;
	}

	$response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	// batch went through; the queue can be emptied now
	$this->_reqs = array ();

	// decode the reply
	return $this->_ParseSearchResponse ( $response, $nreqs );
}
1125
/// parse and return search query (or queries) response
///
/// $response is the raw reply body, $nreqs the number of queries that were sent.
/// returns an array with one result set per parsed query; every result set carries
/// "error", "warning" and "status", and (unless the query failed) "fields", "attrs",
/// "total", "total_found", "time", plus "matches" and "words" when there are any
function _ParseSearchResponse ( $response, $nreqs )
{
	$pos = 0; // read cursor
	$end = strlen($response); // cursor limit for checks, to protect against broken responses

	$results = array ();
	for ( $q=0; $q<$nreqs && $pos<$end; $q++ )
	{
		$set = array ( "error"=>"", "warning"=>"" );

		// status word
		list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		$set["status"] = $status;

		if ( $status!=SEARCHD_OK )
		{
			// any non-OK status is followed by a length-prefixed message
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$message = substr ( $response, $pos, $len );
			$pos += $len;

			if ( $status!=SEARCHD_WARNING )
			{
				// hard error; this query has no result body
				$set["error"] = $message;
				$results[] = $set;
				continue;
			}
			$set["warning"] = $message;
		}

		// schema: field names
		$fields = array ();
		list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		for ( $i=0; $i<$nfields && $pos<$end; $i++ )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$fields[] = substr ( $response, $pos, $len );
			$pos += $len;
		}
		$set["fields"] = $fields;

		// schema: attribute name => type
		$attrs = array ();
		list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		for ( $i=0; $i<$nattrs && $pos<$end; $i++ )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$attr = substr ( $response, $pos, $len );
			$pos += $len;
			list(,$type) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$attrs[$attr] = $type;
		}
		$set["attrs"] = $attrs;

		// match count, then the flag telling whether document ids are 64-bit
		list(,$count) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;

		// matches; $idx is the position in the result array
		for ( $idx=0; $idx<$count && $pos<$end; $idx++ )
		{
			// document id and weight
			if ( $id64 )
			{
				$doc = sphUnpackU64 ( substr ( $response, $pos, 8 ) );
				$pos += 8;
				list(,$weight) = unpack ( "N*", substr ( $response, $pos, 4 ) );
				$pos += 4;
			} else
			{
				list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
					substr ( $response, $pos, 8 ) ) );
				$pos += 8;
				$doc = sphFixUint($doc);
			}
			$weight = sprintf ( "%u", $weight );

			// attribute values, in schema order
			$attrvals = array ();
			foreach ( $attrs as $attr=>$type )
			{
				if ( $type==SPH_ATTR_BIGINT )
				{
					// 64bit ints
					$attrvals[$attr] = sphUnpackI64 ( substr ( $response, $pos, 8 ) );
					$pos += 8;
				} elseif ( $type==SPH_ATTR_FLOAT )
				{
					// floats travel as a 32-bit word; reinterpret its bits
					list(,$bits) = unpack ( "N*", substr ( $response, $pos, 4 ) );
					$pos += 4;
					list(,$fval) = unpack ( "f*", pack ( "L", $bits ) );
					$attrvals[$attr] = $fval;
				} else
				{
					// everything else is handled as unsigned ints
					list(,$val) = unpack ( "N*", substr ( $response, $pos, 4 ) );
					$pos += 4;
					if ( $type & SPH_ATTR_MULTI )
					{
						// multi-value: the word just read is the value count
						$attrvals[$attr] = array ();
						for ( $k=0; $k<$val && $pos<$end; $k++ )
						{
							list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) );
							$pos += 4;
							$attrvals[$attr][] = sphFixUint($item);
						}
					} else
					{
						$attrvals[$attr] = sphFixUint($val);
					}
				}
			}

			// store the match: a plain list in array-result mode, keyed by document id otherwise
			if ( $this->_arrayresult )
				$set["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
			else
				$set["matches"][$doc] = array ( "weight"=>$weight, "attrs"=>$attrvals );
		}

		// totals, query time, and word count
		list ( $total, $total_found, $msecs, $words ) =
			array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
		$pos += 16;
		$set["total"] = sprintf ( "%u", $total );
		$set["total_found"] = sprintf ( "%u", $total_found );
		$set["time"] = sprintf ( "%.3f", $msecs/1000 );

		// per-word statistics
		for ( $w=0; $w<$words && $pos<$end; $w++ )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;
			$word = substr ( $response, $pos, $len );
			$pos += $len;
			list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
			$pos += 8;
			$set["words"][$word] = array (
				"docs"=>sprintf ( "%u", $docs ),
				"hits"=>sprintf ( "%u", $hits ) );
		}

		$results[] = $set;
	}

	// balances the _MBPush() done by the caller before the request was sent
	$this->_MBPop ();
	return $results;
}
1277
1278 /////////////////////////////////////////////////////////////////////////////
1279 // excerpts generation
1280 /////////////////////////////////////////////////////////////////////////////
1281
/// connect to searchd server, and generate excerpts (snippets)
/// of given documents for given query. returns false on failure,
/// an array of snippets on success
///
/// $docs is an array of document body strings, $index and $words are strings,
/// $opts is an optional array of settings; missing (or null) settings get these defaults:
/// before_match "<b>", after_match "</b>", chunk_separator " ... ", limit 256, around 5,
/// exact_phrase / single_passage / use_boundaries / weight_order false
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
	assert ( is_array($docs) );
	assert ( is_string($index) );
	assert ( is_string($words) );
	assert ( is_array($opts) );

	// mbstring workaround
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// fixup options
	/////////////////

	$defaults = array (
		"before_match" => "<b>",
		"after_match" => "</b>",
		"chunk_separator" => " ... ",
		"limit" => 256,
		"around" => 5,
		"exact_phrase" => false,
		"single_passage" => false,
		"use_boundaries" => false,
		"weight_order" => false );
	foreach ( $defaults as $name=>$value )
		if ( !isset($opts[$name]) )
			$opts[$name] = $value;

	/////////////////
	// build request
	/////////////////

	// v.1.0 req
	$flags = 1; // remove spaces
	$flagbits = array ( "exact_phrase"=>2, "single_passage"=>4, "use_boundaries"=>8, "weight_order"=>16 );
	foreach ( $flagbits as $name=>$bit )
		if ( $opts[$name] )
			$flags |= $bit;

	$req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags

	// length-prefixed strings: index, words, then the three markup options
	$strings = array ( $index, $words, $opts["before_match"], $opts["after_match"], $opts["chunk_separator"] );
	foreach ( $strings as $str )
		$req .= pack ( "N", strlen($str) ) . $str;

	// numeric options
	$req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );

	// documents
	$ndocs = count($docs);
	$req .= pack ( "N", $ndocs );
	foreach ( $docs as $doc )
	{
		assert ( is_string($doc) );
		$req .= pack ( "N", strlen($doc) ) . $doc;
	}

	////////////////////////////
	// send query, get response
	////////////////////////////

	$len = strlen($req);
	$packet = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
	if ( !$this->_Send ( $fp, $packet, $len+8 ) )
	{
		$this->_MBPop ();
		return false;
	}

	$response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	// one length-prefixed snippet is expected per submitted document
	$pos = 0;
	$rlen = strlen($response);
	$snippets = array ();
	for ( $i=0; $i<$ndocs; $i++ )
	{
		list(,$slen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;

		// snippet would run past the end of the reply
		if ( $pos+$slen > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}

		$snippets[] = $slen ? substr ( $response, $pos, $slen ) : "";
		$pos += $slen;
	}

	$this->_MBPop ();
	return $snippets;
}
1381
1382
1383 /////////////////////////////////////////////////////////////////////////////
1384 // keyword generation
1385 /////////////////////////////////////////////////////////////////////////////
1386
/// connect to searchd server, and generate keyword list for a given query
/// returns false on failure,
/// an array of words on success
///
/// each returned word is an array with "tokenized" and "normalized" strings;
/// when $hits is true it also carries "docs" and "hits" counters
function BuildKeywords ( $query, $index, $hits )
{
	assert ( is_string($query) );
	assert ( is_string($index) );
	assert ( is_bool($hits) );

	// mbstring workaround
	$this->_MBPush ();

	$fp = $this->_Connect();
	if ( !$fp )
	{
		$this->_MBPop();
		return false;
	}

	/////////////////
	// build request
	/////////////////

	// v.1.0 req: query string, index string, hits flag
	$req = pack ( "N", strlen($query) ) . $query
		. pack ( "N", strlen($index) ) . $index
		. pack ( "N", (int)$hits );

	////////////////////////////
	// send query, get response
	////////////////////////////

	$len = strlen($req);
	$packet = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
	if ( !$this->_Send ( $fp, $packet, $len+8 ) )
	{
		$this->_MBPop ();
		return false;
	}

	$response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS );
	if ( !$response )
	{
		$this->_MBPop ();
		return false;
	}

	//////////////////
	// parse response
	//////////////////

	$pos = 0;
	$rlen = strlen($response);
	$keywords = array ();

	list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
	$pos += 4;

	for ( $i=0; $i<$nwords; $i++ )
	{
		$entry = array ();

		// tokenized form
		list(,$wlen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		$entry["tokenized"] = $wlen ? substr ( $response, $pos, $wlen ) : "";
		$pos += $wlen;

		// normalized form
		list(,$wlen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
		$pos += 4;
		$entry["normalized"] = $wlen ? substr ( $response, $pos, $wlen ) : "";
		$pos += $wlen;

		// statistics are only present when they were requested
		if ( $hits )
		{
			list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
			$pos += 8;
			$entry["docs"] = $ndocs;
			$entry["hits"] = $nhits;
		}

		$keywords[] = $entry;

		// cursor ran past the end of the reply
		if ( $pos > $rlen )
		{
			$this->_error = "incomplete reply";
			$this->_MBPop ();
			return false;
		}
	}

	$this->_MBPop ();
	return $keywords;
}
1466
/// prefix every special character in $string with a backslash and return the result;
/// the characters treated as special are: \ ( ) | - ! @ ~ " & / ^ $ =
function EscapeString ( $string )
{
	// the backslash has to stay first, so that the backslashes
	// added for the other characters are not escaped again
	$special = array ( '\\', '(', ')', '|', '-', '!', '@', '~', '"', '&', '/', '^', '$', '=' );

	$escaped = array ();
	foreach ( $special as $ch )
		$escaped[] = '\\' . $ch;

	return str_replace ( $special, $escaped, $string );
}
1474
1475 /////////////////////////////////////////////////////////////////////////////
1476 // attribute updates
1477 /////////////////////////////////////////////////////////////////////////////
1478
/// batch update given attributes in given rows in given indexes
/// returns amount of updated documents (0 or more) on success, or -1 on failure
///
/// $index  - string, index name(s) as sent to searchd
/// $attrs  - array of attribute name strings
/// $values - array keyed by numeric document id; every entry is an array
///           holding one new value per attribute, in $attrs order
/// $mva    - bool; when true, every value is itself an array of ints
function UpdateAttributes ( $index, $attrs, $values, $mva=false )
{
	// verify everything
	assert ( is_string($index) );
	assert ( is_bool($mva) );

	assert ( is_array($attrs) );
	foreach ( $attrs as $attr )
		assert ( is_string($attr) );

	assert ( is_array($values) );
	foreach ( $values as $id=>$entry )
	{
		assert ( is_numeric($id) );
		assert ( is_array($entry) );
		assert ( count($entry)==count($attrs) );
		foreach ( $entry as $v )
		{
			if ( $mva )
			{
				assert ( is_array($v) );
				foreach ( $v as $vv )
					assert ( is_int($vv) );
			} else
				assert ( is_int($v) );
		}
	}

	// mbstring workaround, same as in the other request builders of this class:
	// the strlen() calls below produce the length prefixes that go on the wire.
	// every exit path from here on has to undo it with _MBPop()
	$this->_MBPush ();

	// build request
	$req = pack ( "N", strlen($index) ) . $index;

	// attribute names, each followed by its MVA flag
	$req .= pack ( "N", count($attrs) );
	foreach ( $attrs as $attr )
	{
		$req .= pack ( "N", strlen($attr) ) . $attr;
		$req .= pack ( "N", $mva ? 1 : 0 );
	}

	// rows: 64-bit document id, then one value (or value list) per attribute
	$req .= pack ( "N", count($values) );
	foreach ( $values as $id=>$entry )
	{
		$req .= sphPackU64 ( $id );
		foreach ( $entry as $v )
		{
			if ( $mva )
			{
				// value count, then the values
				$req .= pack ( "N", count($v) );
				foreach ( $v as $vv )
					$req .= pack ( "N", $vv );
			} else
				$req .= pack ( "N", $v );
		}
	}

	// connect, send query, get response
	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop ();
		return -1;
	}

	$len = strlen($req);
	$req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
	if ( !$this->_Send ( $fp, $req, $len+8 ) )
	{
		$this->_MBPop ();
		return -1;
	}

	if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
	{
		$this->_MBPop ();
		return -1;
	}

	// parse response
	list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
	$this->_MBPop ();
	return $updated;
}
1548
1549 /////////////////////////////////////////////////////////////////////////////
1550 // persistent connections
1551 /////////////////////////////////////////////////////////////////////////////
1552
/// open a persistent connection: connect, send the SEARCHD_COMMAND_PERSIST
/// request, and keep the handle in $this->_socket
/// returns true on success; false if already connected (reason in $this->_error),
/// or if connecting or sending failed
function Open()
{
	if ( $this->_socket !== false )
	{
		$this->_error = 'already connected';
		return false;
	}

	$fp = $this->_Connect();
	if ( !$fp )
		return false;

	// command, command version = 0, body length = 4, body = 1
	$packet = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
	if ( $this->_Send ( $fp, $packet, 12 ) )
	{
		// remember the handle
		$this->_socket = $fp;
		return true;
	}

	return false;
}
1571
/// close the connection kept in $this->_socket
/// returns true on success; false (with $this->_error set) when there is none
function Close()
{
	if ( $this->_socket !== false )
	{
		fclose ( $this->_socket );
		$this->_socket = false;
		return true;
	}

	$this->_error = 'not connected';
	return false;
}
1585
1586 //////////////////////////////////////////////////////////////////////////
1587 // status
1588 //////////////////////////////////////////////////////////////////////////
1589
/// connect to searchd and fetch the reply to a SEARCHD_COMMAND_STATUS request
/// returns false on failure; on success, an array of rows,
/// each row being an array of column strings
function Status ()
{
	// mbstring workaround
	$this->_MBPush ();
	if (!( $fp = $this->_Connect() ))
	{
		$this->_MBPop();
		return false;
	}

	$req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
	if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
		!( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
	{
		$this->_MBPop ();
		return false;
	}

	$p = 0; // current position
	$max = strlen($response); // max position for checks, to protect against broken responses

	// the reply starts with the row and column counts
	list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;

	// cells follow row by row, each a length-prefixed string; the counts come
	// from the wire, so stop as soon as the cursor leaves the reply instead of
	// iterating rows*cols times over nothing
	$res = array();
	for ( $i=0; $i<$rows && $p<$max; $i++ )
		for ( $j=0; $j<$cols && $p<$max; $j++ )
	{
		list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		$res[$i][] = substr ( $response, $p, $len ); $p += $len;
	}

	$this->_MBPop ();
	return $res;
}
1622 }
1623
1624 //
1625 // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
1626 //