]> git.wh0rd.org - tt-rss.git/blob - plugins/search_sphinx/sphinxapi.php
41b0fac41bd2a1f3f57ffe4b9b4758cea52975e2
[tt-rss.git] / plugins / search_sphinx / sphinxapi.php
1 <?php
2
3 //
4 // $Id: sphinxapi.php 2758 2011-04-04 11:10:44Z kevg $
5 //
6
7 //
8 // Copyright (c) 2001-2011, Andrew Aksyonoff
9 // Copyright (c) 2008-2011, Sphinx Technologies Inc
10 // All rights reserved
11 //
12 // This program is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU General Public License. You should have
14 // received a copy of the GPL license along with this program; if you
15 // did not, you can find it at http://www.gnu.org/
16 //
17
18 /////////////////////////////////////////////////////////////////////////////
19 // PHP version of Sphinx searchd client (PHP API)
20 /////////////////////////////////////////////////////////////////////////////
21
22 /// known searchd commands
define ( 'SEARCHD_COMMAND_SEARCH',		0 );
define ( 'SEARCHD_COMMAND_EXCERPT',		1 );
define ( 'SEARCHD_COMMAND_UPDATE',		2 );
define ( 'SEARCHD_COMMAND_KEYWORDS',	3 );
define ( 'SEARCHD_COMMAND_PERSIST',		4 );
define ( 'SEARCHD_COMMAND_STATUS',		5 );
define ( 'SEARCHD_COMMAND_FLUSHATTRS',	7 );

/// versions of the commands as implemented by this client
define ( 'VER_COMMAND_SEARCH',		0x118 );
define ( 'VER_COMMAND_EXCERPT',		0x103 );
define ( 'VER_COMMAND_UPDATE',		0x102 );
define ( 'VER_COMMAND_KEYWORDS',	0x100 );
define ( 'VER_COMMAND_STATUS',		0x100 );
define ( 'VER_COMMAND_QUERY',		0x100 );
define ( 'VER_COMMAND_FLUSHATTRS',	0x100 );

/// status codes returned by searchd
define ( 'SEARCHD_OK',		0 );
define ( 'SEARCHD_ERROR',	1 );
define ( 'SEARCHD_RETRY',	2 );
define ( 'SEARCHD_WARNING',	3 );

/// match modes
define ( 'SPH_MATCH_ALL',		0 );
define ( 'SPH_MATCH_ANY',		1 );
define ( 'SPH_MATCH_PHRASE',	2 );
define ( 'SPH_MATCH_BOOLEAN',	3 );
define ( 'SPH_MATCH_EXTENDED',	4 );
define ( 'SPH_MATCH_FULLSCAN',	5 );
define ( 'SPH_MATCH_EXTENDED2',	6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// ranking modes (ext2 only)
define ( 'SPH_RANK_PROXIMITY_BM25',	0 ); ///< default mode, phrase proximity major factor and BM25 minor one
define ( 'SPH_RANK_BM25',			1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
define ( 'SPH_RANK_NONE',			2 ); ///< no ranking, all matches get a weight of 1
define ( 'SPH_RANK_WORDCOUNT',		3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define ( 'SPH_RANK_PROXIMITY',		4 );
define ( 'SPH_RANK_MATCHANY',		5 );
define ( 'SPH_RANK_FIELDMASK',		6 );
define ( 'SPH_RANK_SPH04',			7 );
define ( 'SPH_RANK_TOTAL',			8 );

/// sort modes
define ( 'SPH_SORT_RELEVANCE',		0 );
define ( 'SPH_SORT_ATTR_DESC',		1 );
define ( 'SPH_SORT_ATTR_ASC',		2 );
define ( 'SPH_SORT_TIME_SEGMENTS',	3 );
define ( 'SPH_SORT_EXTENDED',		4 );
define ( 'SPH_SORT_EXPR',			5 );

/// filter types
define ( 'SPH_FILTER_VALUES',		0 );
define ( 'SPH_FILTER_RANGE',		1 );
define ( 'SPH_FILTER_FLOATRANGE',	2 );

/// attribute types
define ( 'SPH_ATTR_INTEGER',	1 );
define ( 'SPH_ATTR_TIMESTAMP',	2 );
define ( 'SPH_ATTR_ORDINAL',	3 );
define ( 'SPH_ATTR_BOOL',		4 );
define ( 'SPH_ATTR_FLOAT',		5 );
define ( 'SPH_ATTR_BIGINT',		6 );
define ( 'SPH_ATTR_STRING',		7 );
define ( 'SPH_ATTR_MULTI',		0x40000000 );

/// grouping functions
define ( 'SPH_GROUPBY_DAY',			0 );
define ( 'SPH_GROUPBY_WEEK',		1 );
define ( 'SPH_GROUPBY_MONTH',		2 );
define ( 'SPH_GROUPBY_YEAR',		3 );
define ( 'SPH_GROUPBY_ATTR',		4 );
define ( 'SPH_GROUPBY_ATTRPAIR',	5 );
96
97 // important properties of PHP's integers:
98 // - always signed (one bit short of PHP_INT_SIZE)
99 // - conversion from string to int is saturated
100 // - float is double
101 // - div converts arguments to floats
102 // - mod converts arguments to ints
103
104 // the packing code below works as follows:
105 // - when we got an int, just pack it
106 // if performance is a problem, this is the branch users should aim for
107 //
108 // - otherwise, we got a number in string form
109 // this might be due to different reasons, but we assume that this is
110 // because it didn't fit into PHP int
111 //
112 // - factor the string into high and low ints for packing
113 // - if we have bcmath, then it is used
114 // - if we don't, we have to do it manually (this is the fun part)
115 //
116 // - x64 branch does factoring using ints
117 // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
118 //
119 // unpacking routines are pretty much the same.
120 // - return ints if we can
121 // - otherwise format number into a string
122
123 /// pack 64-bit signed
function sphPackI64 ( $v )
{
	// Packs a signed 64-bit value into 8 bytes: high dword first, then low dword,
	// each written with pack() format "N". $v may be an int or a numeric string.
	assert ( is_numeric($v) );

	// 64-bit PHP: cast to a native int and split it with shifts and masks
	if ( PHP_INT_SIZE>=8 )
	{
		$num = (int)$v;
		$high = $num>>32;
		$low = $num&0xFFFFFFFF;
		return pack ( "NN", $high, $low );
	}

	// 32-bit PHP, native int: the high dword is only the sign extension
	if ( is_int($v) )
	{
		$sign = ( $v < 0 ) ? -1 : 0;
		return pack ( "NN", $sign, $v );
	}

	// 32-bit PHP, numeric string, bcmath available
	if ( function_exists("bcmul") )
	{
		$val = $v;
		if ( bccomp ( $val, 0 ) == -1 )
			$val = bcadd ( "18446744073709551616", $val ); // shift negatives up by 2^64
		$high = bcdiv ( $val, "4294967296", 0 );
		$low = bcmod ( $val, "4294967296" );
		// floats on purpose; an int conversion would lose the 31st bit
		return pack ( "NN", (float)$high, (float)$low );
	}

	// 32-bit PHP, numeric string, no bcmath: split the decimal string at the
	// last 13 digits and do the base conversion in floats
	$split = max ( 0, strlen($v) - 13 );
	$tail = abs ( (float)substr ( $v, $split ) );
	$head = abs ( (float)substr ( $v, 0, $split ) );

	$mix = $tail + $head*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
	$carry = floor ( $mix/4294967296.0 );
	$low = $mix - ( $carry*4294967296.0 );
	$high = $head*2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

	// negative input: negate the (high, low) pair computed from the absolute value
	if ( $v<0 )
	{
		if ( $low==0 )
		{
			$high = 4294967296.0 - $high;
		}
		else
		{
			$high = 4294967295.0 - $high;
			$low = 4294967296.0 - $low;
		}
	}
	return pack ( "NN", $high, $low );
}
171
172 /// pack 64-bit unsigned
function sphPackU64 ( $v )
{
	// Packs an unsigned 64-bit value into 8 bytes: high dword first, then low dword,
	// each written with pack() format "N". $v may be an int or a numeric string.
	assert ( is_numeric($v) );

	$have_bcmath = function_exists("bcmul");

	if ( PHP_INT_SIZE>=8 )
	{
		assert ( $v>=0 );

		// 64-bit PHP, native int
		if ( is_int($v) )
		{
			$high = $v>>32;
			$low = $v&0xFFFFFFFF;
			return pack ( "NN", $high, $low );
		}

		// 64-bit PHP, numeric string, bcmath available
		if ( $have_bcmath )
		{
			$high = bcdiv ( $v, 4294967296, 0 );
			$low = bcmod ( $v, 4294967296 );
			return pack ( "NN", $high, $low );
		}

		// 64-bit PHP, numeric string, no bcmath: split the decimal string at the
		// last 13 digits and convert with integer arithmetic
		$split = max ( 0, strlen($v) - 13 );
		$tail = (int)substr ( $v, $split );
		$head = (int)substr ( $v, 0, $split );

		$mix = $tail + $head*1316134912;
		$low = $mix % 4294967296;
		$high = $head*2328 + (int)($mix/4294967296);

		return pack ( "NN", $high, $low );
	}

	// 32-bit PHP, native int: high dword is always zero
	if ( is_int($v) )
		return pack ( "NN", 0, $v );

	// 32-bit PHP, numeric string, bcmath available
	if ( $have_bcmath )
	{
		$high = bcdiv ( $v, "4294967296", 0 );
		$low = bcmod ( $v, "4294967296" );
		// floats on purpose; an int conversion would lose the 31st bit
		return pack ( "NN", (float)$high, (float)$low );
	}

	// 32-bit PHP, numeric string, no bcmath: same split as above, done in floats
	$split = max ( 0, strlen($v) - 13 );
	$tail = (float)substr ( $v, $split );
	$head = (float)substr ( $v, 0, $split );

	$mix = $tail + $head*1316134912.0;
	$carry = floor ( $mix / 4294967296.0 );
	$low = $mix - ( $carry * 4294967296.0 );
	$high = $head*2328.0 + $carry;

	return pack ( "NN", $high, $low );
}
230
231 // unpack 64-bit unsigned
function sphUnpackU64 ( $v )
{
	// Decodes 8 bytes (high dword, low dword) into an unsigned 64-bit value.
	// Returns an int when the value fits, otherwise a decimal string.
	list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

	$have_bcmath = function_exists("bcmul");

	if ( PHP_INT_SIZE>=8 )
	{
		// the original notes that PHP 5.2.2 to 5.2.5 may hand back negative dwords here
		if ( $hi<0 )
			$hi += (1<<32);
		if ( $lo<0 )
			$lo += (1<<32);

		// 64-bit PHP: result fits a signed native int
		if ( $hi<=2147483647 )
			return ($hi<<32) + $lo;

		// 64-bit PHP, bcmath available
		if ( $have_bcmath )
			return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

		// 64-bit PHP, no bcmath: compute the leading digits and the last
		// five decimal digits separately, then glue them together
		$base = 100000;
		$head = ((int)($hi / $base) << 32) + (int)($lo / $base);
		$tail = (($hi % $base) << 32) + ($lo % $base);
		if ( $tail>$base )
		{
			$head += (int)($tail / $base);
			$tail = $tail % $base;
		}

		if ( $head==0 )
			return $tail;
		return sprintf ( "%d%05d", $head, $tail );
	}

	// 32-bit PHP: value fits into the low dword alone
	if ( $hi==0 )
	{
		if ( $lo>0 )
			return $lo;
		return sprintf ( "%u", $lo );
	}

	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );

	// 32-bit PHP, bcmath available
	if ( $have_bcmath )
		return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

	// 32-bit PHP, no bcmath: float arithmetic, splitting off the last seven decimal digits
	$hi = (float)$hi;
	$lo = (float)$lo;

	$q = floor ( $hi/10000000.0 );
	$r = $hi - $q*10000000.0;
	$m = $lo + $r*4967296.0;
	$mq = floor ( $m/10000000.0 );
	$tail = $m - $mq*10000000.0;
	$head = $q*4294967296.0 + $r*429.0 + $mq;

	$head = sprintf ( "%.0f", $head );
	$tail = sprintf ( "%07.0f", $tail );
	if ( $head=="0" )
		return sprintf( "%.0f", (float)$tail );
	return $head . $tail;
}
296
297 // unpack 64-bit signed
function sphUnpackI64 ( $v )
{
	// Decodes 8 bytes (high dword, low dword) into a signed 64-bit value.
	// Returns an int when the value fits, otherwise a decimal string.
	list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

	// 64-bit PHP: recombine into a native int
	if ( PHP_INT_SIZE>=8 )
	{
		// the original notes that PHP 5.2.2 to 5.2.5 may hand back negative dwords here
		if ( $hi<0 )
			$hi += (1<<32);
		if ( $lo<0 )
			$lo += (1<<32);

		return ($hi<<32) + $lo;
	}

	// 32-bit PHP: small non-negative value, low dword only
	if ( $hi==0 )
	{
		if ( $lo>0 )
			return $lo;
		return sprintf ( "%u", $lo );
	}

	// 32-bit PHP: small negative value, high dword is pure sign extension
	if ( $hi==-1 )
	{
		if ( $lo<0 )
			return $lo;
		return sprintf ( "%.0f", $lo - 4294967296.0 );
	}

	// wide value: for negatives, invert both dwords and remember to add 1 back
	$sign = "";
	$carry = 0;
	if ( $hi<0 )
	{
		$hi = ~$hi;
		$lo = ~$lo;
		$carry = 1;
		$sign = "-";
	}

	$hi = sprintf ( "%u", $hi );
	$lo = sprintf ( "%u", $lo );

	// 32-bit PHP, bcmath available
	if ( function_exists("bcmul") )
	{
		$magnitude = bcadd ( $lo, bcmul ( $hi, "4294967296" ) );
		return $sign . bcadd ( $magnitude, $carry );
	}

	// 32-bit PHP, no bcmath: float arithmetic, splitting off the last seven decimal digits
	$hi = (float)$hi;
	$lo = (float)$lo;

	$q = floor ( $hi/10000000.0 );
	$r = $hi - $q*10000000.0;
	$m = $lo + $r*4967296.0;
	$mq = floor ( $m/10000000.0 );
	$tail = $m - $mq*10000000.0 + $carry;
	$head = $q*4294967296.0 + $r*429.0 + $mq;
	if ( $tail==10000000 )
	{
		// the +1 carried out of the seven-digit tail
		$tail = 0;
		$head += 1;
	}

	$head = sprintf ( "%.0f", $head );
	$tail = sprintf ( "%07.0f", $tail );
	if ( $head=="0" )
		return $sign . sprintf( "%.0f", (float)$tail );
	return $sign . $head . $tail;
}
365
366
function sphFixUint ( $value )
{
	// Normalizes a 32-bit value that may have been unpacked as negative.
	// 32-bit PHP: format it as an unsigned decimal string
	if ( PHP_INT_SIZE<8 )
		return sprintf ( "%u", $value );

	// 64-bit PHP: lift negative values into the unsigned 32-bit range
	return ( $value<0 ) ? $value + (1<<32) : $value;
}
381
382
383 /// sphinx searchd client class
384 class SphinxClient
385 {
var $_host; ///< searchd host (default is "localhost")
var $_port; ///< searchd port (default is 9312)
var $_path; ///< unix socket path ("unix://..." form), or false/empty when host and port are used
var $_socket; ///< persistent connection socket, or false when there is none
var $_offset; ///< how many records to seek from result-set start (default is 0)
var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
var $_weights; ///< per-field weights (default is 1 for all fields)
var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
var $_sortby; ///< attribute to sort by (default is "")
var $_min_id; ///< min ID to match (default is 0, which means no limit)
var $_max_id; ///< max ID to match (default is 0, which means no limit)
var $_filters; ///< search filters
var $_groupby; ///< group-by attribute name
var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
var $_groupdistinct;///< group-by count-distinct attribute
var $_maxmatches; ///< max matches to retrieve
var $_cutoff; ///< cutoff to stop searching at (default is 0)
var $_retrycount; ///< distributed retries count
var $_retrydelay; ///< distributed retries delay
var $_anchor; ///< geographical anchor point
var $_indexweights; ///< per-index weights
var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
var $_fieldweights; ///< per-field-name weights
var $_overrides; ///< per-query attribute values overrides
var $_select; ///< select-list (attributes or expressions, with optional aliases)

var $_error; ///< last error message
var $_warning; ///< last warning message
var $_connerror; ///< connection error vs remote error flag

var $_reqs; ///< requests array for multi-query
var $_mbenc; ///< stored mbstring encoding
var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
var $_timeout; ///< connect timeout
421
422 /////////////////////////////////////////////////////////////////////////////
423 // common stuff
424 /////////////////////////////////////////////////////////////////////////////
425
426 /// create a new client object and fill defaults
function __construct ()
{
	// Real constructor. PHP 8 no longer treats a method named after the class
	// as a constructor, so the defaults must be set from __construct().

	// per-client-object settings
	$this->_host = "localhost";
	$this->_port = 9312;
	$this->_path = false;
	$this->_socket = false;

	// per-query settings
	$this->_offset = 0;
	$this->_limit = 20;
	$this->_mode = SPH_MATCH_ALL;
	$this->_weights = array ();
	$this->_sort = SPH_SORT_RELEVANCE;
	$this->_sortby = "";
	$this->_min_id = 0;
	$this->_max_id = 0;
	$this->_filters = array ();
	$this->_groupby = "";
	$this->_groupfunc = SPH_GROUPBY_DAY;
	$this->_groupsort = "@group desc";
	$this->_groupdistinct= "";
	$this->_maxmatches = 1000;
	$this->_cutoff = 0;
	$this->_retrycount = 0;
	$this->_retrydelay = 0;
	$this->_anchor = array ();
	$this->_indexweights= array ();
	$this->_ranker = SPH_RANK_PROXIMITY_BM25;
	$this->_maxquerytime= 0;
	$this->_fieldweights= array();
	$this->_overrides = array();
	$this->_select = "*";

	$this->_error = ""; // per-reply fields (for single-query case)
	$this->_warning = "";
	$this->_connerror = false;

	$this->_reqs = array (); // requests storage (for multi-query case)
	$this->_mbenc = "";
	$this->_arrayresult = false;
	$this->_timeout = 0;
}

/// PHP 4 style constructor name, kept so that existing explicit calls
/// (for example parent::SphinxClient() in a subclass) keep working
function SphinxClient ()
{
	$this->__construct ();
}
470
function __destruct()
{
	// nothing to release unless a socket is stored on the object
	if ( $this->_socket === false )
		return;

	fclose ( $this->_socket );
}
476
477 /// get last error message (string)
function GetLastError ()
{
	// hands back whatever is currently stored in _error
	$message = $this->_error;
	return $message;
}
482
483 /// get last warning message (string)
function GetLastWarning ()
{
	// hands back whatever is currently stored in _warning
	$message = $this->_warning;
	return $message;
}
488
489 /// get last error flag (to tell network connection errors from searchd errors or broken responses)
function IsConnectError()
{
	// flag is raised by _Send() and _Connect() on connection-level failures
	$flag = $this->_connerror;
	return $flag;
}
494
495 /// set searchd host name (string) and port (integer)
function SetServer ( $host, $port = 0 )
{
	// Selects the searchd endpoint.
	// $host - host name, an absolute unix socket path ("/..."), or a "unix://..." URL
	// $port - TCP port; 0 (the default) selects the standard searchd port 9312
	assert ( is_string($host) );

	// absolute path: treat as a unix socket
	// (guard against the empty string before indexing into it)
	if ( $host!=="" && $host[0] == '/' )
	{
		$this->_path = 'unix://' . $host;
		return;
	}

	// explicit unix socket URL
	if ( substr ( $host, 0, 7 )=="unix://" )
	{
		$this->_path = $host;
		return;
	}

	assert ( is_int($port) );
	$this->_host = $host;
	// port 0 is not connectable; fall back to the default instead of storing it
	$this->_port = ( $port==0 ) ? 9312 : $port;
	$this->_path = '';
}
516
517 /// set server connection timeout (0 to remove)
function SetConnectTimeout ( $timeout )
{
	// stored as-is; _Connect() passes it to fsockopen() only when it is positive
	assert ( is_numeric($timeout) );

	$this->_timeout = $timeout;
}
523
524
function _Send ( $handle, $data, $length )
{
	// Writes $length bytes of $data to $handle.
	// Returns true on a complete write; otherwise records a connection error and returns false.
	$sent = !feof($handle) && fwrite ( $handle, $data, $length ) === $length;
	if ( $sent )
		return true;

	$this->_error = 'connection unexpectedly closed (timed out?)';
	$this->_connerror = true;
	return false;
}
535
536 /////////////////////////////////////////////////////////////////////////////
537
538 /// enter mbstring workaround mode
function _MBPush ()
{
	// Remembers the current mbstring internal encoding and switches to latin1,
	// but only when bit 2 of mbstring.func_overload is set.
	$this->_mbenc = "";

	$overload = ini_get ( "mbstring.func_overload" );
	if ( !( $overload & 2 ) )
		return;

	$this->_mbenc = mb_internal_encoding();
	mb_internal_encoding ( "latin1" );
}
548
549 /// leave mbstring workaround mode
function _MBPop ()
{
	// nothing was saved by _MBPush(), so there is nothing to restore
	if ( !$this->_mbenc )
		return;

	mb_internal_encoding ( $this->_mbenc );
}
555
556 /// connect to searchd server
function _Connect ()
{
	// Opens a connection to searchd and performs the protocol version handshake.
	// Returns the socket on success, false on failure (with _error filled in).

	// a stored socket is reused as long as it has not hit EOF
	if ( $this->_socket!==false )
	{
		if ( !@feof ( $this->_socket ) )
			return $this->_socket;

		// stale socket: forget it and reopen below
		$this->_socket = false;
	}

	$errno = 0;
	$errstr = "";
	$this->_connerror = false;

	// unix socket path takes precedence over host and port
	$use_path = $this->_path ? true : false;
	$host = $use_path ? $this->_path : $this->_host;
	$port = $use_path ? 0 : $this->_port;

	// only pass a timeout to fsockopen() when one was configured
	$fp = ( $this->_timeout<=0 )
		? @fsockopen ( $host, $port, $errno, $errstr )
		: @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

	if ( !$fp )
	{
		$location = $this->_path
			? $this->_path
			: "{$this->_host}:{$this->_port}";

		$errstr = trim ( $errstr );
		$this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
		$this->_connerror = true;
		return false;
	}

	// The client version must be sent before (!) reading anything back from searchd.
	// Per the original note, a write-write-read pattern could otherwise get throttled
	// by Nagle under some conditions (reported on FreeBSD for instance).
	$hello = pack ( "N", 1 );
	if ( !$this->_Send ( $fp, $hello, 4 ) )
	{
		fclose ( $fp );
		$this->_error = "failed to send client protocol version";
		return false;
	}

	// read and validate the server protocol version
	list(,$ver) = unpack ( "N*", fread ( $fp, 4 ) );
	$ver = (int)$ver;
	if ( $ver<1 )
	{
		fclose ( $fp );
		$this->_error = "expected searchd protocol version 1+, got version '$ver'";
		return false;
	}

	return $fp;
}
626
627 /// get and check response packet from searchd server
function _GetResponse ( $fp, $client_ver )
{
	// Reads one response packet from searchd and validates it.
	// $fp         - connected socket
	// $client_ver - client-side version of the command that was sent
	// Returns the response body (a leading warning, if any, is stripped and stored
	// in _warning), or false on failure with _error filled in.
	$response = "";
	$len = 0;
	$status = -1; // only meaningful once a full header was parsed
	$ver = 0;

	// 8-byte header: status (2 bytes), version (2 bytes), body length (4 bytes)
	$header = fread ( $fp, 8 );
	if ( is_string($header) && strlen($header)==8 )
	{
		list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
		$left = $len;
		while ( $left>0 && !feof($fp) )
		{
			$chunk = fread ( $fp, min ( 8192, $left ) );

			// hard read failure: stop instead of spinning
			if ( $chunk===false )
				break;

			// empty read: give up if the stream timed out, otherwise let feof() decide
			if ( $chunk==="" )
			{
				$meta = stream_get_meta_data ( $fp );
				if ( !empty($meta["timed_out"]) )
					break;
				continue;
			}

			// compare against "" rather than testing truthiness, so that a chunk
			// consisting of the single character "0" is not silently dropped
			$response .= $chunk;
			$left -= strlen($chunk);
		}
	}

	// non-persistent connection: this socket is done
	if ( $this->_socket === false )
		fclose ( $fp );

	// check response
	$read = strlen ( $response );
	if ( $read==0 || $read!=$len )
	{
		$this->_error = $len
			? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
			: "received zero-sized searchd response";
		return false;
	}

	// check status
	if ( $status==SEARCHD_WARNING )
	{
		// body starts with a length-prefixed warning message
		list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
		$this->_warning = substr ( $response, 4, $wlen );
		return substr ( $response, 4+$wlen );
	}
	if ( $status==SEARCHD_ERROR )
	{
		$this->_error = "searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status==SEARCHD_RETRY )
	{
		$this->_error = "temporary searchd error: " . substr ( $response, 4 );
		return false;
	}
	if ( $status!=SEARCHD_OK )
	{
		$this->_error = "unknown status code '$status'";
		return false;
	}

	// check version
	if ( $ver<$client_ver )
	{
		$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
			$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
	}

	return $response;
}
693
694 /////////////////////////////////////////////////////////////////////////////
695 // searching
696 /////////////////////////////////////////////////////////////////////////////
697
698 /// set offset and count into result set,
699 /// and optionally set max-matches and cutoff limits
function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
{
	// offset and limit are always applied; max-matches and cutoff only when positive
	assert ( is_int($offset) );
	assert ( is_int($limit) );
	assert ( $offset>=0 );
	assert ( $limit>0 );
	assert ( $max>=0 );

	$this->_offset = $offset;
	$this->_limit = $limit;

	if ( $max>0 )
	{
		$this->_maxmatches = $max;
	}
	if ( $cutoff>0 )
	{
		$this->_cutoff = $cutoff;
	}
}
714
715 /// set maximum query time, in milliseconds, per-index
716 /// integer, 0 means "do not limit"
function SetMaxQueryTime ( $max )
{
	// $max must be a non-negative integer
	assert ( is_int($max) );
	assert ( $max>=0 );

	$this->_maxquerytime = $max;
}
723
724 /// set matching mode
function SetMatchMode ( $mode )
{
	// $mode must be one of the SPH_MATCH_xxx constants
	assert ( $mode==SPH_MATCH_ALL
		|| $mode==SPH_MATCH_ANY
		|| $mode==SPH_MATCH_PHRASE
		|| $mode==SPH_MATCH_BOOLEAN
		|| $mode==SPH_MATCH_EXTENDED
		|| $mode==SPH_MATCH_FULLSCAN
		|| $mode==SPH_MATCH_EXTENDED2 );

	$this->_mode = $mode;
}
736
737 /// set ranking mode
function SetRankingMode ( $ranker )
{
	// valid rankers are 0 up to, but not including, SPH_RANK_TOTAL
	assert ( $ranker>=0 && $ranker<SPH_RANK_TOTAL );

	$this->_ranker = $ranker;
}
743
744 /// set matches sorting mode
function SetSortMode ( $mode, $sortby="" )
{
	// $mode must be one of the SPH_SORT_xxx constants;
	// every mode other than SPH_SORT_RELEVANCE needs a non-empty $sortby
	assert (
		$mode==SPH_SORT_RELEVANCE ||
		$mode==SPH_SORT_ATTR_DESC ||
		$mode==SPH_SORT_ATTR_ASC ||
		$mode==SPH_SORT_TIME_SEGMENTS ||
		$mode==SPH_SORT_EXTENDED ||
		$mode==SPH_SORT_EXPR );
	assert ( is_string($sortby) );
	assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

	$this->_sortby = $sortby;
	$this->_sort = $mode;
}
760
761 /// bind per-field weights by order
762 /// DEPRECATED; use SetFieldWeights() instead
function SetWeights ( $weights )
{
	// $weights is a plain list of integer weights
	assert ( is_array($weights) );
	foreach ( $weights as $w )
	{
		assert ( is_int($w) );
	}

	$this->_weights = $weights;
}
771
772 /// bind per-field weights by name
function SetFieldWeights ( $weights )
{
	// $weights maps field name (string) to weight (integer)
	assert ( is_array($weights) );
	foreach ( $weights as $field=>$w )
	{
		assert ( is_string($field) );
		assert ( is_int($w) );
	}

	$this->_fieldweights = $weights;
}
783
784 /// bind per-index weights by name
function SetIndexWeights ( $weights )
{
	// $weights maps index name (string) to weight (integer)
	assert ( is_array($weights) );
	foreach ( $weights as $idx=>$w )
	{
		assert ( is_string($idx) );
		assert ( is_int($w) );
	}

	$this->_indexweights = $weights;
}
795
796 /// set IDs range to match
797 /// only match records if document ID is beetwen $min and $max (inclusive)
function SetIDRange ( $min, $max )
{
	// both bounds must be numeric, with $min not above $max
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$this->_max_id = $max;
	$this->_min_id = $min;
}
806
807 /// set values set filter
808 /// only match records where $attribute value is in given set
function SetFilter ( $attribute, $values, $exclude=false )
{
	// Appends a values-set filter; $values must be a non-empty array of numerics.
	assert ( is_string($attribute) );
	assert ( is_array($values) );
	assert ( count($values) );

	// with assertions disabled, silently ignore anything but a non-empty array
	if ( !is_array($values) || !count($values) )
		return;

	foreach ( $values as $value )
	{
		assert ( is_numeric($value) );
	}

	$filter = array (
		"type"=>SPH_FILTER_VALUES,
		"attr"=>$attribute,
		"exclude"=>$exclude,
		"values"=>$values );
	$this->_filters[] = $filter;
}
823
824 /// set range filter
825 /// only match records if $attribute value is beetwen $min and $max (inclusive)
function SetFilterRange ( $attribute, $min, $max, $exclude=false )
{
	// Appends a numeric range filter with the given bounds.
	assert ( is_string($attribute) );
	assert ( is_numeric($min) );
	assert ( is_numeric($max) );
	assert ( $min<=$max );

	$filter = array (
		"type"=>SPH_FILTER_RANGE,
		"attr"=>$attribute,
		"exclude"=>$exclude,
		"min"=>$min,
		"max"=>$max );
	$this->_filters[] = $filter;
}
835
836 /// set float range filter
837 /// only match records if $attribute value is beetwen $min and $max (inclusive)
function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
{
	// Appends a float range filter; both bounds must be floats.
	assert ( is_string($attribute) );
	assert ( is_float($min) );
	assert ( is_float($max) );
	assert ( $min<=$max );

	$filter = array (
		"type"=>SPH_FILTER_FLOATRANGE,
		"attr"=>$attribute,
		"exclude"=>$exclude,
		"min"=>$min,
		"max"=>$max );
	$this->_filters[] = $filter;
}
847
848 /// setup anchor point for geosphere distance calculations
849 /// required to use @geodist in filters and sorting
850 /// latitude and longitude must be in radians
function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
{
	// attribute names are strings, the anchor coordinates are floats
	assert ( is_string($attrlat) );
	assert ( is_string($attrlong) );
	assert ( is_float($lat) );
	assert ( is_float($long) );

	$anchor = array (
		"attrlat"=>$attrlat,
		"attrlong"=>$attrlong,
		"lat"=>$lat,
		"long"=>$long );
	$this->_anchor = $anchor;
}
860
861 /// set grouping attribute and function
function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
{
	// $func must be one of the SPH_GROUPBY_xxx constants
	assert ( is_string($attribute) );
	assert ( is_string($groupsort) );
	assert ( $func==SPH_GROUPBY_DAY
		|| $func==SPH_GROUPBY_WEEK
		|| $func==SPH_GROUPBY_MONTH
		|| $func==SPH_GROUPBY_YEAR
		|| $func==SPH_GROUPBY_ATTR
		|| $func==SPH_GROUPBY_ATTRPAIR );

	$this->_groupsort = $groupsort;
	$this->_groupfunc = $func;
	$this->_groupby = $attribute;
}
877
878 /// set count-distinct attribute for group-by queries
function SetGroupDistinct ( $attribute )
{
	// attribute name must be a string
	assert ( is_string($attribute) );

	$this->_groupdistinct = $attribute;
}
884
885 /// set distributed retries count and delay
function SetRetries ( $count, $delay=0 )
{
	// both values must be non-negative integers
	assert ( is_int($count) && $count>=0 );
	assert ( is_int($delay) && $delay>=0 );

	$this->_retrydelay = $delay;
	$this->_retrycount = $count;
}
893
894 /// set result set format (hash or array; hash by default)
895 /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
function SetArrayResult ( $arrayresult )
{
	// flag must be a real boolean
	assert ( is_bool($arrayresult) );

	$this->_arrayresult = $arrayresult;
}
901
902 /// set attribute values override
903 /// there can be only one override per attribute
904 /// $values must be a hash that maps document IDs to attribute values
function SetOverride ( $attrname, $attrtype, $values )
{
	// only these attribute types may be overridden
	$allowed = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

	assert ( is_string ( $attrname ) );
	assert ( in_array ( $attrtype, $allowed ) );
	assert ( is_array ( $values ) );

	// keyed by attribute name, so a later call replaces an earlier override
	$override = array (
		"attr"=>$attrname,
		"type"=>$attrtype,
		"values"=>$values );
	$this->_overrides[$attrname] = $override;
}
913
914 /// set select-list (attributes or expressions), SQL-like syntax
function SetSelect ( $select )
{
	// select-list must be a string
	assert ( is_string ( $select ) );

	$this->_select = $select;
}
920
921 //////////////////////////////////////////////////////////////////////////////
922
923 /// clear all filters (for multi-queries)
function ResetFilters ()
{
	// drops the geo anchor together with the filters
	$this->_anchor = array();
	$this->_filters = array();
}
929
930 /// clear groupby settings (for multi-queries)
function ResetGroupBy ()
{
	// back to the same group-by defaults the constructor sets
	$this->_groupdistinct= "";
	$this->_groupsort = "@group desc";
	$this->_groupfunc = SPH_GROUPBY_DAY;
	$this->_groupby = "";
}
938
939 /// clear all attribute value overrides (for multi-queries)
function ResetOverrides ()
{
	// forget every attribute override registered so far
	$empty = array ();
	$this->_overrides = $empty;
}
944
945 //////////////////////////////////////////////////////////////////////////////
946
947 /// connect to searchd server, run given search query through given indexes,
948 /// and return the search results
function Query ( $query, $index="*", $comment="" )
{
	// Runs a single query: queues it, executes the batch, and unwraps the first result.
	// Returns the result array, or false on failure (see GetLastError()).
	assert ( empty($this->_reqs) );

	$this->AddQuery ( $query, $index, $comment );
	$results = $this->RunQueries ();
	$this->_reqs = array (); // just in case it failed too early

	// not an array: probably a network error; the error message should already be set
	if ( !is_array($results) )
		return false;

	$first = $results[0];
	$this->_error = $first["error"];
	$this->_warning = $first["warning"];

	return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
}
967
968 /// helper to pack floats in network byte order
function _PackFloat ( $f )
{
	// pack as a machine-order float, reread those bytes as a machine-order
	// 32-bit integer, then emit that integer in network byte order
	$raw = pack ( "f", $f );
	$words = unpack ( "L*", $raw );
	return pack ( "N", $words[1] );
}
975
976 /**
977 * @SuppressWarnings(unused)
978 */
979
980 /// add query to multi-query batch
981 /// returns index into results array from RunQueries() call
function AddQuery ( $query, $index="*", $comment="" )
{
	// Serializes the current settings plus the given query into one request
	// and appends it to the pending batch. Returns the index of the request,
	// which is also its index into the results array from RunQueries().
	// The request is collected as a list of chunks and joined at the end;
	// the chunk order is the wire order and must not be changed.

	// mbstring workaround
	$this->_MBPush ();

	$chunks = array ();

	// mode and limits
	$chunks[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
	$chunks[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;

	// query itself
	$chunks[] = pack ( "N", strlen($query) ) . $query;

	// weights
	$chunks[] = pack ( "N", count($this->_weights) );
	foreach ( $this->_weights as $w )
	{
		$chunks[] = pack ( "N", (int)$w );
	}

	// indexes
	$chunks[] = pack ( "N", strlen($index) ) . $index;

	// id64 range marker, then the id64 range
	$chunks[] = pack ( "N", 1 );
	$chunks[] = sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id );

	// filters
	$chunks[] = pack ( "N", count($this->_filters) );
	foreach ( $this->_filters as $flt )
	{
		$type = $flt["type"];
		$chunks[] = pack ( "N", strlen($flt["attr"]) ) . $flt["attr"];
		$chunks[] = pack ( "N", $type );

		if ( $type==SPH_FILTER_VALUES )
		{
			$chunks[] = pack ( "N", count($flt["values"]) );
			foreach ( $flt["values"] as $value )
			{
				$chunks[] = sphPackI64 ( $value );
			}
		}
		elseif ( $type==SPH_FILTER_RANGE )
		{
			$chunks[] = sphPackI64 ( $flt["min"] ) . sphPackI64 ( $flt["max"] );
		}
		elseif ( $type==SPH_FILTER_FLOATRANGE )
		{
			$chunks[] = $this->_PackFloat ( $flt["min"] ) . $this->_PackFloat ( $flt["max"] );
		}
		else
		{
			assert ( 0 && "internal error: unhandled filter type" );
		}

		$chunks[] = pack ( "N", $flt["exclude"] );
	}

	// group-by clause, max-matches count, group-sort clause, cutoff count
	$chunks[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
	$chunks[] = pack ( "N", $this->_maxmatches );
	$chunks[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
	$chunks[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
	$chunks[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

	// anchor point: a 0/1 presence flag, followed by the anchor when present
	if ( empty($this->_anchor) )
	{
		$chunks[] = pack ( "N", 0 );
	}
	else
	{
		$anchor = $this->_anchor;
		$chunks[] = pack ( "N", 1 );
		$chunks[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
		$chunks[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
		$chunks[] = $this->_PackFloat ( $anchor["lat"] ) . $this->_PackFloat ( $anchor["long"] );
	}

	// per-index weights
	$chunks[] = pack ( "N", count($this->_indexweights) );
	foreach ( $this->_indexweights as $name=>$w )
	{
		$chunks[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
	}

	// max query time
	$chunks[] = pack ( "N", $this->_maxquerytime );

	// per-field weights
	$chunks[] = pack ( "N", count($this->_fieldweights) );
	foreach ( $this->_fieldweights as $name=>$w )
	{
		$chunks[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
	}

	// comment
	$chunks[] = pack ( "N", strlen($comment) ) . $comment;

	// attribute overrides
	$chunks[] = pack ( "N", count($this->_overrides) );
	foreach ( $this->_overrides as $override )
	{
		$type = $override["type"];
		$chunks[] = pack ( "N", strlen($override["attr"]) ) . $override["attr"];
		$chunks[] = pack ( "NN", $type, count($override["values"]) );

		foreach ( $override["values"] as $id=>$val )
		{
			assert ( is_numeric($id) );
			assert ( is_numeric($val) );

			$chunks[] = sphPackU64 ( $id );

			// value encoding depends on the attribute type
			if ( $type==SPH_ATTR_FLOAT )
				$chunks[] = $this->_PackFloat ( $val );
			elseif ( $type==SPH_ATTR_BIGINT )
				$chunks[] = sphPackI64 ( $val );
			else
				$chunks[] = pack ( "N", $val );
		}
	}

	// select-list
	$chunks[] = pack ( "N", strlen($this->_select) ) . $this->_select;

	// mbstring workaround
	$this->_MBPop ();

	// store request to requests array
	$this->_reqs[] = join ( "", $chunks );
	return count($this->_reqs)-1;
}
1093
1094 /// connect to searchd, run queries batch, and return an array of result sets
function RunQueries ()
{
    // Sends every request queued in $this->_reqs to searchd as a single
    // batch and returns the parsed result sets, or false on failure.
    // The queue is cleared only after a response has been received.

    // nothing queued, nothing to send
    if ( empty($this->_reqs) )
    {
        $this->_error = "no queries defined, issue AddQuery() first";
        return false;
    }

    // mbstring workaround; the matching _MBPop() on the success path
    // is issued by _ParseSearchResponse()
    $this->_MBPush ();

    $fp = $this->_Connect();
    if ( !$fp )
    {
        $this->_MBPop ();
        return false;
    }

    // glue the queued requests together and prepend the command header;
    // the body length covers two extra 4-byte ints (0 and the query count)
    $nreqs = count($this->_reqs);
    $body = join ( "", $this->_reqs );
    $len = 8+strlen($body);
    $packet = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . $body;

    // only wait for a response when the request actually went out
    $response = false;
    if ( $this->_Send ( $fp, $packet, $len+8 ) )
        $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );

    if ( !$response )
    {
        $this->_MBPop ();
        return false;
    }

    // query sent ok; the queue can be reset now
    $this->_reqs = array ();

    return $this->_ParseSearchResponse ( $response, $nreqs );
}
1131
1132 /// parse and return search query (or queries) response
function _ParseSearchResponse ( $response, $nreqs )
{
    // Decodes the raw reply body $response into an array of at most $nreqs
    // result sets and then pops the mbstring workaround.
    //
    // Every result set carries "error", "warning" and "status"; unless the
    // status was an error it also carries "fields", "attrs", "total",
    // "total_found" and "time". "matches" and "words" are only created when
    // at least one match / word entry was decoded. With $this->_arrayresult
    // set, "matches" is a plain list of (id, weight, attrs) entries;
    // otherwise it is keyed by document id.

    $pos = 0; // read cursor into $response
    $end = strlen($response); // loops stop once the cursor reaches this, to protect against broken responses

    $results = array ();
    for ( $ires=0; $ires<$nreqs && $pos<$end; $ires++ )
    {
        $result = array ( "error"=>"", "warning"=>"" );

        // status word, followed by a message unless the status is OK
        list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;
        $result["status"] = $status;
        if ( $status!=SEARCHD_OK )
        {
            list(,$msglen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;
            $message = substr ( $response, $pos, $msglen );
            $pos += $msglen;

            if ( $status!=SEARCHD_WARNING )
            {
                // anything but a warning ends this result set right here
                $result["error"] = $message;
                $results[] = $result;
                continue;
            }
            $result["warning"] = $message;
        }

        // schema: field names
        $fields = array ();
        list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;
        for ( $i=0; $i<$nfields && $pos<$end; $i++ )
        {
            list(,$namelen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;
            $fields[] = substr ( $response, $pos, $namelen );
            $pos += $namelen;
        }
        $result["fields"] = $fields;

        // schema: attribute name => attribute type
        $schema = array ();
        list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;
        for ( $i=0; $i<$nattrs && $pos<$end; $i++ )
        {
            list(,$namelen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;
            $attrname = substr ( $response, $pos, $namelen );
            $pos += $namelen;
            list(,$attrtype) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;
            $schema[$attrname] = $attrtype;
        }
        $result["attrs"] = $schema;

        // match count, and whether document ids are 64 bits wide
        list(,$nmatches) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;
        list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;

        // matches; $idx doubles as the index into the array-style result
        for ( $idx=0; $idx<$nmatches && $pos<$end; $idx++ )
        {
            // document id and weight
            if ( $id64 )
            {
                $doc = sphUnpackU64 ( substr ( $response, $pos, 8 ) );
                $pos += 8;
                list(,$weight) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                $pos += 4;
            }
            else
            {
                list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                    substr ( $response, $pos, 8 ) ) );
                $pos += 8;
                $doc = sphFixUint($doc);
            }
            $weight = sprintf ( "%u", $weight );

            // attribute values, decoded in schema order
            $attrvals = array ();
            foreach ( $schema as $attrname=>$attrtype )
            {
                if ( $attrtype==SPH_ATTR_BIGINT )
                {
                    // 64bit ints
                    $attrvals[$attrname] = sphUnpackI64 ( substr ( $response, $pos, 8 ) );
                    $pos += 8;
                }
                else if ( $attrtype==SPH_ATTR_FLOAT )
                {
                    // floats arrive as a big-endian dword; reinterpret its bits
                    list(,$bits) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                    $pos += 4;
                    list(,$fval) = unpack ( "f*", pack ( "L", $bits ) );
                    $attrvals[$attrname] = $fval;
                }
                else
                {
                    // everything else starts with an unsigned dword
                    list(,$val) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                    $pos += 4;

                    if ( $attrtype & SPH_ATTR_MULTI )
                    {
                        // the dword is a value count, followed by that many dwords
                        $attrvals[$attrname] = array ();
                        for ( $n=0; $n<$val && $pos<$end; $n++ )
                        {
                            list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                            $pos += 4;
                            $attrvals[$attrname][] = sphFixUint($item);
                        }
                    }
                    else if ( $attrtype==SPH_ATTR_STRING )
                    {
                        // the dword is a byte length, followed by the string
                        $attrvals[$attrname] = substr ( $response, $pos, $val );
                        $pos += $val;
                    }
                    else
                    {
                        $attrvals[$attrname] = sphFixUint($val);
                    }
                }
            }

            // store the match
            if ( $this->_arrayresult )
                $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
            else
                $result["matches"][$doc] = array ( "weight"=>$weight, "attrs"=>$attrvals );
        }

        // totals, query time (sent in msec, stored as seconds), word count
        list ( $total, $total_found, $msecs, $nwords ) =
            array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
        $pos += 16;
        $result["total"] = sprintf ( "%u", $total );
        $result["total_found"] = sprintf ( "%u", $total_found );
        $result["time"] = sprintf ( "%.3f", $msecs/1000 );

        // per-word statistics
        for ( $w=0; $w<$nwords && $pos<$end; $w++ )
        {
            list(,$wordlen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;
            $word = substr ( $response, $pos, $wordlen );
            $pos += $wordlen;
            list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
            $pos += 8;
            $result["words"][$word] = array (
                "docs"=>sprintf ( "%u", $docs ),
                "hits"=>sprintf ( "%u", $hits ) );
        }

        $results[] = $result;
    }

    $this->_MBPop ();
    return $results;
}
1287
1288 /////////////////////////////////////////////////////////////////////////////
1289 // excerpts generation
1290 /////////////////////////////////////////////////////////////////////////////
1291
1292 /// connect to searchd server, and generate excerpts (snippets)
1293 /// of given documents for given query. returns false on failure,
1294 /// an array of snippets on success
function BuildExcerpts ( $docs, $index, $words, $opts=array() )
{
    // Sends an excerpt (snippet) request to searchd and decodes the reply.
    //   $docs  - array of document strings to build snippets from
    //   $index - index name, sent along with the request
    //   $words - query string, sent along with the request
    //   $opts  - optional settings; any key that is missing or null gets
    //            the default listed below
    // Returns an array with one snippet string per entry of $docs, or false
    // on failure; a truncated reply sets $this->_error to "incomplete reply".

    assert ( is_array($docs) );
    assert ( is_string($index) );
    assert ( is_string($words) );
    assert ( is_array($opts) );

    $this->_MBPush ();

    if (!( $fp = $this->_Connect() ))
    {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // fixup options
    /////////////////

    $defaults = array (
        "before_match"      => "<b>",
        "after_match"       => "</b>",
        "chunk_separator"   => " ... ",
        "limit"             => 256,
        "limit_passages"    => 0,
        "limit_words"       => 0,
        "around"            => 5,
        "exact_phrase"      => false,
        "single_passage"    => false,
        "use_boundaries"    => false,
        "weight_order"      => false,
        "query_mode"        => false,
        "force_all_words"   => false,
        "start_passage_id"  => 1,
        "load_files"        => false,
        "html_strip_mode"   => "index",
        "allow_empty"       => false,
        "passage_boundary"  => "none",
        "emit_zones"        => false );

    // isset() rather than array union, so that explicit nulls get defaults too
    foreach ( $defaults as $name=>$value )
        if ( !isset($opts[$name]) )
            $opts[$name] = $value;

    /////////////////
    // build request
    /////////////////

    // boolean options and the flag bit each one contributes
    $flagbits = array (
        "exact_phrase"      => 2,
        "single_passage"    => 4,
        "use_boundaries"    => 8,
        "weight_order"      => 16,
        "query_mode"        => 32,
        "force_all_words"   => 64,
        "load_files"        => 128,
        "allow_empty"       => 256,
        "emit_zones"        => 512 );

    // v.1.2 req
    $flags = 1; // remove spaces
    foreach ( $flagbits as $name=>$bit )
        if ( $opts[$name] )
            $flags |= $bit;

    $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
    $req .= pack ( "N", strlen($index) ) . $index; // req index
    $req .= pack ( "N", strlen($words) ) . $words; // req words

    // options
    $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
    $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
    $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
    $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );
    $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2
    $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"];
    $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"];

    // documents
    $ndocs = count($docs);
    $req .= pack ( "N", $ndocs );
    foreach ( $docs as $doc )
    {
        assert ( is_string($doc) );
        $req .= pack ( "N", strlen($doc) ) . $doc;
    }

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($req);
    $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
    if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
        !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) )
    {
        $this->_MBPop ();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    $pos = 0;
    $res = array ();
    $rlen = strlen($response);
    for ( $i=0; $i<$ndocs; $i++ )
    {
        // every snippet is a 4-byte length followed by that many bytes;
        // make sure the length prefix itself is there before unpacking it,
        // otherwise unpack() warns on a truncated reply
        $complete = ( $pos+4 <= $rlen );
        if ( $complete )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;
            $complete = ( $pos+$len <= $rlen );
        }

        if ( !$complete )
        {
            $this->_error = "incomplete reply";
            $this->_MBPop ();
            return false;
        }

        $res[] = $len ? substr ( $response, $pos, $len ) : "";
        $pos += $len;
    }

    $this->_MBPop ();
    return $res;
}
1408
1409
1410 /////////////////////////////////////////////////////////////////////////////
1411 // keyword generation
1412 /////////////////////////////////////////////////////////////////////////////
1413
1414 /// connect to searchd server, and generate keyword list for a given query
1415 /// returns false on failure,
1416 /// an array of words on success
function BuildKeywords ( $query, $index, $hits )
{
    // Sends a keywords request for $query against $index to searchd and
    // decodes the reply.
    //   $query - query string to extract keywords from
    //   $index - index name, sent along with the request
    //   $hits  - bool; when true every entry also gets "docs" and "hits"
    // Returns an array of entries with "tokenized" and "normalized" keys,
    // or false on failure; a truncated reply sets $this->_error to
    // "incomplete reply".

    assert ( is_string($query) );
    assert ( is_string($index) );
    assert ( is_bool($hits) );

    $this->_MBPush ();

    if (!( $fp = $this->_Connect() ))
    {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // build request
    /////////////////

    // v.1.0 req
    $req = pack ( "N", strlen($query) ) . $query; // req query
    $req .= pack ( "N", strlen($index) ) . $index; // req index
    $req .= pack ( "N", (int)$hits );

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($req);
    $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
    if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
        !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) )
    {
        $this->_MBPop ();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    // Every read is bounds-checked BEFORE unpacking: on a truncated reply
    // unpack() would otherwise warn and return false, which array_values()
    // rejects outright on PHP 8.
    $pos = 0;
    $res = array ();
    $rlen = strlen($response);
    $complete = ( $rlen >= 4 );
    $nwords = 0;
    if ( $complete )
    {
        list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;
    }

    for ( $i=0; $complete && $i<$nwords; $i++ )
    {
        // two length-prefixed strings: the tokenized, then the normalized form
        $forms = array ();
        for ( $k=0; $k<2; $k++ )
        {
            if ( $pos+4 > $rlen )
            {
                $complete = false;
                break;
            }
            list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;

            if ( $pos+$len > $rlen )
            {
                $complete = false;
                break;
            }
            $forms[] = $len ? substr ( $response, $pos, $len ) : "";
            $pos += $len;
        }
        if ( !$complete )
            break;

        $entry = array ( "tokenized"=>$forms[0], "normalized"=>$forms[1] );

        // optional statistics: document count and hit count
        if ( $hits )
        {
            if ( $pos+8 > $rlen )
            {
                $complete = false;
                break;
            }
            list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
            $pos += 8;
            $entry["docs"] = $ndocs;
            $entry["hits"] = $nhits;
        }

        $res[] = $entry;
    }

    if ( !$complete )
    {
        $this->_error = "incomplete reply";
        $this->_MBPop ();
        return false;
    }

    $this->_MBPop ();
    return $res;
}
1493
function EscapeString ( $string )
{
    // Returns $string with a backslash put in front of every character
    // listed below. The backslash itself must stay first in the list:
    // str_replace() applies the pairs in order, so the backslashes added
    // for the later characters are not escaped a second time.
    $specials = array ( '\\', '(', ')', '|', '-', '!', '@', '~', '"', '&', '/', '^', '$', '=' );

    $escaped = array ();
    foreach ( $specials as $char )
        $escaped[] = '\\' . $char;

    return str_replace ( $specials, $escaped, $string );
}
1501
1502 /////////////////////////////////////////////////////////////////////////////
1503 // attribute updates
1504 /////////////////////////////////////////////////////////////////////////////
1505
1506 /// batch update given attributes in given rows in given indexes
1507 /// returns amount of updated documents (0 or more) on success, or -1 on failure
function UpdateAttributes ( $index, $attrs, $values, $mva=false )
{
    // Sends an attribute update request to searchd.
    //   $index  - index name string
    //   $attrs  - array of attribute name strings
    //   $values - array keyed by document id; every row holds one value per
    //             entry of $attrs (ints, or arrays of ints when $mva is true)
    //   $mva    - bool; marks every attribute as multi-valued
    // Returns the number unpacked from the first 4 bytes of the reply, or -1
    // when connecting, sending or receiving failed.

    // verify everything
    assert ( is_string($index) );
    assert ( is_bool($mva) );

    assert ( is_array($attrs) );
    foreach ( $attrs as $name )
        assert ( is_string($name) );

    assert ( is_array($values) );
    foreach ( $values as $docid=>$row )
    {
        assert ( is_numeric($docid) );
        assert ( is_array($row) );
        assert ( count($row)==count($attrs) );
        foreach ( $row as $cell )
        {
            if ( !$mva )
            {
                assert ( is_int($cell) );
                continue;
            }

            assert ( is_array($cell) );
            foreach ( $cell as $item )
                assert ( is_int($item) );
        }
    }

    // build request
    $this->_MBPush ();
    $mvaflag = pack ( "N", $mva ? 1 : 0 ); // same flag follows every attribute name

    $req = pack ( "N", strlen($index) ) . $index;

    $req .= pack ( "N", count($attrs) );
    foreach ( $attrs as $name )
        $req .= pack ( "N", strlen($name) ) . $name . $mvaflag;

    $req .= pack ( "N", count($values) );
    foreach ( $values as $docid=>$row )
    {
        $req .= sphPackU64 ( $docid );
        foreach ( $row as $cell )
        {
            if ( !$mva )
            {
                // plain attribute: the value itself
                $req .= pack ( "N", $cell );
                continue;
            }

            // multi-valued attribute: value count, then the values
            $req .= pack ( "N", count($cell) );
            foreach ( $cell as $item )
                $req .= pack ( "N", $item );
        }
    }

    // connect, send query, get response
    $fp = $this->_Connect();
    if ( !$fp )
    {
        $this->_MBPop ();
        return -1;
    }

    $len = strlen($req);
    $packet = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header

    // only wait for a response when the request actually went out
    $response = false;
    if ( $this->_Send ( $fp, $packet, $len+8 ) )
        $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE );

    if ( !$response )
    {
        $this->_MBPop ();
        return -1;
    }

    // parse response
    list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
    $this->_MBPop ();
    return $updated;
}
1586
1587 /////////////////////////////////////////////////////////////////////////////
1588 // persistent connections
1589 /////////////////////////////////////////////////////////////////////////////
1590
function Open()
{
    // Connects to searchd, sends the PERSIST command and keeps the
    // connection in $this->_socket. Returns true on success; returns false
    // when a socket is already stored, or when connecting or sending fails.

    if ( $this->_socket !== false )
    {
        $this->_error = 'already connected';
        return false;
    }

    $fp = $this->_Connect();
    if ( !$fp )
        return false;

    // command, command version = 0, body length = 4, body = 1
    $packet = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
    if ( $this->_Send ( $fp, $packet, 12 ) )
    {
        // remember the connection only once the command went out
        $this->_socket = $fp;
        return true;
    }

    return false;
}
1609
function Close()
{
    // Closes the connection stored in $this->_socket and resets it to
    // false. Returns true when a connection was closed, false (with
    // $this->_error set) when none was stored.

    if ( $this->_socket !== false )
    {
        fclose ( $this->_socket );
        $this->_socket = false;
        return true;
    }

    $this->_error = 'not connected';
    return false;
}
1623
1624 //////////////////////////////////////////////////////////////////////////
1625 // status
1626 //////////////////////////////////////////////////////////////////////////
1627
function Status ()
{
    // Sends the STATUS command to searchd and decodes the reply.
    // Returns an array of rows, each row being a list of strings, or false
    // on failure; a truncated reply sets $this->_error to "incomplete reply".

    $this->_MBPush ();
    if (!( $fp = $this->_Connect() ))
    {
        $this->_MBPop();
        return false;
    }

    $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
    if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
        !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
    {
        $this->_MBPop ();
        return false;
    }

    // The reply opens with two dwords, the row and column counts. Check they
    // are present before unpacking: on short input unpack() returns false,
    // which array_values() rejects outright on PHP 8.
    $p = 0;
    $max = strlen($response);
    $complete = ( $max >= 8 );
    $rows = 0;
    $cols = 0;
    if ( $complete )
    {
        list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) );
        $p += 8;
    }

    // then rows*cols length-prefixed strings, row by row
    $res = array();
    for ( $i=0; $complete && $i<$rows; $i++ )
        for ( $j=0; $j<$cols; $j++ )
        {
            if ( $p+4 > $max )
            {
                $complete = false;
                break;
            }
            list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) );
            $p += 4;

            if ( $p+$len > $max )
            {
                $complete = false;
                break;
            }
            $res[$i][] = substr ( $response, $p, $len );
            $p += $len;
        }

    if ( !$complete )
    {
        $this->_error = "incomplete reply";
        $this->_MBPop ();
        return false;
    }

    $this->_MBPop ();
    return $res;
}
1660
1661 //////////////////////////////////////////////////////////////////////////
1662 // flush
1663 //////////////////////////////////////////////////////////////////////////
1664
function FlushAttributes ()
{
    // Sends the FLUSHATTRS command to searchd. Returns the number unpacked
    // from a 4-byte reply, or -1 on failure; a reply of any other length
    // additionally sets $this->_error.

    $this->_MBPush ();

    $fp = $this->_Connect();
    if ( !$fp )
    {
        $this->_MBPop();
        return -1;
    }

    $packet = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); // len=0

    // only wait for a response when the request actually went out
    $response = false;
    if ( $this->_Send ( $fp, $packet, 8 ) )
        $response = $this->_GetResponse ( $fp, VER_COMMAND_FLUSHATTRS );

    if ( !$response )
    {
        $this->_MBPop ();
        return -1;
    }

    if ( strlen($response)!=4 )
    {
        $this->_error = "unexpected response length";
        $tag = -1;
    } else
    {
        list(,$tag) = unpack ( "N*", $response );
    }

    $this->_MBPop ();
    return $tag;
}
1691 }
1692
1693 //
1694 // $Id: sphinxapi.php 2758 2011-04-04 11:10:44Z kevg $
1695 //