xref: /illumos-gate/usr/src/cmd/refer/hunt2.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved. The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
17 #include "refer..c"
18 
19 static int *coord = 0;
20 int hh[50];
21 extern int *hfreq, hfrflg;
22 extern int prfreqs;
23 union ptr {
24 	unsigned *a;
25 	long *b;
26 };
27 
28 extern int hash();
29 extern void shell();
30 extern void *zalloc();
31 
32 static long getl(FILE *);
33 static int hcomp(int, int);
34 static void hexch(int, int);
35 
36 int
37 doquery(long *hpt, int nhash, FILE *fb, int nitem,
38 	    char *qitem[], unsigned *mptr)
39 {
40 	long k;
41 	union ptr prevdrop, master;
42 	int nf = 0, best = 0, nterm = 0, i, g, j;
43 	int *prevcoord;
44 	long lp;
45 	extern int lmaster, colevel, reached;
46 	extern int iflong;
47 
48 	if (iflong) {
49 		master.b = (long *)mptr;
50 	} else {
51 		master.a = mptr;
52 	}
53 
54 #if D1
55 	fprintf(stderr, "entering doquery nitem %d\n", nitem);
56 	fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n",
57 	    hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]);
58 	fprintf(stderr, "and frequencies are  %d %d %d %d %d\n",
59 	    hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]);
60 #endif
61 	assert(lmaster > 0);
62 	if (coord == 0)
63 		coord = (int *)zalloc(lmaster, sizeof (lmaster));
64 	if (colevel > 0) {
65 		if (iflong)
66 			prevdrop.b = (long *)zalloc(lmaster, sizeof (long));
67 		else
68 			prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int));
69 		prevcoord = (int *)zalloc(lmaster, sizeof (lmaster));
70 	} else {
71 		prevdrop.a = master.a;
72 		prevcoord = coord;
73 	}
74 #if D1
75 	fprintf(stderr, "nitem %d\n", nitem);
76 #endif
77 	for (i = 0; i < nitem; i++) {
78 		hh[i] = hash(qitem[i])%nhash;
79 #if D1
80 		fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]);
81 #endif
82 	}
83 #if D1
84 	fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
85 #endif
86 	if (prfreqs)
87 		for (i = 0; i < nitem; i++)
88 			fprintf(stderr, "item %s hash %d hfreq %d\n",
89 			    qitem[i], hh[i], hfreq[hh[i]]);
90 	/* if possible, sort query into decreasing frequency of hashes */
91 	if (hfrflg)
92 		shell(nitem, hcomp, hexch);
93 #if D1
94 	for (i = 0; i < nitem; i++)
95 		fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
96 #endif
97 	lp = hpt [hh[0]];
98 #if D1
99 	fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp);
100 #endif
101 	assert(fb != NULL);
102 	assert(fseek(fb, lp, 0) != -1);
103 	for (i = 0; i < lmaster; i++) {
104 		if (iflong)
105 			master.b[i] = getl(fb);
106 		else
107 			master.a[i] = getw(fb);
108 		coord[i] = 1;
109 #if D2
110 		if (iflong)
111 			fprintf(stderr, "master has %ld\n", (master.b[i]));
112 		else
113 			fprintf(stderr, "master has %d\n", (master.a[i]));
114 #endif
115 		assert(i < lmaster);
116 		if (iflong) {
117 			if (master.b[i] == -1L) break;
118 		} else {
119 			if (master.a[i] == -1) break;
120 		}
121 	}
122 	nf = i;
123 	for (nterm = 1; nterm < nitem; nterm++) {
124 #ifdef D1
125 		fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
126 #endif
127 		if (colevel > 0) {
128 			for (j = 0; j < nf; j++) {
129 				if (iflong)
130 					prevdrop.b[j] = master.b[j];
131 				else
132 					prevdrop.a[j] = master.a[j];
133 				prevcoord[j] = coord[j];
134 			}
135 		}
136 		lp = hpt[hh[nterm]];
137 		assert(fseek(fb, lp, 0) != -1);
138 #if D1
139 		fprintf(stderr, "item %d hash %d seek to %ld\n",
140 		    nterm, hh[nterm], lp);
141 #endif
142 		g = j = 0;
143 		while (1) {
144 			if (iflong)
145 				k = getl(fb);
146 			else
147 				k = getw(fb);
148 			if (k == -1) break;
149 #if D2
150 			fprintf(stderr, "next term finds %ld\n", k);
151 #endif
152 #if D3
153 			if (iflong)
154 				fprintf(stderr,
155 				    "bfwh j %d nf %d master %ld k %ld\n",
156 				    j, nf, prevdrop.b[j], (long)(k));
157 			else
158 				fprintf(stderr,
159 				    "bfwh j %d nf %d master %ld k %ld\n",
160 				    j, nf, prevdrop.a[j], (long)(k));
161 #endif
162 			while (j < nf &&
163 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) {
164 #if D3
165 				if (iflong)
166 					fprintf(stderr, "j %d nf %d prevdrop "
167 					    "%ld prevcoord %d colevel %d nterm "
168 					    "%d k %ld\n", j, nf, prevdrop.b[j],
169 					    prevcoord[j], colevel, nterm,
170 					    (long)(k));
171 				else
172 					fprintf(stderr, "j %d nf %d prevdrop "
173 					    "%ld prevcoord %d colevel %d nterm "
174 					    "%d k %ld\n", j, nf, prevdrop.a[j],
175 					    prevcoord[j], colevel, nterm,
176 					    (long)(k));
177 #endif
178 				if (prevcoord[j] + colevel <= nterm)
179 					j++;
180 				else {
181 					assert(g < lmaster);
182 					if (iflong)
183 						master.b[g] = prevdrop.b[j];
184 					else
185 						master.a[g] = prevdrop.a[j];
186 					coord[g++] = prevcoord[j++];
187 #if D1
188 					if (iflong)
189 						fprintf(stderr, " not skip g "
190 						    "%d doc %d coord %d note "
191 						    "%d\n", g, master.b[g-1],
192 						    coord[g-1], master.b[j-1]);
193 					else
194 						fprintf(stderr, " not skip g "
195 						    "%d doc %ld coord %d nterm "
196 						    "%d\n", g, master.a[g-1],
197 						    coord[g-1], nterm);
198 #endif
199 					continue;
200 				}
201 			}
202 			if (colevel == 0 && j >= nf) break;
203 			if (j < nf &&
204 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) {
205 				if (iflong)
206 					master.b[g] = k;
207 				else
208 					master.a[g] = k;
209 				coord[g++] = prevcoord[j++]+1;
210 #if D1
211 				if (iflong)
212 					fprintf(stderr, " at g %d item %ld "
213 					    "coord %d note %ld\n", g,
214 					    master.b[g-1], coord[g-1],
215 					    master.b[j-1]);
216 				else
217 					fprintf(stderr, " at g %d item %d "
218 					    "coord %d note %d\n", g,
219 					    master.a[g-1], coord[g-1],
220 					    master.a[j-1]);
221 #endif
222 			} else
223 				if (colevel >= nterm) {
224 					if (iflong)
225 						master.b[g] = k;
226 					else
227 						master.a[g] = k;
228 					coord[g++] = 1;
229 				}
230 		}
231 #if D1
232 		fprintf(stderr, "now have %d items\n", g);
233 #endif
234 		if (colevel > 0)
235 			for (; j < nf; j++)
236 				if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) +
237 				    colevel > nterm) {
238 					assert(g < lmaster);
239 					if (iflong)
240 						master.b[g] = prevdrop.b[j];
241 					else
242 						master.a[g] = prevdrop.a[j];
243 					coord[g++] = prevcoord[j];
244 #if D3
245 					if (iflong)
246 						fprintf(stderr, "copied over "
247 						    "%ld coord %d\n",
248 						    master.b[g-1], coord[g-1]);
249 					else
250 						fprintf(stderr, "copied over "
251 						    "%d coord %d\n",
252 						    master.a[g-1], coord[g-1]);
253 #endif
254 				}
255 		nf = g;
256 	}
257 	if (colevel > 0) {
258 		best = 0;
259 		for (j = 0; j < nf; j++)
260 			if (coord[j] > best) best = coord[j];
261 #if D1
262 		fprintf(stderr, "colevel %d best %d\n", colevel, best);
263 #endif
264 		reached = best;
265 		for (g = j = 0; j < nf; j++)
266 			if (coord[j] == best) {
267 				if (iflong)
268 					master.b[g++] = master.b[j];
269 				else
270 					master.a[g++] = master.a[j];
271 			}
272 		nf = g;
273 #if D1
274 		fprintf(stderr, "yet got %d\n", nf);
275 #endif
276 	}
277 #ifdef D1
278 	fprintf(stderr, " returning with %d\n", nf);
279 #endif
280 	if (colevel) {
281 		free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int));
282 		free(prevcoord, lmaster, sizeof (lmaster));
283 	}
284 #if D3
285 	for (g = 0; g < nf; g++)
286 		if (iflong)
287 			fprintf(stderr, ":%ld\n", master.b[g]);
288 		else
289 			fprintf(stderr, ":%d\n", master.a[g]);
290 #endif
291 	return (nf);
292 }
293 
294 static long
295 getl(FILE *fb)
296 {
297 	return (getw(fb));
298 }
299 
300 static int
301 hcomp(int n1, int n2)
302 {
303 	return (hfreq[hh[n1]] <= hfreq[hh[n2]]);
304 }
305 
306 static void
307 hexch(int n1, int n2)
308 {
309 	int t;
310 	t = hh[n1];
311 	hh[n1] = hh[n2];
312 	hh[n2] = t;
313 }
314