xref: /illumos-gate/usr/src/uts/common/sys/dnlc.h (revision e7b9901ea28b51d04ea82ac040c27457112687ec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
26 /*	  All Rights Reserved	*/
27 
28 /*
29  * University Copyright- Copyright (c) 1982, 1986, 1988
30  * The Regents of the University of California
31  * All Rights Reserved
32  *
33  * University Acknowledgment- Portions of this document are derived from
34  * software developed by the University of California, Berkeley, and its
35  * contributors.
36  */
37 
38 #ifndef _SYS_DNLC_H
39 #define	_SYS_DNLC_H
40 
41 #ifdef	__cplusplus
42 extern "C" {
43 #endif
44 
45 #include <sys/kstat.h>
46 
47 /*
48  * DNLC - Directory name lookup cache.
49  * There are now two sorts of name caching:
50  *
51  * Standard dnlc: This original cache holds recent mappings
52  *                of <directory vnode, name> to vnode mappings.
53  *
54  * Directory caches: Entire large directories can be cached, subject to
55  *		     memory availability and tunables. A directory cache
56  *		     anchor point must be provided in the xxnode for
57  *		     a directory.
58  */
59 
60 
61 /*
62  * Standard dnlc
63  * =============
64  */
65 
66 /*
67  * This structure describes the elements in the cache of recent
68  * names looked up.
69  *
70  * Note namlen is a uchar_t to conserve space
71  * and alignment padding. The max length of any
72  * pathname component is defined as MAXNAMELEN
73  * which is 256 (including the terminating null).
74  * So provided this doesn't change, we don't include the null,
75  * we always use bcmp to compare strings, and we don't start
76  * storing full names, then we are ok. The space savings are worth it.
77  */
78 typedef struct ncache {
79 	struct ncache *hash_next;	/* hash chain, MUST BE FIRST */
80 	struct ncache *hash_prev;
81 	struct vnode *vp;		/* vnode the name refers to */
82 	struct vnode *dp;		/* vnode of parent of name */
83 	int hash;			/* hash signature */
84 	uchar_t namlen;			/* length of name */
85 	char name[];			/* segment name */
86 } ncache_t;
87 
88 /*
89  * Produce size of struct ncache with space allocated in name string.
90  */
91 #define	NCACHE_SIZE(namelen)	offsetof(ncache_t, name) + (namelen)
92 
93 /*
94  * Hash table bucket structure of name cache entries for fast lookup.
95  */
96 typedef struct nc_hash	{
97 	ncache_t *hash_next;
98 	ncache_t *hash_prev;
99 	kmutex_t hash_lock;
100 } nc_hash_t;
101 
102 /*
103  * Statistics on name cache
104  * Not protected by locks
105  */
106 /*
107  * ncstats has been deprecated, due to the integer size of the counters
108  * which can easily overflow in the dnlc.
109  * It is maintained (at some expense) for compatability.
110  * The preferred interface is the kstat accessible nc_stats below, ehich
111  * is actually shared with directory caching.
112  */
113 struct ncstats {
114 	int	hits;		/* hits that we can really use */
115 	int	misses;		/* cache misses */
116 	int	enters;		/* number of enters done */
117 	int	dbl_enters;	/* number of enters tried when already cached */
118 	int	long_enter;	/* deprecated, no longer accounted */
119 	int	long_look;	/* deprecated, no longer accounted */
120 	int	move_to_front;	/* entry moved to front of hash chain */
121 	int	purges;		/* number of purges of cache */
122 };
123 
124 struct nc_stats {
125 	kstat_named_t ncs_hits;		/* cache hits */
126 	kstat_named_t ncs_misses;	/* cache misses */
127 	kstat_named_t ncs_neg_hits;	/* negative cache hits */
128 	kstat_named_t ncs_enters;	/* enters */
129 	kstat_named_t ncs_dbl_enters;	/* enters when entry already cached */
130 	kstat_named_t ncs_purge_total;	/* total entries prurged */
131 	kstat_named_t ncs_purge_all;	/* dnlc_purge() calls */
132 	kstat_named_t ncs_purge_vp;	/* dnlc_purge_vp() calls */
133 	kstat_named_t ncs_purge_vfs;	/* dnlc_purge_vfs() calls */
134 	kstat_named_t ncs_purge_fs1;	/* dnlc_purge_fs1() calls */
135 	kstat_named_t ncs_pick_free;	/* found a free ncache */
136 	kstat_named_t ncs_pick_heur;	/* found ncache w/ NULL vpages */
137 	kstat_named_t ncs_pick_last;	/* found last ncache on chain */
138 
139 	/* directory caching stats */
140 
141 	kstat_named_t ncs_dir_hits;	/* dir cache hits */
142 	kstat_named_t ncs_dir_misses;	/* dir cache misses */
143 	kstat_named_t ncs_cur_dirs;	/* current # directories cached */
144 	kstat_named_t ncs_dir_num_ents;	/* current # entries cached */
145 	kstat_named_t ncs_dirs_cached;	/* total # directories cached */
146 	kstat_named_t ncs_dir_start_nm;	/* dir start no memory */
147 	kstat_named_t ncs_dir_add_nm;	/* add entry/space - no memory */
148 	kstat_named_t ncs_dir_addabort;	/* add entry/space - abort */
149 	kstat_named_t ncs_dir_add_max;	/* add entry/space - max exceeded */
150 	kstat_named_t ncs_dir_reme_fai;	/* remove entry fail */
151 	kstat_named_t ncs_dir_rems_fai;	/* remove space fail */
152 	kstat_named_t ncs_dir_upd_fail;	/* update space fail */
153 	kstat_named_t ncs_dir_finipurg;	/* fini purges */
154 	kstat_named_t ncs_dir_rec_last;	/* reclaim last */
155 	kstat_named_t ncs_dir_recl_any;	/* reclaim any */
156 };
157 
158 /*
159  * The dnlc hashing function.
160  * Although really a kernel macro we export it to allow validation
161  * of ncache_t entries by mdb. Note, mdb can handle the ASSERT.
162  *
163  * 'hash' and 'namlen' must be l-values. A check is made to ensure
164  * the name length fits into an unsigned char (see ncache_t).
165  */
166 #define	DNLCHASH(name, dvp, hash, namlen)			\
167 	{							\
168 		char Xc;					\
169 		const char *Xcp;				\
170 		hash = (int)((uintptr_t)(dvp)) >> 8;		\
171 		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)	\
172 			(hash) = ((hash) << 4) + (hash) + Xc;	\
173 		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
174 		(namlen) = Xcp - (name);			\
175 	}
176 
177 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
178 
179 #include <sys/vfs.h>
180 #include <sys/vnode.h>
181 
182 extern int ncsize;		/* set in param_init() # of dnlc entries */
183 extern vnode_t negative_cache_vnode;
184 #define	DNLC_NO_VNODE &negative_cache_vnode
185 
186 void	dnlc_init(void);
187 void	dnlc_enter(vnode_t *, const char *, vnode_t *);
188 void	dnlc_update(vnode_t *, const char *, vnode_t *);
189 vnode_t	*dnlc_lookup(vnode_t *, const char *);
190 void	dnlc_purge(void);
191 void	dnlc_purge_vp(vnode_t *);
192 int	dnlc_purge_vfsp(vfs_t *, int);
193 void	dnlc_remove(vnode_t *, const char *);
194 int	dnlc_fs_purge1(struct vnodeops *);
195 void	dnlc_reduce_cache(void *);
196 
197 #endif	/* defined(_KERNEL) */
198 
199 
200 /*
201  * Directory caching interfaces
202  * ============================
203  */
204 
205 /*
206  * Typically for large directories, the file names will be the same or
207  * at least similar lengths. So there's no point in anything more elaborate
208  * than a simple unordered linked list of free space entries.
209  * For small directories the name length distribution doesn't really matter.
210  */
211 typedef struct dcfree {
212 	uint64_t df_handle;		/* fs supplied handle */
213 	struct dcfree *df_next;		/* link to next free entry in bucket */
214 	uint_t df_len;			/* length of free entry */
215 } dcfree_t;
216 
217 typedef struct dcentry {
218 	uint64_t de_handle;		/* fs supplied and returned data */
219 	struct dcentry *de_next;	/* link to next name entry in bucket */
220 	int de_hash;			/* hash signature */
221 	uchar_t de_namelen;		/* length of name excluding null */
222 	char de_name[];			/* name */
223 } dcentry_t;
224 
225 /*
226  * Produce size of struct dcentry with space allocated in name string.
227  */
228 #define	DCENTTRY_SIZE(namelen)	offsetof(dcentry_t, de_name) + (namelen)
229 
230 typedef struct dircache {
231 	struct dircache *dc_next;	/* chain - for purge purposes */
232 	struct dircache *dc_prev;	/* chain - for purge purposes */
233 	int64_t dc_actime;		/* dir access time, from lbolt64 */
234 	dcentry_t **dc_namehash;	/* entry hash table pointer */
235 	dcfree_t **dc_freehash;		/* free entry hash table pointer */
236 	uint_t dc_num_entries;		/* no of named entries */
237 	uint_t dc_num_free;		/* no of free space entries */
238 	uint_t dc_nhash_mask;		/* name hash table size - 1 */
239 	uint_t dc_fhash_mask;		/* free space hash table size - 1 */
240 	struct dcanchor *dc_anchor;	/* back ptr to anchor */
241 	boolean_t dc_complete;		/* cache complete boolean */
242 } dircache_t;
243 
244 typedef struct dcanchor {
245 	void *dca_dircache;	/* pointer to directory cache */
246 	kmutex_t dca_lock;		/* protects the directory cache */
247 } dcanchor_t;
248 
249 /*
250  * Head struct for doubly linked chain of dircache_t
251  * The next and prev fields must match those of a dircache_t
252  */
253 typedef struct {
254 	dircache_t *dch_next;		/* next in chain */
255 	dircache_t *dch_prev;		/* prev in chain */
256 	kmutex_t dch_lock;		/* lock for the chain */
257 } dchead_t;
258 
259 
260 #if defined(_KERNEL)
261 
262 /*
263  * Status returns from the directory cache interfaces
264  */
265 typedef enum {
266 	DOK,		/* operation sucessful */
267 	DNOCACHE,	/* there is no cache */
268 	DFOUND,		/* entry found */
269 	DNOENT,		/* no entry found */
270 	DTOOBIG,	/* exceeds tunable dnlc_max_dir_cache */
271 	DNOMEM		/* no memory */
272 } dcret_t;
273 
274 /*
275  * dnlc_dir_start() requests that a directory be cached.
276  * This must be called initially to enable caching on a directory.
277  * After a successful call, directory entries and free space can be
278  * added (see below) until the directory is marked complete.
279  * "num_entries" is an estimate of the current number of
280  * directory entries. The request is rejected with DNOCACHE
281  * if num_entries falls below the tunable dnlc_dir_min_size (see
282  * below), and rejected with DTOOBIG if it's above dnlc_dir_max_size.
283  * Returns DOK, DNOCACHE, DTOOBIG, DNOMEM.
284  *
285  * Due to memory shortages, directory caches can be purged at
286  * any time. If the last directory cache is purged due to memory
287  * shortage, then the directory cache is marked internally
288  * as "no memory". Future returns will all be DNOCACHE until
289  * the next dnlc_start_dir() which will return DNOMEM once.
290  * This memory shortage may only be transient. It's up to the
291  * file system as to what to do about this condition, but an
292  * attempt to immediately re-build the cache will very likely
293  * lead to the same shortage of memory and a thrashing situation.
294  *
295  * It's file system policy as to when and what size directories to cache.
296  */
297 dcret_t dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries);
298 
299 /*
300  * dnlc_dir_add_entry() adds an entry (name and handle) into a
301  * partial or complete cache. "handle" is a file system specific
302  * quantity that is returned on calls to dnlc_dir_lookup() - see below.
303  * For example, "handle" for ufs holds the inumber and a directory
304  * entry offset. Returns DOK, DNOCACHE, DTOOBIG.
305  */
306 dcret_t dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle);
307 
308 /*
309  * dnlc_dir_add_space adds free space (length and file system specific
310  * handle) into a partial or complete cache. "handle" is a file
311  * system specific quantity that is returned on calls to
312  * dnlc_dir_rem_space_by_len(). For example, "handle" for ufs holds
313  * the directory entry offset.  Returns DOK, DNOCACHE, DTOOBIG.
314  */
315 dcret_t dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle);
316 
317 /*
318  * dnlc_dir_complete() indicates the previously partial cache is now complete.
319  */
320 void dnlc_dir_complete(dcanchor_t *dcap);
321 
322 /*
323  * dnlc_dir_purge() deletes a partial or complete directory cache
324  */
325 void dnlc_dir_purge(dcanchor_t *dcap);
326 
327 /*
328  * dnlc_dir_lookup() lookups a file name in a directory cache
329  * and returns the file system handle specified on dnlc_dir_add_entry()
330  * in "handlep". Returns DFOUND, DNOENT, DNOCACHE.
331  */
332 dcret_t dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handlep);
333 
334 /*
335  * dnlc_dir_update() amends the handle for an entry in a directory cache
336  * "handle" is the new file system specific handle for the file "name".
337  * Returns DFOUND, DNOENT, DNOCACHE.
338  */
339 dcret_t dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle);
340 
341 /*
342  * dnlc_dir_rem_entry() removes an entry form a directory cache.
343  * Returns the handle if "handlep" non null.
344  * Returns DFOUND, DNOENT, DNOCACHE.
345  */
346 dcret_t dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name,
347     uint64_t *handlep);
348 
349 /*
350  * dnlc_dir_rem_space_by_len() looks up and returns free space in a
351  * directory cache of at least the given "len". Returns in "handlep"
352  * the handle supplied when adding the free space in dnlc_dir_add_space().
353  * Returns DFOUND, DNOENT, DNOCACHE.
354  */
355 dcret_t dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len,
356     uint64_t *handlep);
357 
358 /*
359  * dnlc_dir_rem_space_by_handle() looks up and removes the free space in
360  * a directory cache with the given handle. Returns DFOUND, DNOENT, DNOCACHE.
361  */
362 dcret_t dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle);
363 
364 /*
365  * dnlc_dir_init() initialises a directory anchor
366  */
367 #define	dnlc_dir_init(dcap) { \
368 	(dcap)->dca_dircache = NULL; \
369 	mutex_init(&(dcap)->dca_lock, NULL, MUTEX_DEFAULT, NULL); }
370 
371 /*
372  * dnlc_dir_fini() is called to indicate the anchor is no longer used.
373  * It ensures there's no directory cache and mutex_destroys the lock
374  */
375 void dnlc_dir_fini(dcanchor_t *dcap);
376 
377 #endif	/* defined(_KERNEL) */
378 
379 #ifdef	__cplusplus
380 }
381 #endif
382 
383 #endif	/* _SYS_DNLC_H */
384