xref: /illumos-gate/usr/src/cmd/svc/startd/transition.c (revision d656abb5804319b33c85955a73ee450ef7ff9739)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * transition.c - Graph State Machine
29  *
30  * The graph state machine is implemented here, with a typical approach
31  * of a function per state.  Separating the implementation allows more
32  * clarity into the actions taken on notification of state change, as well
33  * as a place for future expansion including hooks for configurable actions.
34  * All functions are called with dgraph_lock held.
35  *
36  * The start action for this state machine is not explicit.  The states
37  * (ONLINE and DEGRADED) which need to know when they're entering the state
38  * due to a daemon restart implement this understanding by checking for
39  * transition from uninitialized.  In the future, this would likely be better
40  * as an explicit start action instead of relying on an overloaded transition.
41  *
42  * All gt_enter functions use the same set of return codes.
43  *    0              success
44  *    ECONNABORTED   repository connection aborted
45  */
46 
47 #include "startd.h"
48 
49 static int
50 gt_running(restarter_instance_state_t state)
51 {
52 	if (state == RESTARTER_STATE_ONLINE ||
53 	    state == RESTARTER_STATE_DEGRADED)
54 		return (1);
55 
56 	return (0);
57 }
58 
59 static int
60 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
61     restarter_instance_state_t old_state, restarter_error_t rerr)
62 {
63 	int err;
64 	scf_instance_t *inst;
65 
66 	/* Initialize instance by refreshing it. */
67 
68 	err = libscf_fmri_get_instance(h, v->gv_name, &inst);
69 	switch (err) {
70 	case 0:
71 		break;
72 
73 	case ECONNABORTED:
74 		return (ECONNABORTED);
75 
76 	case ENOENT:
77 		return (0);
78 
79 	case EINVAL:
80 	case ENOTSUP:
81 	default:
82 		bad_error("libscf_fmri_get_instance", err);
83 	}
84 
85 	err = refresh_vertex(v, inst);
86 	if (err == 0)
87 		graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
88 
89 	scf_instance_destroy(inst);
90 
91 	/* If the service was running, propagate a stop event. */
92 	if (gt_running(old_state)) {
93 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
94 		    v->gv_name);
95 
96 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
97 	}
98 
99 	graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
100 	return (0);
101 }
102 
103 /* ARGSUSED */
104 static int
105 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
106     restarter_instance_state_t old_state, restarter_error_t rerr)
107 {
108 	int to_offline = v->gv_flags & GV_TOOFFLINE;
109 
110 	/*
111 	 * If the service was running, propagate a stop event.  If the
112 	 * service was not running the maintenance transition may satisfy
113 	 * optional dependencies and should be propagated to determine
114 	 * whether new dependents are satisfiable.
115 	 * Instances that transition to maintenance and have the GV_TOOFFLINE
116 	 * flag are special because they can expose new subtree leaves so
117 	 * propagate the offline to the instance dependencies.
118 	 */
119 
120 	/* instance transitioning to maintenance is considered disabled */
121 	v->gv_flags &= ~GV_TODISABLE;
122 	v->gv_flags &= ~GV_TOOFFLINE;
123 
124 	if (gt_running(old_state)) {
125 		/*
126 		 * Handle state change during instance disabling.
127 		 * Propagate offline to the new exposed leaves.
128 		 */
129 		if (to_offline) {
130 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
131 			    v->gv_name);
132 
133 			graph_offline_subtree_leaves(v, (void *)h);
134 		}
135 
136 		log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
137 		    "%s.\n", v->gv_name);
138 
139 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
140 	} else {
141 		log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
142 		    v->gv_name);
143 
144 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
145 	}
146 
147 	graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
148 	return (0);
149 }
150 
151 /* ARGSUSED */
152 static int
153 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
154     restarter_instance_state_t old_state, restarter_error_t rerr)
155 {
156 	int to_offline = v->gv_flags & GV_TOOFFLINE;
157 
158 	v->gv_flags &= ~GV_TOOFFLINE;
159 
160 	/*
161 	 * If the instance should be enabled, see if we can start it.
162 	 * Otherwise send a disable command.
163 	 * If a instance has the GV_TOOFFLINE flag set then it must
164 	 * remains offline until the disable process completes.
165 	 */
166 	if (v->gv_flags & GV_ENABLED) {
167 		if (to_offline == 0)
168 			graph_start_if_satisfied(v);
169 	} else {
170 		if (gt_running(old_state) && v->gv_post_disable_f)
171 			v->gv_post_disable_f();
172 
173 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
174 	}
175 
176 	/*
177 	 * If the service was running, propagate a stop event.  If the
178 	 * service was not running the offline transition may satisfy
179 	 * optional dependencies and should be propagated to determine
180 	 * whether new dependents are satisfiable.
181 	 * Instances that transition to offline and have the GV_TOOFFLINE flag
182 	 * are special because they can expose new subtree leaves so propagate
183 	 * the offline to the instance dependencies.
184 	 */
185 	if (gt_running(old_state)) {
186 		/*
187 		 * Handle state change during instance disabling.
188 		 * Propagate offline to the new exposed leaves.
189 		 */
190 		if (to_offline) {
191 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
192 			    v->gv_name);
193 
194 			graph_offline_subtree_leaves(v, (void *)h);
195 		}
196 
197 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
198 		    v->gv_name);
199 
200 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
201 
202 		/*
203 		 * The offline transition may satisfy require_any/restart
204 		 * dependencies and should be propagated to determine
205 		 * whether new dependents are satisfiable.
206 		 */
207 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
208 	} else {
209 		log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
210 		    v->gv_name);
211 
212 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
213 	}
214 
215 	graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
216 	return (0);
217 }
218 
219 /* ARGSUSED */
220 static int
221 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
222     restarter_instance_state_t old_state, restarter_error_t rerr)
223 {
224 	int to_offline = v->gv_flags & GV_TOOFFLINE;
225 
226 	v->gv_flags &= ~GV_TODISABLE;
227 	v->gv_flags &= ~GV_TOOFFLINE;
228 
229 	/*
230 	 * If the instance should be disabled, no problem.  Otherwise,
231 	 * send an enable command, which should result in the instance
232 	 * moving to OFFLINE unless the instance is part of a subtree
233 	 * (non root) and in this case the result is unpredictable.
234 	 */
235 	if (v->gv_flags & GV_ENABLED) {
236 		vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
237 	} else if (gt_running(old_state) && v->gv_post_disable_f) {
238 		v->gv_post_disable_f();
239 	}
240 
241 	/*
242 	 * If the service was running, propagate this as a stop.  If the
243 	 * service was not running the disabled transition may satisfy
244 	 * optional dependencies and should be propagated to determine
245 	 * whether new dependents are satisfiable.
246 	 */
247 	if (gt_running(old_state)) {
248 		/*
249 		 * We need to propagate the offline to new exposed leaves in
250 		 * case we've just disabled an instance that was part of a
251 		 * subtree.
252 		 */
253 		if (to_offline) {
254 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
255 			    v->gv_name);
256 
257 			/*
258 			 * Handle state change during instance disabling.
259 			 * Propagate offline to the new exposed leaves.
260 			 */
261 			graph_offline_subtree_leaves(v, (void *)h);
262 		}
263 
264 
265 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
266 		    v->gv_name);
267 
268 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
269 
270 	} else {
271 		log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
272 		    v->gv_name);
273 
274 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
275 	}
276 
277 	graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
278 	return (0);
279 }
280 
281 static int
282 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
283     restarter_instance_state_t old_state, restarter_error_t rerr)
284 {
285 	int r;
286 
287 	/*
288 	 * If the instance has just come up, update the start
289 	 * snapshot.
290 	 */
291 	if (gt_running(old_state) == 0) {
292 		/*
293 		 * Don't fire if we're just recovering state
294 		 * after a restart.
295 		 */
296 		if (old_state != RESTARTER_STATE_UNINIT &&
297 		    v->gv_post_online_f)
298 			v->gv_post_online_f();
299 
300 		r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
301 		switch (r) {
302 		case 0:
303 		case ENOENT:
304 			/*
305 			 * If ENOENT, the instance must have been
306 			 * deleted.  Pretend we were successful since
307 			 * we should get a delete event later.
308 			 */
309 			break;
310 
311 		case ECONNABORTED:
312 			return (ECONNABORTED);
313 
314 		case EACCES:
315 		case ENOTSUP:
316 		default:
317 			bad_error("libscf_snapshots_poststart", r);
318 		}
319 	}
320 	if (!(v->gv_flags & GV_ENABLED))
321 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
322 
323 	if (gt_running(old_state) == 0) {
324 		log_framework(LOG_DEBUG, "Propagating start of %s.\n",
325 		    v->gv_name);
326 
327 		graph_transition_propagate(v, PROPAGATE_START, rerr);
328 	} else if (rerr == RERR_REFRESH) {
329 		/* For refresh we'll get a message sans state change */
330 
331 		log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
332 		    v->gv_name);
333 
334 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
335 	}
336 
337 	return (0);
338 }
339 
340 static int
341 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
342     restarter_instance_state_t old_state, restarter_error_t rerr)
343 {
344 	int r;
345 
346 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
347 	if (r != 0)
348 		return (r);
349 
350 	graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
351 	return (0);
352 }
353 
354 static int
355 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
356     restarter_instance_state_t old_state, restarter_error_t rerr)
357 {
358 	int r;
359 
360 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
361 	if (r != 0)
362 		return (r);
363 
364 	graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
365 	return (0);
366 }
367 
368 /*
369  * gt_transition() implements the state transition for the graph
370  * state machine.  It can return:
371  *    0              success
372  *    ECONNABORTED   repository connection aborted
373  *
374  * v->gv_state should be set to the state we're transitioning to before
375  * calling this function.
376  */
377 int
378 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
379     restarter_instance_state_t old_state)
380 {
381 	int err;
382 	int lost_repository = 0;
383 
384 	/*
385 	 * If there's a common set of work to be done on exit from the
386 	 * old_state, include it as a separate set of functions here.  For
387 	 * now there's no such work, so there are no gt_exit functions.
388 	 */
389 
390 	err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
391 	switch (err) {
392 	case 0:
393 		break;
394 
395 	case ECONNABORTED:
396 		lost_repository = 1;
397 		break;
398 
399 	default:
400 		bad_error("vertex_subgraph_dependencies_shutdown", err);
401 	}
402 
403 	/*
404 	 * Now call the appropriate gt_enter function for the new state.
405 	 */
406 	switch (v->gv_state) {
407 	case RESTARTER_STATE_UNINIT:
408 		err = gt_enter_uninit(h, v, old_state, rerr);
409 		break;
410 
411 	case RESTARTER_STATE_DISABLED:
412 		err = gt_enter_disabled(h, v, old_state, rerr);
413 		break;
414 
415 	case RESTARTER_STATE_OFFLINE:
416 		err = gt_enter_offline(h, v, old_state, rerr);
417 		break;
418 
419 	case RESTARTER_STATE_ONLINE:
420 		err = gt_enter_online(h, v, old_state, rerr);
421 		break;
422 
423 	case RESTARTER_STATE_DEGRADED:
424 		err = gt_enter_degraded(h, v, old_state, rerr);
425 		break;
426 
427 	case RESTARTER_STATE_MAINT:
428 		err = gt_enter_maint(h, v, old_state, rerr);
429 		break;
430 
431 	default:
432 		/* Shouldn't be in an invalid state. */
433 #ifndef NDEBUG
434 		uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
435 		    v->gv_state);
436 #endif
437 		abort();
438 	}
439 
440 	switch (err) {
441 	case 0:
442 		break;
443 
444 	case ECONNABORTED:
445 		lost_repository = 1;
446 		break;
447 
448 	default:
449 #ifndef NDEBUG
450 		uu_warn("%s:%d: "
451 		    "gt_enter_%s() failed with unexpected error %d.\n",
452 		    __FILE__, __LINE__, instance_state_str[v->gv_state], err);
453 #endif
454 		abort();
455 	}
456 
457 	return (lost_repository ? ECONNABORTED : 0);
458 }
459