xref: /illumos-gate/usr/src/cmd/svc/startd/transition.c (revision 5f82aa32fbc5dc2c59bca6ff315f44a4c4c9ea86)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2016 RackTop Systems.
26  */
27 
28 
29 /*
30  * transition.c - Graph State Machine
31  *
32  * The graph state machine is implemented here, with a typical approach
33  * of a function per state.  Separating the implementation allows more
34  * clarity into the actions taken on notification of state change, as well
35  * as a place for future expansion including hooks for configurable actions.
36  * All functions are called with dgraph_lock held.
37  *
38  * The start action for this state machine is not explicit.  The states
39  * (ONLINE and DEGRADED) which need to know when they're entering the state
40  * due to a daemon restart implement this understanding by checking for
41  * transition from uninitialized.  In the future, this would likely be better
42  * as an explicit start action instead of relying on an overloaded transition.
43  *
44  * All gt_enter functions use the same set of return codes.
45  *    0              success
46  *    ECONNABORTED   repository connection aborted
47  */
48 
49 #include "startd.h"
50 
51 static int
52 gt_running(restarter_instance_state_t state)
53 {
54 	if (state == RESTARTER_STATE_ONLINE ||
55 	    state == RESTARTER_STATE_DEGRADED)
56 		return (1);
57 
58 	return (0);
59 }
60 
61 static int
62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
63     restarter_instance_state_t old_state, restarter_error_t rerr)
64 {
65 	int err;
66 	scf_instance_t *inst;
67 
68 	/* Initialize instance by refreshing it. */
69 
70 	err = libscf_fmri_get_instance(h, v->gv_name, &inst);
71 	switch (err) {
72 	case 0:
73 		break;
74 
75 	case ECONNABORTED:
76 		return (ECONNABORTED);
77 
78 	case ENOENT:
79 		return (0);
80 
81 	case EINVAL:
82 	case ENOTSUP:
83 	default:
84 		bad_error("libscf_fmri_get_instance", err);
85 	}
86 
87 	err = refresh_vertex(v, inst);
88 	if (err == 0)
89 		graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
90 
91 	scf_instance_destroy(inst);
92 
93 	/* If the service was running, propagate a stop event. */
94 	if (gt_running(old_state)) {
95 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
96 		    v->gv_name);
97 
98 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
99 	}
100 
101 	graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
102 	return (0);
103 }
104 
105 /* ARGSUSED */
106 static int
107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
108     restarter_instance_state_t old_state, restarter_error_t rerr)
109 {
110 	int to_offline = v->gv_flags & GV_TOOFFLINE;
111 
112 	/*
113 	 * If the service was running, propagate a stop event.  If the
114 	 * service was not running the maintenance transition may satisfy
115 	 * optional dependencies and should be propagated to determine
116 	 * whether new dependents are satisfiable.
117 	 * Instances that transition to maintenance and have the GV_TOOFFLINE
118 	 * flag are special because they can expose new subtree leaves so
119 	 * propagate the offline to the instance dependencies.
120 	 */
121 
122 	/* instance transitioning to maintenance is considered disabled */
123 	v->gv_flags &= ~GV_TODISABLE;
124 	v->gv_flags &= ~GV_TOOFFLINE;
125 
126 	if (gt_running(old_state)) {
127 		/*
128 		 * Handle state change during instance disabling.
129 		 * Propagate offline to the new exposed leaves.
130 		 */
131 		if (to_offline) {
132 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
133 			    v->gv_name);
134 
135 			graph_offline_subtree_leaves(v, (void *)h);
136 		}
137 
138 		log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
139 		    "%s.\n", v->gv_name);
140 
141 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142 
143 		/*
144 		 * The maintenance transition may satisfy optional_all/restart
145 		 * dependencies and should be propagated to determine
146 		 * whether new dependents are satisfiable.
147 		 */
148 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
149 	} else {
150 		log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
151 		    v->gv_name);
152 
153 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
154 	}
155 
156 	graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
157 	return (0);
158 }
159 
160 /* ARGSUSED */
161 static int
162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
163     restarter_instance_state_t old_state, restarter_error_t rerr)
164 {
165 	int to_offline = v->gv_flags & GV_TOOFFLINE;
166 	int to_disable = v->gv_flags & GV_TODISABLE;
167 
168 	v->gv_flags &= ~GV_TOOFFLINE;
169 
170 	/*
171 	 * If the instance should be enabled, see if we can start it.
172 	 * Otherwise send a disable command.
173 	 * If a instance has the GV_TOOFFLINE flag set then it must
174 	 * remains offline until the disable process completes.
175 	 */
176 	if (v->gv_flags & GV_ENABLED) {
177 		if (to_offline == 0 && to_disable == 0)
178 			graph_start_if_satisfied(v);
179 	} else {
180 		if (gt_running(old_state) && v->gv_post_disable_f)
181 			v->gv_post_disable_f();
182 
183 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
184 	}
185 
186 	/*
187 	 * If the service was running, propagate a stop event.  If the
188 	 * service was not running the offline transition may satisfy
189 	 * optional dependencies and should be propagated to determine
190 	 * whether new dependents are satisfiable.
191 	 * Instances that transition to offline and have the GV_TOOFFLINE flag
192 	 * are special because they can expose new subtree leaves so propagate
193 	 * the offline to the instance dependencies.
194 	 */
195 	if (gt_running(old_state)) {
196 		/*
197 		 * Handle state change during instance disabling.
198 		 * Propagate offline to the new exposed leaves.
199 		 */
200 		if (to_offline) {
201 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
202 			    v->gv_name);
203 
204 			graph_offline_subtree_leaves(v, (void *)h);
205 		}
206 
207 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
208 		    v->gv_name);
209 
210 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
211 
212 		/*
213 		 * The offline transition may satisfy require_any/restart
214 		 * dependencies and should be propagated to determine
215 		 * whether new dependents are satisfiable.
216 		 */
217 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
218 	} else {
219 		log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
220 		    v->gv_name);
221 
222 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
223 	}
224 
225 	graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
226 	return (0);
227 }
228 
229 /* ARGSUSED */
230 static int
231 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
232     restarter_instance_state_t old_state, restarter_error_t rerr)
233 {
234 	int to_offline = v->gv_flags & GV_TOOFFLINE;
235 
236 	v->gv_flags &= ~GV_TODISABLE;
237 	v->gv_flags &= ~GV_TOOFFLINE;
238 
239 	/*
240 	 * If the instance should be disabled, no problem.  Otherwise,
241 	 * send an enable command, which should result in the instance
242 	 * moving to OFFLINE unless the instance is part of a subtree
243 	 * (non root) and in this case the result is unpredictable.
244 	 */
245 	if (v->gv_flags & GV_ENABLED) {
246 		vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
247 	} else if (gt_running(old_state) && v->gv_post_disable_f) {
248 		v->gv_post_disable_f();
249 	}
250 
251 	/*
252 	 * If the service was running, propagate this as a stop.  If the
253 	 * service was not running the disabled transition may satisfy
254 	 * optional dependencies and should be propagated to determine
255 	 * whether new dependents are satisfiable.
256 	 */
257 	if (gt_running(old_state)) {
258 		/*
259 		 * We need to propagate the offline to new exposed leaves in
260 		 * case we've just disabled an instance that was part of a
261 		 * subtree.
262 		 */
263 		if (to_offline) {
264 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
265 			    v->gv_name);
266 
267 			/*
268 			 * Handle state change during instance disabling.
269 			 * Propagate offline to the new exposed leaves.
270 			 */
271 			graph_offline_subtree_leaves(v, (void *)h);
272 		}
273 
274 
275 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
276 		    v->gv_name);
277 
278 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
279 
280 		/*
281 		 * The disable transition may satisfy optional_all/restart
282 		 * dependencies and should be propagated to determine
283 		 * whether new dependents are satisfiable.
284 		 */
285 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
286 	} else {
287 		log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
288 		    v->gv_name);
289 
290 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
291 	}
292 
293 	graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
294 	return (0);
295 }
296 
297 static int
298 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
299     restarter_instance_state_t old_state, restarter_error_t rerr)
300 {
301 	int r;
302 
303 	/*
304 	 * If the instance has just come up, update the start
305 	 * snapshot.
306 	 */
307 	if (gt_running(old_state) == 0) {
308 		/*
309 		 * Don't fire if we're just recovering state
310 		 * after a restart.
311 		 */
312 		if (old_state != RESTARTER_STATE_UNINIT &&
313 		    v->gv_post_online_f)
314 			v->gv_post_online_f();
315 
316 		r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
317 		switch (r) {
318 		case 0:
319 		case ENOENT:
320 			/*
321 			 * If ENOENT, the instance must have been
322 			 * deleted.  Pretend we were successful since
323 			 * we should get a delete event later.
324 			 */
325 			break;
326 
327 		case ECONNABORTED:
328 			return (ECONNABORTED);
329 
330 		case EACCES:
331 		case ENOTSUP:
332 		default:
333 			bad_error("libscf_snapshots_poststart", r);
334 		}
335 	}
336 
337 	if (!(v->gv_flags & GV_ENABLED)) {
338 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
339 	} else if (v->gv_flags & GV_TOOFFLINE) {
340 		/*
341 		 * If the vertex has the GV_TOOFFLINE flag set then that's
342 		 * because the instance was transitioning from offline to
343 		 * online and the reverse disable algorithm doesn't offline
344 		 * those instances because it was already appearing offline.
345 		 * So do it now.
346 		 */
347 		offline_vertex(v);
348 	}
349 
350 	if (gt_running(old_state) == 0) {
351 		log_framework(LOG_DEBUG, "Propagating start of %s.\n",
352 		    v->gv_name);
353 
354 		graph_transition_propagate(v, PROPAGATE_START, rerr);
355 	} else if (rerr == RERR_REFRESH) {
356 		/* For refresh we'll get a message sans state change */
357 
358 		log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
359 		    v->gv_name);
360 
361 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
362 	}
363 
364 	return (0);
365 }
366 
367 static int
368 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
369     restarter_instance_state_t old_state, restarter_error_t rerr)
370 {
371 	int r;
372 
373 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
374 	if (r != 0)
375 		return (r);
376 
377 	graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
378 	return (0);
379 }
380 
381 static int
382 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
383     restarter_instance_state_t old_state, restarter_error_t rerr)
384 {
385 	int r;
386 
387 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
388 	if (r != 0)
389 		return (r);
390 
391 	graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
392 	return (0);
393 }
394 
395 /*
396  * gt_transition() implements the state transition for the graph
397  * state machine.  It can return:
398  *    0              success
399  *    ECONNABORTED   repository connection aborted
400  *
401  * v->gv_state should be set to the state we're transitioning to before
402  * calling this function.
403  */
404 int
405 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
406     restarter_instance_state_t old_state)
407 {
408 	int err;
409 	int lost_repository = 0;
410 
411 	/*
412 	 * If there's a common set of work to be done on exit from the
413 	 * old_state, include it as a separate set of functions here.  For
414 	 * now there's no such work, so there are no gt_exit functions.
415 	 */
416 
417 	err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
418 	switch (err) {
419 	case 0:
420 		break;
421 
422 	case ECONNABORTED:
423 		lost_repository = 1;
424 		break;
425 
426 	default:
427 		bad_error("vertex_subgraph_dependencies_shutdown", err);
428 	}
429 
430 	/*
431 	 * Now call the appropriate gt_enter function for the new state.
432 	 */
433 	switch (v->gv_state) {
434 	case RESTARTER_STATE_UNINIT:
435 		err = gt_enter_uninit(h, v, old_state, rerr);
436 		break;
437 
438 	case RESTARTER_STATE_DISABLED:
439 		err = gt_enter_disabled(h, v, old_state, rerr);
440 		break;
441 
442 	case RESTARTER_STATE_OFFLINE:
443 		err = gt_enter_offline(h, v, old_state, rerr);
444 		break;
445 
446 	case RESTARTER_STATE_ONLINE:
447 		err = gt_enter_online(h, v, old_state, rerr);
448 		break;
449 
450 	case RESTARTER_STATE_DEGRADED:
451 		err = gt_enter_degraded(h, v, old_state, rerr);
452 		break;
453 
454 	case RESTARTER_STATE_MAINT:
455 		err = gt_enter_maint(h, v, old_state, rerr);
456 		break;
457 
458 	default:
459 		/* Shouldn't be in an invalid state. */
460 #ifndef NDEBUG
461 		uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
462 		    v->gv_state);
463 #endif
464 		abort();
465 	}
466 
467 	switch (err) {
468 	case 0:
469 		break;
470 
471 	case ECONNABORTED:
472 		lost_repository = 1;
473 		break;
474 
475 	default:
476 #ifndef NDEBUG
477 		uu_warn("%s:%d: "
478 		    "gt_enter_%s() failed with unexpected error %d.\n",
479 		    __FILE__, __LINE__, instance_state_str[v->gv_state], err);
480 #endif
481 		abort();
482 	}
483 
484 	return (lost_repository ? ECONNABORTED : 0);
485 }
486