001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.service;
016
017 import java.util.ArrayList;
018 import java.util.List;
019
020 import org.apache.hadoop.conf.Configuration;
021 import org.apache.oozie.CoordinatorActionBean;
022 import org.apache.oozie.WorkflowActionBean;
023 import org.apache.oozie.command.coord.CoordActionCheckCommand;
024 import org.apache.oozie.command.wf.ActionCheckCommand;
025 import org.apache.oozie.store.CoordinatorStore;
026 import org.apache.oozie.store.Store;
027 import org.apache.oozie.store.StoreException;
028 import org.apache.oozie.store.WorkflowStore;
029 import org.apache.oozie.util.XCallable;
030 import org.apache.oozie.util.XLog;
031
032 /**
033 * The Action Checker Service queue ActionCheckCommands to check the status of running actions and
034 * CoordActionCheckCommands to check the status of coordinator actions. The delay between checks on the same action can
035 * be configured.
036 */
037 public class ActionCheckerService implements Service {
038
039 public static final String CONF_PREFIX = Service.CONF_PREFIX + "ActionCheckerService.";
040 /**
041 * The frequency at which the ActionCheckService will run.
042 */
043 public static final String CONF_ACTION_CHECK_INTERVAL = CONF_PREFIX + "action.check.interval";
044 /**
045 * The time, in seconds, between an ActionCheck for the same action.
046 */
047 public static final String CONF_ACTION_CHECK_DELAY = CONF_PREFIX + "action.check.delay";
048
049 /**
050 * The number of callables to be queued in a batch.
051 */
052 public static final String CONF_CALLABLE_BATCH_SIZE = CONF_PREFIX + "callable.batch.size";
053
054 protected static final String INSTRUMENTATION_GROUP = "actionchecker";
055 protected static final String INSTR_CHECK_ACTIONS_COUNTER = "checks_wf_actions";
056 protected static final String INSTR_CHECK_COORD_ACTIONS_COUNTER = "checks_coord_actions";
057
058 /**
059 * {@link ActionCheckRunnable} is the runnable which is scheduled to run and queue Action checks.
060 */
061 static class ActionCheckRunnable<S extends Store> implements Runnable {
062 private int actionCheckDelay;
063 private List<XCallable<Void>> callables;
064 private StringBuilder msg = null;
065
066 public ActionCheckRunnable(int actionCheckDelay) {
067 this.actionCheckDelay = actionCheckDelay;
068 }
069
070 public void run() {
071 XLog.Info.get().clear();
072 XLog log = XLog.getLog(getClass());
073 msg = new StringBuilder();
074 runWFActionCheck();
075 runCoordActionCheck();
076 log.debug("QUEUING [{0}] for potential checking", msg.toString());
077 if (null != callables) {
078 boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables);
079 if (ret == false) {
080 log.warn("Unable to queue the callables commands for CheckerService. "
081 + "Most possibly command queue is full. Queue size is :"
082 + Services.get().get(CallableQueueService.class).queueSize());
083 }
084 callables = null;
085 }
086 }
087
088 /**
089 * check workflow actions
090 */
091 private void runWFActionCheck() {
092 XLog.Info.get().clear();
093 XLog log = XLog.getLog(getClass());
094
095 WorkflowStore store = null;
096 try {
097 store = (WorkflowStore) Services.get().get(StoreService.class).getStore(WorkflowStore.class);
098 store.beginTrx();
099 List<WorkflowActionBean> actions = store.getRunningActions(actionCheckDelay);
100 msg.append(" WF_ACTIONS : " + actions.size());
101 for (WorkflowActionBean action : actions) {
102 Services.get().get(InstrumentationService.class).get().incr(INSTRUMENTATION_GROUP,
103 INSTR_CHECK_ACTIONS_COUNTER, 1);
104 queueCallable(new ActionCheckCommand(action.getId()));
105 }
106 store.commitTrx();
107 }
108 catch (StoreException ex) {
109 if (store != null) {
110 store.rollbackTrx();
111 }
112 log.warn("Exception while accessing the store", ex);
113 }
114 catch (Exception ex) {
115 log.error("Exception, {0}", ex.getMessage(), ex);
116 if (store != null && store.isActive()) {
117 try {
118 store.rollbackTrx();
119 }
120 catch (RuntimeException rex) {
121 log.warn("openjpa error, {0}", rex.getMessage(), rex);
122 }
123 }
124 }
125 finally {
126 if (store != null) {
127 if (!store.isActive()) {
128 try {
129 store.closeTrx();
130 }
131 catch (RuntimeException rex) {
132 log.warn("Exception while attempting to close store", rex);
133 }
134 }
135 else {
136 log.warn("transaction is not committed or rolled back before closing entitymanager.");
137 }
138 }
139 }
140 }
141
142 /**
143 * check coordinator actions
144 */
145 private void runCoordActionCheck() {
146 XLog.Info.get().clear();
147 XLog log = XLog.getLog(getClass());
148
149 CoordinatorStore store = null;
150 try {
151 store = Services.get().get(StoreService.class).getStore(CoordinatorStore.class);
152 store.beginTrx();
153 List<CoordinatorActionBean> cactions = store.getRunningActionsOlderThan(actionCheckDelay, false);
154 msg.append(" COORD_ACTIONS : " + cactions.size());
155 for (CoordinatorActionBean caction : cactions) {
156 Services.get().get(InstrumentationService.class).get().incr(INSTRUMENTATION_GROUP,
157 INSTR_CHECK_COORD_ACTIONS_COUNTER, 1);
158 queueCallable(new CoordActionCheckCommand(caction.getId(), actionCheckDelay));
159 }
160 store.commitTrx();
161 }
162 catch (StoreException ex) {
163 if (store != null) {
164 store.rollbackTrx();
165 }
166 log.warn("Exception while accessing the store", ex);
167 }
168 catch (Exception ex) {
169 log.error("Exception, {0}", ex.getMessage(), ex);
170 if (store != null && store.isActive()) {
171 try {
172 store.rollbackTrx();
173 }
174 catch (RuntimeException rex) {
175 log.warn("openjpa error, {0}", rex.getMessage(), rex);
176 }
177 }
178 }
179 finally {
180 if (store != null) {
181 if (!store.isActive()) {
182 try {
183 store.closeTrx();
184 }
185 catch (RuntimeException rex) {
186 log.warn("Exception while attempting to close store", rex);
187 }
188 }
189 else {
190 log.warn("transaction is not committed or rolled back before closing entitymanager.");
191 }
192 }
193 }
194 }
195
196 /**
197 * Adds callables to a list. If the number of callables in the list reaches {@link
198 * ActionCheckerService#CONF_CALLABLE_BATCH_SIZE}, the entire batch is queued and the callables list is reset.
199 *
200 * @param callable the callable to queue.
201 */
202 private void queueCallable(XCallable<Void> callable) {
203 if (callables == null) {
204 callables = new ArrayList<XCallable<Void>>();
205 }
206 callables.add(callable);
207 if (callables.size() == Services.get().getConf().getInt(CONF_CALLABLE_BATCH_SIZE, 10)) {
208 boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables);
209 if (ret == false) {
210 XLog.getLog(getClass()).warn(
211 "Unable to queue the callables commands for CheckerService. "
212 + "Most possibly command queue is full. Queue size is :"
213 + Services.get().get(CallableQueueService.class).queueSize());
214 }
215 callables = new ArrayList<XCallable<Void>>();
216 }
217 }
218 }
219
220 /**
221 * Initializes the Action Check service.
222 *
223 * @param services services instance.
224 */
225 @Override
226 public void init(Services services) {
227 Configuration conf = services.getConf();
228 Runnable actionCheckRunnable = new ActionCheckRunnable(conf.getInt(CONF_ACTION_CHECK_DELAY, 600));
229 services.get(SchedulerService.class).schedule(actionCheckRunnable, 10,
230 conf.getInt(CONF_ACTION_CHECK_INTERVAL, 60), SchedulerService.Unit.SEC);
231 }
232
233 /**
234 * Destroy the Action Checker Services.
235 */
236 @Override
237 public void destroy() {
238 }
239
240 /**
241 * Return the public interface for the action checker service.
242 *
243 * @return {@link ActionCheckerService}.
244 */
245 @Override
246 public Class<? extends Service> getInterface() {
247 return ActionCheckerService.class;
248 }
249 }