001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.command.coord;
016
017 import java.io.IOException;
018 import java.io.StringReader;
019 import java.util.ArrayList;
020 import java.util.Date;
021 import java.util.HashSet;
022 import java.util.List;
023 import java.util.Set;
024
025 import org.apache.hadoop.conf.Configuration;
026 import org.apache.hadoop.fs.Path;
027 import org.apache.oozie.CoordinatorActionBean;
028 import org.apache.oozie.CoordinatorActionInfo;
029 import org.apache.oozie.CoordinatorJobBean;
030 import org.apache.oozie.ErrorCode;
031 import org.apache.oozie.XException;
032 import org.apache.oozie.action.ActionExecutorException;
033 import org.apache.oozie.action.hadoop.FsActionExecutor;
034 import org.apache.oozie.client.CoordinatorAction;
035 import org.apache.oozie.client.CoordinatorJob;
036 import org.apache.oozie.client.SLAEvent.SlaAppType;
037 import org.apache.oozie.client.rest.RestConstants;
038 import org.apache.oozie.command.CommandException;
039 import org.apache.oozie.coord.CoordELFunctions;
040 import org.apache.oozie.store.CoordinatorStore;
041 import org.apache.oozie.store.StoreException;
042 import org.apache.oozie.util.DateUtils;
043 import org.apache.oozie.util.ParamChecker;
044 import org.apache.oozie.util.XConfiguration;
045 import org.apache.oozie.util.XLog;
046 import org.apache.oozie.util.XmlUtils;
047 import org.apache.oozie.util.db.SLADbOperations;
048 import org.jdom.Element;
049 import org.jdom.JDOMException;
050
051 public class CoordRerunCommand extends CoordinatorCommand<CoordinatorActionInfo> {
052
053 private String jobId;
054 private String rerunType;
055 private String scope;
056 private boolean refresh;
057 private boolean noCleanup;
058 private final XLog log = XLog.getLog(getClass());
059
060 public CoordRerunCommand(String jobId, String rerunType, String scope, boolean refresh, boolean noCleanup) {
061 super("coord_rerun", "coord_rerun", 1, XLog.STD);
062 this.jobId = ParamChecker.notEmpty(jobId, "jobId");
063 this.rerunType = ParamChecker.notEmpty(rerunType, "rerunType");
064 this.scope = ParamChecker.notEmpty(scope, "scope");
065 this.refresh = refresh;
066 this.noCleanup = noCleanup;
067 }
068
069 @Override
070 protected CoordinatorActionInfo call(CoordinatorStore store) throws StoreException, CommandException {
071 try {
072 CoordinatorJobBean coordJob = store.getCoordinatorJob(jobId, false);
073 CoordinatorActionInfo coordInfo = null;
074 setLogInfo(coordJob);
075 if (coordJob.getStatus() != CoordinatorJob.Status.KILLED
076 && coordJob.getStatus() != CoordinatorJob.Status.FAILED) {
077 incrJobCounter(1);
078
079 List<CoordinatorActionBean> coordActions;
080 if (rerunType.equals(RestConstants.JOB_COORD_RERUN_DATE)) {
081 coordActions = getCoordActionsFromDates(jobId, scope, store);
082 }
083 else if (rerunType.equals(RestConstants.JOB_COORD_RERUN_ACTION)) {
084 coordActions = getCoordActionsFromIds(jobId, scope, store);
085 }
086 else {
087 throw new CommandException(ErrorCode.E1018, "date or action expected.");
088 }
089 if (checkAllActionsRunnable(coordActions)) {
090 for (CoordinatorActionBean coordAction : coordActions) {
091 String actionXml = coordAction.getActionXml();
092 if (!noCleanup) {
093 Element eAction = XmlUtils.parseXml(actionXml);
094 cleanupOutputEvents(eAction, coordJob.getUser(), coordJob.getGroup());
095 }
096 if (refresh) {
097 refreshAction(coordJob, coordAction, store);
098 }
099 updateAction(coordJob, coordAction, actionXml, store);
100
101 // TODO: time 100s should be configurable
102 queueCallable(new CoordActionNotification(coordAction), 100);
103 queueCallable(new CoordActionInputCheckCommand(coordAction.getId()), 100);
104 }
105 }
106 else {
107 throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
108 }
109 coordInfo = new CoordinatorActionInfo(coordActions);
110 }
111 else {
112 log.info("CoordRerunCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid="
113 + jobId);
114 throw new CommandException(ErrorCode.E1018,
115 "coordinator job is killed or failed so all actions are not eligible to rerun!");
116 }
117 return coordInfo;
118 }
119 catch (XException xex) {
120 throw new CommandException(xex);
121 }
122 catch (JDOMException jex) {
123 throw new CommandException(ErrorCode.E0700, jex);
124 }
125 catch (Exception ex) {
126 throw new CommandException(ErrorCode.E1018, ex);
127 }
128 }
129
130 /**
131 * Get the list of actions for given id ranges
132 *
133 * @param jobId
134 * @param scope
135 * @param store
136 * @return the list of all actions to rerun
137 * @throws CommandException
138 * @throws StoreException
139 */
140 private List<CoordinatorActionBean> getCoordActionsFromIds(String jobId, String scope, CoordinatorStore store)
141 throws CommandException, StoreException {
142 ParamChecker.notEmpty(jobId, "jobId");
143 ParamChecker.notEmpty(scope, "scope");
144
145 Set<String> actions = new HashSet<String>();
146 String[] list = scope.split(",");
147 for (String s : list) {
148 s = s.trim();
149 if (s.contains("-")) {
150 String[] range = s.split("-");
151 if (range.length != 2) {
152 throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
153 }
154 int start;
155 int end;
156 try {
157 start = Integer.parseInt(range[0].trim());
158 end = Integer.parseInt(range[1].trim());
159 if (start > end) {
160 throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
161 }
162 }
163 catch (NumberFormatException ne) {
164 throw new CommandException(ErrorCode.E0302, ne);
165 }
166 for (int i = start; i <= end; i++) {
167 actions.add(jobId + "@" + i);
168 }
169 }
170 else {
171 try {
172 Integer.parseInt(s);
173 }
174 catch (NumberFormatException ne) {
175 throw new CommandException(ErrorCode.E0302, "format is wrong for action id'" + s
176 + "'. Integer only.");
177 }
178 actions.add(jobId + "@" + s);
179 }
180 }
181
182 List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
183 for (String id : actions) {
184 CoordinatorActionBean coordAction = store.getCoordinatorAction(id, false);
185 coordActions.add(coordAction);
186 log.debug("Rerun coordinator for actionId='" + id + "'");
187 }
188 return coordActions;
189 }
190
191 /**
192 * Get the list of actions for given date ranges
193 *
194 * @param jobId
195 * @param scope
196 * @param store
197 * @return the list of dates to rerun
198 * @throws CommandException
199 * @throws StoreException
200 */
201 private List<CoordinatorActionBean> getCoordActionsFromDates(String jobId, String scope, CoordinatorStore store)
202 throws CommandException, StoreException {
203 ParamChecker.notEmpty(jobId, "jobId");
204 ParamChecker.notEmpty(scope, "scope");
205
206 Set<CoordinatorActionBean> actionSet = new HashSet<CoordinatorActionBean>();
207 String[] list = scope.split(",");
208 for (String s : list) {
209 s = s.trim();
210 if (s.contains("::")) {
211 String[] dateRange = s.split("::");
212 if (dateRange.length != 2) {
213 throw new CommandException(ErrorCode.E0302, "format is wrong for date's range '" + s + "'");
214 }
215 Date start;
216 Date end;
217 try {
218 start = DateUtils.parseDateUTC(dateRange[0].trim());
219 end = DateUtils.parseDateUTC(dateRange[1].trim());
220 if (start.after(end)) {
221 throw new CommandException(ErrorCode.E0302, "start date is older than end date: '" + s + "'");
222 }
223 }
224 catch (Exception e) {
225 throw new CommandException(ErrorCode.E0302, e);
226 }
227
228 List<CoordinatorActionBean> listOfActions = getActionIdsFromDateRange(jobId, start, end, store);
229 actionSet.addAll(listOfActions);
230 }
231 else {
232 Date date;
233 try {
234 date = DateUtils.parseDateUTC(s.trim());
235 }
236 catch (Exception e) {
237 throw new CommandException(ErrorCode.E0302, e);
238 }
239
240 CoordinatorActionBean coordAction = store.getCoordActionForNominalTime(jobId, date);
241 actionSet.add(coordAction);
242 }
243 }
244
245 List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
246 for (CoordinatorActionBean coordAction : actionSet) {
247 coordActions.add(coordAction);
248 log.debug("Rerun coordinator for actionId='" + coordAction.getId() + "'");
249 }
250 return coordActions;
251 }
252
253 private List<CoordinatorActionBean> getActionIdsFromDateRange(String jobId, Date start, Date end,
254 CoordinatorStore store)
255 throws StoreException {
256 List<CoordinatorActionBean> list = store.getCoordActionsForDates(jobId, start, end);
257 return list;
258 }
259
260 /**
261 * Check if all given actions are eligible to rerun.
262 *
263 * @param actions list of CoordinatorActionBean
264 * @return true if all actions are eligible to rerun
265 */
266 private boolean checkAllActionsRunnable(List<CoordinatorActionBean> coordActions) {
267 for (CoordinatorActionBean coordAction : coordActions) {
268 if (!coordAction.isTerminalStatus()) {
269 return false;
270 }
271 }
272 return true;
273 }
274
275 /**
276 * Cleanup output-events directories
277 *
278 * @param eAction
279 * @param workflow
280 * @param action
281 */
282 @SuppressWarnings("unchecked")
283 private void cleanupOutputEvents(Element eAction, String user, String group) {
284 Element outputList = eAction.getChild("output-events", eAction.getNamespace());
285 for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
286 if (data.getChild("uris", data.getNamespace()) != null) {
287 String uris = data.getChild("uris", data.getNamespace()).getTextTrim();
288 if (uris != null) {
289 String[] uriArr = uris.split(CoordELFunctions.INSTANCE_SEPARATOR);
290 FsActionExecutor fsAe = new FsActionExecutor();
291 for (String uri : uriArr) {
292 Path path = new Path(uri);
293 try {
294 fsAe.delete(user, group, path);
295 log.debug("Cleanup the output dir " + path);
296 }
297 catch (ActionExecutorException ae) {
298 log.warn("Failed to cleanup the output dir " + uri, ae);
299 }
300 }
301 }
302
303 }
304 }
305 }
306
307 /**
308 * Refresh an Action
309 *
310 * @param coordJob
311 * @param coordAction
312 * @param store
313 * @throws Exception
314 */
315 private void refreshAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, CoordinatorStore store)
316 throws Exception {
317 Configuration jobConf = null;
318 try {
319 jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
320 }
321 catch (IOException ioe) {
322 log.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
323 throw new CommandException(ErrorCode.E1005, ioe);
324 }
325 String jobXml = coordJob.getJobXml();
326 Element eJob = XmlUtils.parseXml(jobXml);
327 String actionXml = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(), coordAction
328 .getNominalTime(), coordAction.getActionNumber(), jobConf, coordAction);
329 log.debug("Refresh Action actionId=" + coordAction.getId() + ", actionXml="
330 + XmlUtils.prettyPrint(actionXml).toString());
331 coordAction.setActionXml(actionXml);
332 }
333
334 /**
335 * Update an Action into database table
336 *
337 * @param coordJob
338 * @param coordAction
339 * @param actionXml
340 * @param store
341 * @throws Exception
342 */
343 private void updateAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, String actionXml,
344 CoordinatorStore store) throws Exception {
345 log.debug("updateAction for actionId=" + coordAction.getId());
346 coordAction.setStatus(CoordinatorAction.Status.WAITING);
347 coordAction.setExternalId("");
348 coordAction.setExternalStatus("");
349 coordAction.setRerunTime(new Date());
350 store.updateCoordinatorAction(coordAction);
351 writeActionRegistration(coordAction.getActionXml(), coordAction, store, coordJob.getUser(), coordJob.getGroup());
352 }
353
354 /**
355 * Create SLA RegistrationEvent
356 *
357 * @param actionXml
358 * @param actionBean
359 * @param store
360 * @param user
361 * @param group
362 * @throws Exception
363 */
364 private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, CoordinatorStore store,
365 String user, String group)
366 throws Exception {
367 Element eAction = XmlUtils.parseXml(actionXml);
368 Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
369 SLADbOperations.writeSlaRegistrationEvent(eSla, store, actionBean.getId(), SlaAppType.COORDINATOR_ACTION, user,
370 group);
371 }
372
373 @Override
374 protected CoordinatorActionInfo execute(CoordinatorStore store) throws StoreException, CommandException {
375 log.info("STARTED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
376 CoordinatorActionInfo coordInfo = null;
377 try {
378 if (lock(jobId)) {
379 coordInfo = call(store);
380 }
381 else {
382 queueCallable(new CoordResumeCommand(jobId), LOCK_FAILURE_REQUEUE_INTERVAL);
383 log.warn("CoordRerunCommand lock was not acquired - " + " failed " + jobId + ". Requeing the same.");
384 }
385 }
386 catch (InterruptedException e) {
387 queueCallable(new CoordResumeCommand(jobId), LOCK_FAILURE_REQUEUE_INTERVAL);
388 log.warn("CoordRerunCommand lock acquiring failed " + " with exception " + e.getMessage() + " for job id "
389 + jobId + ". Requeing the same.");
390 }
391 finally {
392 log.info("ENDED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
393 }
394 return coordInfo;
395 }
396
397 }