001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.mapred;
020    
021    import java.io.DataInput;
022    import java.io.DataOutput;
023    import java.io.IOException;
024    import java.util.HashSet;
025    import java.util.Set;
026    
027    import org.apache.hadoop.classification.InterfaceAudience;
028    import org.apache.hadoop.classification.InterfaceStability;
029    import org.apache.hadoop.fs.FileStatus;
030    import org.apache.hadoop.fs.FileSystem;
031    import org.apache.hadoop.fs.Path;
032    import org.apache.hadoop.fs.BlockLocation;
033    import org.apache.hadoop.io.Text;
034    import org.apache.hadoop.io.Text;
035    import org.apache.hadoop.mapred.lib.CombineFileSplit;
036    
037    /**
038     * A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit 
039     * class does not represent a split of a file, but a split of input files 
040     * into smaller sets. The atomic unit of split is a file. <br> 
041     * MultiFileSplit can be used to implement {@link RecordReader}'s, with 
042     * reading one record per file.
043     * @see FileSplit
044     * @see MultiFileInputFormat 
045     * @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead
046     */
047    @Deprecated
048    @InterfaceAudience.Public
049    @InterfaceStability.Stable
050    public class MultiFileSplit extends CombineFileSplit {
051    
052      MultiFileSplit() {}
053      
054      public MultiFileSplit(JobConf job, Path[] files, long[] lengths) {
055        super(job, files, lengths);
056      }
057    
058      public String[] getLocations() throws IOException {
059        HashSet<String> hostSet = new HashSet<String>();
060        for (Path file : getPaths()) {
061          FileSystem fs = file.getFileSystem(getJob());
062          FileStatus status = fs.getFileStatus(file);
063          BlockLocation[] blkLocations = fs.getFileBlockLocations(status,
064                                              0, status.getLen());
065          if (blkLocations != null && blkLocations.length > 0) {
066            addToSet(hostSet, blkLocations[0].getHosts());
067          }
068        }
069        return hostSet.toArray(new String[hostSet.size()]);
070      }
071    
072      private void addToSet(Set<String> set, String[] array) {
073        for(String s:array)
074          set.add(s); 
075      }
076    
077      @Override
078      public String toString() {
079        StringBuffer sb = new StringBuffer();
080        for(int i=0; i < getPaths().length; i++) {
081          sb.append(getPath(i).toUri().getPath() + ":0+" + getLength(i));
082          if (i < getPaths().length -1) {
083            sb.append("\n");
084          }
085        }
086    
087        return sb.toString();
088      }
089    }
090