001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.util;
020
021import java.io.DataInputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.nio.ByteBuffer;
025import java.util.zip.CRC32;
026import java.util.zip.Checksum;
027
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030import org.apache.hadoop.fs.ChecksumException;
031
032/**
033 * This class provides inteface and utilities for processing checksums for
034 * DFS data transfers.
035 */
036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
037@InterfaceStability.Evolving
038public class DataChecksum implements Checksum {
039  
040  // Misc constants
041  public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
042  
043  // checksum types
044  public static final int CHECKSUM_NULL    = 0;
045  public static final int CHECKSUM_CRC32   = 1;
046  public static final int CHECKSUM_CRC32C  = 2;
047  public static final int CHECKSUM_DEFAULT = 3; 
048  public static final int CHECKSUM_MIXED   = 4;
049 
050  /** The checksum types */
051  public static enum Type {
052    NULL  (CHECKSUM_NULL, 0),
053    CRC32 (CHECKSUM_CRC32, 4),
054    CRC32C(CHECKSUM_CRC32C, 4),
055    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
056    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
057
058    public final int id;
059    public final int size;
060    
061    private Type(int id, int size) {
062      this.id = id;
063      this.size = size;
064    }
065
066    /** @return the type corresponding to the id. */
067    public static Type valueOf(int id) {
068      if (id < 0 || id >= values().length) {
069        throw new IllegalArgumentException("id=" + id
070            + " out of range [0, " + values().length + ")");
071      }
072      return values()[id];
073    }
074  }
075
076  /**
077   * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
078   * is chosen depending on the platform.
079   */
080  public static Checksum newCrc32() {
081    return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
082  }
083
084  public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
085    if ( bytesPerChecksum <= 0 ) {
086      return null;
087    }
088    
089    switch ( type ) {
090    case NULL :
091      return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
092    case CRC32 :
093      return new DataChecksum(type, newCrc32(), bytesPerChecksum );
094    case CRC32C:
095      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
096    default:
097      return null;  
098    }
099  }
100  
101  /**
102   * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
103   * @return DataChecksum of the type in the array or null in case of an error.
104   */
105  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
106    if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
107      return null;
108    }
109    
110    // like readInt():
111    int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
112                           ( (bytes[offset+2] & 0xff) << 16 ) |
113                           ( (bytes[offset+3] & 0xff) << 8 )  |
114                           ( (bytes[offset+4] & 0xff) );
115    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
116  }
117  
118  /**
119   * This constructucts a DataChecksum by reading HEADER_LEN bytes from
120   * input stream <i>in</i>
121   */
122  public static DataChecksum newDataChecksum( DataInputStream in )
123                                 throws IOException {
124    int type = in.readByte();
125    int bpc = in.readInt();
126    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
127    if ( summer == null ) {
128      throw new IOException( "Could not create DataChecksum of type " +
129                             type + " with bytesPerChecksum " + bpc );
130    }
131    return summer;
132  }
133  
134  /**
135   * Writes the checksum header to the output stream <i>out</i>.
136   */
137  public void writeHeader( DataOutputStream out ) 
138                           throws IOException { 
139    out.writeByte( type.id );
140    out.writeInt( bytesPerChecksum );
141  }
142
143  public byte[] getHeader() {
144    byte[] header = new byte[DataChecksum.HEADER_LEN];
145    header[0] = (byte) (type.id & 0xff);
146    // Writing in buffer just like DataOutput.WriteInt()
147    header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
148    header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
149    header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
150    header[1+3] = (byte) (bytesPerChecksum & 0xff);
151    return header;
152  }
153  
154  /**
155   * Writes the current checksum to the stream.
156   * If <i>reset</i> is true, then resets the checksum.
157   * @return number of bytes written. Will be equal to getChecksumSize();
158   */
159   public int writeValue( DataOutputStream out, boolean reset )
160                          throws IOException {
161     if ( type.size <= 0 ) {
162       return 0;
163     }
164
165     if ( type.size == 4 ) {
166       out.writeInt( (int) summer.getValue() );
167     } else {
168       throw new IOException( "Unknown Checksum " + type );
169     }
170     
171     if ( reset ) {
172       reset();
173     }
174     
175     return type.size;
176   }
177   
178   /**
179    * Writes the current checksum to a buffer.
180    * If <i>reset</i> is true, then resets the checksum.
181    * @return number of bytes written. Will be equal to getChecksumSize();
182    */
183    public int writeValue( byte[] buf, int offset, boolean reset )
184                           throws IOException {
185      if ( type.size <= 0 ) {
186        return 0;
187      }
188
189      if ( type.size == 4 ) {
190        int checksum = (int) summer.getValue();
191        buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
192        buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
193        buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
194        buf[offset+3] = (byte) (checksum & 0xff);
195      } else {
196        throw new IOException( "Unknown Checksum " + type );
197      }
198      
199      if ( reset ) {
200        reset();
201      }
202      
203      return type.size;
204    }
205   
206   /**
207    * Compares the checksum located at buf[offset] with the current checksum.
208    * @return true if the checksum matches and false otherwise.
209    */
210   public boolean compare( byte buf[], int offset ) {
211     if ( type.size == 4 ) {
212       int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
213                      ( (buf[offset+1] & 0xff) << 16 ) |
214                      ( (buf[offset+2] & 0xff) << 8 )  |
215                      ( (buf[offset+3] & 0xff) );
216       return checksum == (int) summer.getValue();
217     }
218     return type.size == 0;
219   }
220   
221  private final Type type;
222  private final Checksum summer;
223  private final int bytesPerChecksum;
224  private int inSum = 0;
225  
226  private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
227    this.type = type;
228    summer = checksum;
229    bytesPerChecksum = chunkSize;
230  }
231  
232  // Accessors
233  public Type getChecksumType() {
234    return type;
235  }
236  public int getChecksumSize() {
237    return type.size;
238  }
239  public int getBytesPerChecksum() {
240    return bytesPerChecksum;
241  }
242  public int getNumBytesInSum() {
243    return inSum;
244  }
245  
246  public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
247  static public int getChecksumHeaderSize() {
248    return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
249  }
250  //Checksum Interface. Just a wrapper around member summer.
251  @Override
252  public long getValue() {
253    return summer.getValue();
254  }
255  @Override
256  public void reset() {
257    summer.reset();
258    inSum = 0;
259  }
260  @Override
261  public void update( byte[] b, int off, int len ) {
262    if ( len > 0 ) {
263      summer.update( b, off, len );
264      inSum += len;
265    }
266  }
267  @Override
268  public void update( int b ) {
269    summer.update( b );
270    inSum += 1;
271  }
272  
273  /**
274   * Verify that the given checksums match the given data.
275   * 
276   * The 'mark' of the ByteBuffer parameters may be modified by this function,.
277   * but the position is maintained.
278   *  
279   * @param data the DirectByteBuffer pointing to the data to verify.
280   * @param checksums the DirectByteBuffer pointing to a series of stored
281   *                  checksums
282   * @param fileName the name of the file being read, for error-reporting
283   * @param basePos the file position to which the start of 'data' corresponds
284   * @throws ChecksumException if the checksums do not match
285   */
286  public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
287      String fileName, long basePos)
288  throws ChecksumException {
289    if (type.size == 0) return;
290    
291    if (data.hasArray() && checksums.hasArray()) {
292      verifyChunkedSums(
293          data.array(), data.arrayOffset() + data.position(), data.remaining(),
294          checksums.array(), checksums.arrayOffset() + checksums.position(),
295          fileName, basePos);
296      return;
297    }
298    if (NativeCrc32.isAvailable()) {
299      NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
300          fileName, basePos);
301      return;
302    }
303    
304    int startDataPos = data.position();
305    data.mark();
306    checksums.mark();
307    try {
308      byte[] buf = new byte[bytesPerChecksum];
309      byte[] sum = new byte[type.size];
310      while (data.remaining() > 0) {
311        int n = Math.min(data.remaining(), bytesPerChecksum);
312        checksums.get(sum);
313        data.get(buf, 0, n);
314        summer.reset();
315        summer.update(buf, 0, n);
316        int calculated = (int)summer.getValue();
317        int stored = (sum[0] << 24 & 0xff000000) |
318          (sum[1] << 16 & 0xff0000) |
319          (sum[2] << 8 & 0xff00) |
320          sum[3] & 0xff;
321        if (calculated != stored) {
322          long errPos = basePos + data.position() - startDataPos - n;
323          throw new ChecksumException(
324              "Checksum error: "+ fileName + " at "+ errPos +
325              " exp: " + stored + " got: " + calculated, errPos);
326        }
327      }
328    } finally {
329      data.reset();
330      checksums.reset();
331    }
332  }
333  
334  /**
335   * Implementation of chunked verification specifically on byte arrays. This
336   * is to avoid the copy when dealing with ByteBuffers that have array backing.
337   */
338  private void verifyChunkedSums(
339      byte[] data, int dataOff, int dataLen,
340      byte[] checksums, int checksumsOff, String fileName,
341      long basePos) throws ChecksumException {
342    
343    int remaining = dataLen;
344    int dataPos = 0;
345    while (remaining > 0) {
346      int n = Math.min(remaining, bytesPerChecksum);
347      
348      summer.reset();
349      summer.update(data, dataOff + dataPos, n);
350      dataPos += n;
351      remaining -= n;
352      
353      int calculated = (int)summer.getValue();
354      int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
355        (checksums[checksumsOff + 1] << 16 & 0xff0000) |
356        (checksums[checksumsOff + 2] << 8 & 0xff00) |
357        checksums[checksumsOff + 3] & 0xff;
358      checksumsOff += 4;
359      if (calculated != stored) {
360        long errPos = basePos + dataPos - n;
361        throw new ChecksumException(
362            "Checksum error: "+ fileName + " at "+ errPos +
363            " exp: " + stored + " got: " + calculated, errPos);
364      }
365    }
366  }
367
368  /**
369   * Calculate checksums for the given data.
370   * 
371   * The 'mark' of the ByteBuffer parameters may be modified by this function,
372   * but the position is maintained.
373   * 
374   * @param data the DirectByteBuffer pointing to the data to checksum.
375   * @param checksums the DirectByteBuffer into which checksums will be
376   *                  stored. Enough space must be available in this
377   *                  buffer to put the checksums.
378   */
379  public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
380    if (type.size == 0) return;
381    
382    if (data.hasArray() && checksums.hasArray()) {
383      calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
384          checksums.array(), checksums.arrayOffset() + checksums.position());
385      return;
386    }
387    
388    data.mark();
389    checksums.mark();
390    try {
391      byte[] buf = new byte[bytesPerChecksum];
392      while (data.remaining() > 0) {
393        int n = Math.min(data.remaining(), bytesPerChecksum);
394        data.get(buf, 0, n);
395        summer.reset();
396        summer.update(buf, 0, n);
397        checksums.putInt((int)summer.getValue());
398      }
399    } finally {
400      data.reset();
401      checksums.reset();
402    }
403  }
404
405  /**
406   * Implementation of chunked calculation specifically on byte arrays. This
407   * is to avoid the copy when dealing with ByteBuffers that have array backing.
408   */
409  private void calculateChunkedSums(
410      byte[] data, int dataOffset, int dataLength,
411      byte[] sums, int sumsOffset) {
412
413    int remaining = dataLength;
414    while (remaining > 0) {
415      int n = Math.min(remaining, bytesPerChecksum);
416      summer.reset();
417      summer.update(data, dataOffset, n);
418      dataOffset += n;
419      remaining -= n;
420      long calculated = summer.getValue();
421      sums[sumsOffset++] = (byte) (calculated >> 24);
422      sums[sumsOffset++] = (byte) (calculated >> 16);
423      sums[sumsOffset++] = (byte) (calculated >> 8);
424      sums[sumsOffset++] = (byte) (calculated);
425    }
426  }
427
428  @Override
429  public boolean equals(Object other) {
430    if (!(other instanceof DataChecksum)) {
431      return false;
432    }
433    DataChecksum o = (DataChecksum)other;
434    return o.bytesPerChecksum == this.bytesPerChecksum &&
435      o.type == this.type;
436  }
437  
438  @Override
439  public int hashCode() {
440    return (this.type.id + 31) * this.bytesPerChecksum;
441  }
442  
443  @Override
444  public String toString() {
445    return "DataChecksum(type=" + type +
446      ", chunkSize=" + bytesPerChecksum + ")";
447  }
448  
449  /**
450   * This just provides a dummy implimentation for Checksum class
451   * This is used when there is no checksum available or required for 
452   * data
453   */
454  static class ChecksumNull implements Checksum {
455    
456    public ChecksumNull() {}
457    
458    //Dummy interface
459    @Override
460    public long getValue() { return 0; }
461    @Override
462    public void reset() {}
463    @Override
464    public void update(byte[] b, int off, int len) {}
465    @Override
466    public void update(int b) {}
467  };
468}