org.apache.cassandra.io.sstable.CorruptSSTableException: org.apache.cassandra.io.compress.CorruptBlockException: (/var/lib/cassandra/data/<KEYSPACE>/<COLUMN_FAMILY>/<KEYSPACE>-<CF>-ic-112-Data.db): corruption detected, chunk at 19042661 of length 27265.
at org.apache.cassandra.io.compress.CompressedRandomAccessReader.reBuffer(CompressedRandomAccessReader.java:89)
at org.apache.cassandra.io.compress.CompressedThrottledReader.reBuffer(CompressedThrottledReader.java:45)
at org.apache.cassandra.io.util.RandomAccessReader.read(RandomAccessReader.java:355)
at java.io.RandomAccessFile.readFully(RandomAccessFile.java:444)
at java.io.RandomAccessFile.readFully(RandomAccessFile.java:424)
at org.apache.cassandra.io.util.RandomAccessReader.readBytes(RandomAccessReader.java:380)
at org.apache.cassandra.utils.ByteBufferUtil.read(ByteBufferUtil.java:391)
at org.apache.cassandra.utils.ByteBufferUtil.readWithShortLength(ByteBufferUtil.java:370)
at org.apache.cassandra.io.sstable.SSTableScanner$KeyScanningIterator.next(SSTableScanner.java:175)
at org.apache.cassandra.io.sstable.SSTableScanner$KeyScanningIterator.next(SSTableScanner.java:155)
at org.apache.cassandra.io.sstable.SSTableScanner.next(SSTableScanner.java:142)
at org.apache.cassandra.io.sstable.SSTableScanner.next(SSTableScanner.java:38)
at org.apache.cassandra.utils.MergeIterator$Candidate.advance(MergeIterator.java:145)
at org.apache.cassandra.utils.MergeIterator$ManyToOne.advance(MergeIterator.java:122)
at org.apache.cassandra.utils.MergeIterator$ManyToOne.computeNext(MergeIterator.java:96)
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143)
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138)
at org.apache.cassandra.db.compaction.CompactionTask.runWith(CompactionTask.java:145)
at org.apache.cassandra.io.util.DiskAwareRunnable.runMayThrow(DiskAwareRunnable.java:48)
at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28)
at org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:58)
at org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:60)
at org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:208)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.cassandra.io.compress.CorruptBlockException: (/var/lib/cassandra/data/<KEYSPACE>/<COLUMN_FAMILY>/<KEYSPACE>-<CF>-ic-112-Data.db): corruption detected, chunk at 19042661 of length 27265.
at org.apache.cassandra.io.compress.CompressedRandomAccessReader.decompressChunk(CompressedRandomAccessReader.java:128)
at org.apache.cassandra.io.compress.CompressedRandomAccessReader.reBuffer(CompressedRandomAccessReader.java:85)
... 27 more
Okay, let's trace into the stacktrace and study what actually cause this and what is sstable corruption means.
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Simple, a thread is run by the executor.
at org.apache.cassandra.db.compaction.CompactionTask.runWith(CompactionTask.java:145)
at org.apache.cassandra.io.util.DiskAwareRunnable.runMayThrow(DiskAwareRunnable.java:48)
at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28)
at org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:58)
at org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:60)
at org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:208)
We see that a background compaction task is started. Code below.
// the actual sstables to compact are not determined until we run the BCT; that way, if new sstables
// are created between task submission and execution, we execute against the most up-to-date information
class BackgroundCompactionTask implements Runnable
{
private final ColumnFamilyStore cfs;
BackgroundCompactionTask(ColumnFamilyStore cfs)
{
this.cfs = cfs;
}
public void run()
{
compactionLock.readLock().lock();
try
{
logger.debug("Checking {}.{}", cfs.table.name, cfs.columnFamily); // log after we get the lock so we can see delays from that if any
if (!cfs.isValid())
{
logger.debug("Aborting compaction for dropped CF");
return;
}
AbstractCompactionStrategy strategy = cfs.getCompactionStrategy();
AbstractCompactionTask task = strategy.getNextBackgroundTask(getDefaultGcBefore(cfs));
if (task == null)
{
logger.debug("No tasks available");
return;
}
task.execute(metrics);
}
finally
{
compactingCF.remove(cfs);
compactionLock.readLock().unlock();
}
submitBackground(cfs);
}
}
at org.apache.cassandra.utils.MergeIterator$Candidate.advance(MergeIterator.java:145)
at org.apache.cassandra.utils.MergeIterator$ManyToOne.advance(MergeIterator.java:122)
at org.apache.cassandra.utils.MergeIterator$ManyToOne.computeNext(MergeIterator.java:96)
at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:143)
at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:138)
Here we see an iterator going over the sstables for compaction.
at org.apache.cassandra.io.sstable.SSTableScanner$KeyScanningIterator.next(SSTableScanner.java:175)
at org.apache.cassandra.io.sstable.SSTableScanner$KeyScanningIterator.next(SSTableScanner.java:155)
at org.apache.cassandra.io.sstable.SSTableScanner.next(SSTableScanner.java:142)
at org.apache.cassandra.io.sstable.SSTableScanner.next(SSTableScanner.java:38)
DecoratedKey key = sstable.decodeKey(ByteBufferUtil.readWithShortLength(dfile));
Here we see that what actually get iterated is the sstable and particular on the key.
at org.apache.cassandra.io.compress.CompressedRandomAccessReader.reBuffer(CompressedRandomAccessReader.java:89)
at org.apache.cassandra.io.compress.CompressedThrottledReader.reBuffer(CompressedThrottledReader.java:45)
at org.apache.cassandra.io.util.RandomAccessReader.read(RandomAccessReader.java:355)
at java.io.RandomAccessFile.readFully(RandomAccessFile.java:444)
at java.io.RandomAccessFile.readFully(RandomAccessFile.java:424)
at org.apache.cassandra.io.util.RandomAccessReader.readBytes(RandomAccessReader.java:380)
at org.apache.cassandra.utils.ByteBufferUtil.read(ByteBufferUtil.java:391)
at org.apache.cassandra.utils.ByteBufferUtil.readWithShortLength(ByteBufferUtil.java:370)
Here, there is code reference has change in a few files due to method override, nonetheless, the important part on method reBuffer.
@Override
protected void reBuffer()
{
try
{
decompressChunk(metadata.chunkFor(current));
}
catch (CorruptBlockException e)
{
throw new CorruptSSTableException(e, getPath());
}
catch (IOException e)
{
throw new FSReadError(e, getPath());
}
}
private void decompressChunk(CompressionMetadata.Chunk chunk) throws IOException
{
if (channel.position() != chunk.offset)
channel.position(chunk.offset);
if (compressed.capacity() < chunk.length)
compressed = ByteBuffer.wrap(new byte[chunk.length]);
else
compressed.clear();
compressed.limit(chunk.length);
if (channel.read(compressed) != chunk.length)
throw new CorruptBlockException(getPath(), chunk);
// technically flip() is unnecessary since all the remaining work uses the raw array, but if that changes
// in the future this will save a lot of hair-pulling
compressed.flip();
try
{
validBufferBytes = metadata.compressor().uncompress(compressed.array(), 0, chunk.length, buffer, 0);
}
catch (IOException e)
{
throw new CorruptBlockException(getPath(), chunk);
}
if (metadata.parameters.getCrcCheckChance() > FBUtilities.threadLocalRandom().nextDouble())
{
checksum.update(buffer, 0, validBufferBytes);
if (checksum(chunk) != (int) checksum.getValue())
throw new CorruptBlockException(getPath(), chunk);
// reset checksum object back to the original (blank) state
checksum.reset();
}
// buffer offset is always aligned
bufferOffset = current & ~(buffer.length - 1);
}
we read that if chunk checksum is not the same as the updated crc32 checksum, this is consider sstable corruption.
For this type of exception, I remember I did many things such as below.
1. try online nodetool scrub, does not work
2. try offline sstablescrub, does not work.
3. wipeout the node and rebuild again, does not work.
we had to change the hardware altogether. Then we don't see the problem anymore.