# 2009-01-27. Test of how well linux md raid, at different raid levels,
# tolerates `silent errors' (unreported corruption of data on a storage device).
# Raid5 is expected to allow one component device to fail without data loss
# if it is known that the device has failed; a silent error (wrong data from
# a device, but without the device giving a warning) is different -- there
# is no checking. Raid6, I've recently realised from reading on the web,
# is (in spite of an often very vague definition) able in some implementations
# to do much more than just allow two devices to fail `noisily' -- it also
# seems to tolerate single device silent errors.
#
# The following bit of shell was used to do the testing, using 2.6.26-tuxonice
# (gentoo) kernel with the 4 raid devices being small files used as loop devices.
# After the script comes output data from some runs.
#
# The summary is that there is apparently no use of any `silent error'
# checking even where there could be a good shot at it, e.g. RAID1 (on 4 devices).
# If one knows the failed device, and removes then replaces it (at least
# without writes, as here) the data will be ok. Otherwise, while the
# corrupted device is active, there are errors at all the tried raid levels,
# 1, 4, 5, 6, 10.
# So, it would seem that at least with linux-md raid there's no point going to
# raid6 with a hope of spotting silent corruption.
#

# Must be run as root: it attaches /dev/loop0..3, creates /dev/md0 and mounts it.
# For each raid level it builds a 4-member array on 32 MiB image files, fills an
# ext3 filesystem on it with a random file, keeps a reference copy outside the
# array, then overwrites part of one member at a time and compares md5sums.

# Scratch directory for the image files, the mount point and the reference copy.
# -p: a directory left over from an earlier run is not an error.
mkdir -p /root/raid_test
cd /root/raid_test || exit

# Block until /proc/mdstat shows all four members up.  grep's output is kept
# on purpose: the matching line is part of the recorded log.
# NOTE: [UUUU] is also displayed while the initial resync is still running
# (see the RAID1 and RAID6 output below), so this does not wait for the resync.
wait_for_array() {
    until grep UUUU /proc/mdstat
    do
        sleep 2
    done
}

# Overwrite 1 MiB, at offset 18 MiB, of member device /dev/loop$1 with random
# data, writing to the loop device directly rather than through /dev/md0, then
# mount the array again.  The caller unmounts the filesystem beforehand.
corrupt_member() {
    dd bs=$((1024*1024)) seek=18 count=1 if=/dev/urandom of="/dev/loop$1" 2>&1 | grep MB
    sync
    mount -t ext3 /dev/md0 mount
}

# Print the checksum of the reference copy and of the copy on the array,
# framed by the spacer lines used throughout the log.
show_checksums() {
    echo " "
    md5sum orig/rand.file mount/rand.file
    echo " "
}

for l in 1 4 5 6 10
do
    echo "Setting up loop devices"
    for n in 0 1 2 3
    do
        # Fresh zero-filled 32 MiB backing file for each member.
        dd if=/dev/zero of="rd$n" bs=$((1024*1024)) count=32 2>&1 | grep MB
        echo "loop$n"
        losetup "/dev/loop$n" "rd$n"
    done
    ls -l
    echo " "

    echo "Create array, level $l, on all four [loop] disks"
    if ! mdadm --create /dev/md0 --auto=yes -l "$l" -n 4 /dev/loop[0123]
    then
        # Without an array the wait below would never return; release the
        # loop devices and give up instead.
        echo "mdadm --create failed for level $l" >&2
        for n in 0 1 2 3
        do
            losetup -d "/dev/loop$n"
        done
        exit 1
    fi
    wait_for_array
    cat /proc/mdstat

    mkfs.ext3 /dev/md0 >/dev/null
    # -p: the directory already exists on every raid level after the first.
    mkdir -p mount
    mount -t ext3 /dev/md0 mount
    df -m mount

    # Free space in MiB (4th column of the last line of df), minus 2 MiB slack.
    s="$(df -P -m mount | tail -n1 | awk '{ print $4 }')"
    s=$((s-2))
    echo "making random file of size $s MiB"
    dd if=/dev/urandom of=mount/rand.file bs=$((1024*1024)) count="$s" 2>&1 | grep MB

    # Reference copy kept outside the array.
    mkdir -p orig
    cp -av mount/rand.file orig/rand.file
    md5sum orig/rand.file mount/rand.file

    # Pass 1: corrupt one member, compare, then fail/remove/re-add that member
    # so that it is rebuilt, and compare again.
    for n in 0 1 2 3
    do
        echo " "
        umount mount
        echo "Corrupting member device n=$n"
        corrupt_member "$n"
        show_checksums

        echo "Removing and re-adding (for rebuild) member device n=$n"
        mdadm --manage /dev/md0 --fail "/dev/loop$n"
        mdadm --manage /dev/md0 --remove "/dev/loop$n"
        mdadm --manage /dev/md0 --add "/dev/loop$n"
        wait_for_array
        # Remount before the comparison.
        umount mount
        mount -t ext3 /dev/md0 mount
        show_checksums
    done

    # Pass 2: corrupt each member in turn and leave the damage in place.
    for n in 0 1 2 3
    do
        echo " "
        echo "Corrupting member device (and not repairing) n=$n"
        umount mount
        corrupt_member "$n"
        show_checksums
    done

    echo " "
    umount mount || echo "trouble on the unmount"
    echo "Stopping level$l array"
    mdadm --stop /dev/md0
    echo "Removing loop devices"
    for n in 0 1 2 3
    do
        losetup -d "/dev/loop$n"
    done
    echo " "
done

exit 0

#####################################################################################################

# Results from running:

## RAID1

Setting up loop devices
33554432 bytes (34 MB) copied, 0.183128 s, 183 MB/s loop0 33554432 bytes (34 MB) copied, 0.168032 s, 200 MB/s loop1 33554432 bytes (34 MB) copied, 0.151457 s, 222 MB/s loop2 33554432 bytes (34 MB) copied, 0.14078 s, 238 MB/s loop3 total 131216 -rw-r--r-- 1 root root 33554432 2009-01-28 00:51 rd0 -rw-r--r-- 1 root root 33554432 2009-01-28 00:51 rd1 -rw-r--r-- 1 root root 33554432 2009-01-28 00:51 rd2 -rw-r--r-- 1 root root 33554432 2009-01-28 00:51 rd3 Create array, level 1, on all four [loop] disks mdadm: array /dev/md0 started. 32704 blocks [4/4] [UUUU] Personalities : [raid6] [raid5] [raid4] [raid1] [raid10] md0 : active raid1 loop3[3] loop2[2] loop1[1] loop0[0] 32704 blocks [4/4] [UUUU] [============>........] resync = 62.5% (21248/32704) finish=0.0min speed=21248K/sec unused devices: mke2fs 1.40.6 (09-Feb-2008) Filesystem 1M-blocks Used Available Use% Mounted on /dev/md/0 31 2 28 5% /root/raid_test/mount making random file of size 26 MiB 27262976 bytes (27 MB) copied, 13.8401 s, 2.0 MB/s `mount/rand.file' -> `orig/rand.file' e48afa380b5a43e68d3b23689702beb4 orig/rand.file e48afa380b5a43e68d3b23689702beb4 mount/rand.file Corrupting member device n=0 1048576 bytes (1.0 MB) copied, 0.498991 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=0 mdadm: set /dev/loop0 faulty in /dev/md0 mdadm: hot removed /dev/loop0 mdadm: re-added /dev/loop0 32704 blocks [4/4] [UUUU] e48afa380b5a43e68d3b23689702beb4 orig/rand.file e48afa380b5a43e68d3b23689702beb4 mount/rand.file Corrupting member device n=1 1048576 bytes (1.0 MB) copied, 0.506734 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file e48afa380b5a43e68d3b23689702beb4 mount/rand.file Removing and re-adding (for rebuild) member device n=1 mdadm: set /dev/loop1 faulty in /dev/md0 mdadm: hot removed /dev/loop1 mdadm: re-added /dev/loop1 32704 blocks [4/4] [UUUU] e48afa380b5a43e68d3b23689702beb4 orig/rand.file 
e48afa380b5a43e68d3b23689702beb4 mount/rand.file Corrupting member device n=2 1048576 bytes (1.0 MB) copied, 0.495108 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=2 mdadm: set /dev/loop2 faulty in /dev/md0 mdadm: hot removed /dev/loop2 mdadm: re-added /dev/loop2 32704 blocks [4/4] [UUUU] e48afa380b5a43e68d3b23689702beb4 orig/rand.file e48afa380b5a43e68d3b23689702beb4 mount/rand.file Corrupting member device n=3 1048576 bytes (1.0 MB) copied, 0.509754 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file e48afa380b5a43e68d3b23689702beb4 mount/rand.file Removing and re-adding (for rebuild) member device n=3 mdadm: set /dev/loop3 faulty in /dev/md0 mdadm: hot removed /dev/loop3 mdadm: re-added /dev/loop3 32704 blocks [4/4] [UUUU] e48afa380b5a43e68d3b23689702beb4 orig/rand.file e48afa380b5a43e68d3b23689702beb4 mount/rand.file Corrupting member device (and not repairing) n=0 1048576 bytes (1.0 MB) copied, 0.493614 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=1 1048576 bytes (1.0 MB) copied, 0.553792 s, 1.9 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=2 1048576 bytes (1.0 MB) copied, 0.491664 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=3 1048576 bytes (1.0 MB) copied, 0.493936 s, 2.1 MB/s e48afa380b5a43e68d3b23689702beb4 orig/rand.file md5sum: mount/rand.file: Input/output error Stopping level1 array mdadm: stopped /dev/md0 Removing loop devices ## RAID4 Setting up loop devices 33554432 bytes (34 MB) copied, 0.154065 s, 218 MB/s loop0 33554432 bytes (34 MB) copied, 0.155835 s, 215 MB/s loop1 33554432 bytes (34 MB) copied, 0.149418 s, 225 MB/s loop2 
33554432 bytes (34 MB) copied, 0.158076 s, 212 MB/s loop3 total 131224 drwxr-xr-x 2 root root 4096 2009-01-28 00:51 mount drwxr-xr-x 2 root root 4096 2009-01-28 00:51 orig -rw-r--r-- 1 root root 33554432 2009-01-28 00:53 rd0 -rw-r--r-- 1 root root 33554432 2009-01-28 00:53 rd1 -rw-r--r-- 1 root root 33554432 2009-01-28 00:53 rd2 -rw-r--r-- 1 root root 33554432 2009-01-28 00:53 rd3 Create array, level 4, on all four [loop] disks mdadm: array /dev/md0 started. 98112 blocks level 4, 64k chunk, algorithm 0 [4/4] [UUUU] Personalities : [raid6] [raid5] [raid4] [raid1] [raid10] md0 : active raid4 loop3[3] loop2[2] loop1[1] loop0[0] 98112 blocks level 4, 64k chunk, algorithm 0 [4/4] [UUUU] unused devices: mke2fs 1.40.6 (09-Feb-2008) mkdir: cannot create directory `mount': File exists Filesystem 1M-blocks Used Available Use% Mounted on /dev/md/0 93 6 83 7% /root/raid_test/mount making random file of size 81 MiB 84934656 bytes (85 MB) copied, 44.3903 s, 1.9 MB/s mkdir: cannot create directory `orig/': File exists `mount/rand.file' -> `orig/rand.file' e37c2861c6676af869cc849a8599d226 orig/rand.file e37c2861c6676af869cc849a8599d226 mount/rand.file Corrupting member device n=0 1048576 bytes (1.0 MB) copied, 0.497983 s, 2.1 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=0 mdadm: set /dev/loop0 faulty in /dev/md0 mdadm: hot removed /dev/loop0 mdadm: re-added /dev/loop0 98112 blocks level 4, 64k chunk, algorithm 0 [4/4] [UUUU] e37c2861c6676af869cc849a8599d226 orig/rand.file e37c2861c6676af869cc849a8599d226 mount/rand.file Corrupting member device n=1 1048576 bytes (1.0 MB) copied, 0.494596 s, 2.1 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=1 mdadm: set /dev/loop1 faulty in /dev/md0 mdadm: hot removed /dev/loop1 mdadm: re-added /dev/loop1 98112 blocks level 4, 64k chunk, 
algorithm 0 [4/4] [UUUU] e37c2861c6676af869cc849a8599d226 orig/rand.file e37c2861c6676af869cc849a8599d226 mount/rand.file Corrupting member device n=2 1048576 bytes (1.0 MB) copied, 0.51765 s, 2.0 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=2 mdadm: set /dev/loop2 faulty in /dev/md0 mdadm: hot removed /dev/loop2 mdadm: re-added /dev/loop2 98112 blocks level 4, 64k chunk, algorithm 0 [4/4] [UUUU] e37c2861c6676af869cc849a8599d226 orig/rand.file e37c2861c6676af869cc849a8599d226 mount/rand.file Corrupting member device n=3 1048576 bytes (1.0 MB) copied, 0.499217 s, 2.1 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file e37c2861c6676af869cc849a8599d226 mount/rand.file Removing and re-adding (for rebuild) member device n=3 mdadm: set /dev/loop3 faulty in /dev/md0 mdadm: hot removed /dev/loop3 mdadm: re-added /dev/loop3 98112 blocks level 4, 64k chunk, algorithm 0 [4/4] [UUUU] e37c2861c6676af869cc849a8599d226 orig/rand.file e37c2861c6676af869cc849a8599d226 mount/rand.file Corrupting member device (and not repairing) n=0 1048576 bytes (1.0 MB) copied, 0.494858 s, 2.1 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=1 1048576 bytes (1.0 MB) copied, 0.494683 s, 2.1 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=2 1048576 bytes (1.0 MB) copied, 0.559 s, 1.9 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=3 1048576 bytes (1.0 MB) copied, 0.557135 s, 1.9 MB/s e37c2861c6676af869cc849a8599d226 orig/rand.file md5sum: mount/rand.file: Input/output error Stopping level4 array mdadm: stopped /dev/md0 Removing loop devices ## RAID5 Setting up loop devices 33554432 bytes (34 MB) copied, 0.151013 
s, 222 MB/s loop0 33554432 bytes (34 MB) copied, 0.147892 s, 227 MB/s loop1 33554432 bytes (34 MB) copied, 0.160077 s, 210 MB/s loop2 33554432 bytes (34 MB) copied, 0.152515 s, 220 MB/s loop3 total 131224 drwxr-xr-x 2 root root 4096 2009-01-28 00:51 mount drwxr-xr-x 2 root root 4096 2009-01-28 00:51 orig -rw-r--r-- 1 root root 33554432 2009-01-28 00:54 rd0 -rw-r--r-- 1 root root 33554432 2009-01-28 00:54 rd1 -rw-r--r-- 1 root root 33554432 2009-01-28 00:54 rd2 -rw-r--r-- 1 root root 33554432 2009-01-28 00:54 rd3 Create array, level 5, on all four [loop] disks mdadm: array /dev/md0 started. 98112 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] Personalities : [raid6] [raid5] [raid4] [raid1] [raid10] md0 : active raid5 loop3[3] loop2[2] loop1[1] loop0[0] 98112 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] unused devices: mke2fs 1.40.6 (09-Feb-2008) mkdir: cannot create directory `mount': File exists Filesystem 1M-blocks Used Available Use% Mounted on /dev/md/0 93 6 83 7% /root/raid_test/mount making random file of size 81 MiB 84934656 bytes (85 MB) copied, 41.5364 s, 2.0 MB/s mkdir: cannot create directory `orig/': File exists `mount/rand.file' -> `orig/rand.file' 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 9b120070d72bcf8924837e57e7a21b20 mount/rand.file Corrupting member device n=0 1048576 bytes (1.0 MB) copied, 0.563751 s, 1.9 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=0 mdadm: set /dev/loop0 faulty in /dev/md0 mdadm: hot removed /dev/loop0 mdadm: re-added /dev/loop0 98112 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 9b120070d72bcf8924837e57e7a21b20 mount/rand.file Corrupting member device n=1 1048576 bytes (1.0 MB) copied, 0.556478 s, 1.9 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 6e0bc00203fc677df51628159113a87e mount/rand.file Removing and re-adding (for rebuild) member device 
n=1 mdadm: set /dev/loop1 faulty in /dev/md0 mdadm: hot removed /dev/loop1 mdadm: re-added /dev/loop1 98112 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 9b120070d72bcf8924837e57e7a21b20 mount/rand.file Corrupting member device n=2 1048576 bytes (1.0 MB) copied, 0.50215 s, 2.1 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 22906df9cc01cbb217add378931fddc8 mount/rand.file Removing and re-adding (for rebuild) member device n=2 mdadm: set /dev/loop2 faulty in /dev/md0 mdadm: hot removed /dev/loop2 mdadm: re-added /dev/loop2 98112 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 9b120070d72bcf8924837e57e7a21b20 mount/rand.file Corrupting member device n=3 1048576 bytes (1.0 MB) copied, 0.507295 s, 2.1 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 27e1b5142638a0dd013162bc2d4bcdc0 mount/rand.file Removing and re-adding (for rebuild) member device n=3 mdadm: set /dev/loop3 faulty in /dev/md0 mdadm: hot removed /dev/loop3 mdadm: re-added /dev/loop3 98112 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] 9b120070d72bcf8924837e57e7a21b20 orig/rand.file 9b120070d72bcf8924837e57e7a21b20 mount/rand.file Corrupting member device (and not repairing) n=0 1048576 bytes (1.0 MB) copied, 0.494627 s, 2.1 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=1 1048576 bytes (1.0 MB) copied, 0.498009 s, 2.1 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=2 1048576 bytes (1.0 MB) copied, 0.511239 s, 2.1 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=3 1048576 bytes (1.0 MB) copied, 0.491407 s, 2.1 MB/s 9b120070d72bcf8924837e57e7a21b20 orig/rand.file md5sum: mount/rand.file: Input/output error 
Stopping level5 array mdadm: stopped /dev/md0 Removing loop devices ## RAID6 Setting up loop devices 33554432 bytes (34 MB) copied, 0.167961 s, 200 MB/s loop0 33554432 bytes (34 MB) copied, 0.149083 s, 225 MB/s loop1 33554432 bytes (34 MB) copied, 0.156879 s, 214 MB/s loop2 33554432 bytes (34 MB) copied, 0.145982 s, 230 MB/s loop3 total 131224 drwxr-xr-x 2 root root 4096 2009-01-28 00:51 mount drwxr-xr-x 2 root root 4096 2009-01-28 00:51 orig -rw-r--r-- 1 root root 33554432 2009-01-28 00:56 rd0 -rw-r--r-- 1 root root 33554432 2009-01-28 00:56 rd1 -rw-r--r-- 1 root root 33554432 2009-01-28 00:56 rd2 -rw-r--r-- 1 root root 33554432 2009-01-28 00:56 rd3 Create array, level 6, on all four [loop] disks mdadm: array /dev/md0 started. 65408 blocks level 6, 64k chunk, algorithm 2 [4/4] [UUUU] Personalities : [raid6] [raid5] [raid4] [raid1] [raid10] md0 : active raid6 loop3[3] loop2[2] loop1[1] loop0[0] 65408 blocks level 6, 64k chunk, algorithm 2 [4/4] [UUUU] [=>...................] resync = 6.2% (2048/32704) finish=0.2min speed=2048K/sec unused devices: mke2fs 1.40.6 (09-Feb-2008) mkdir: cannot create directory `mount': File exists Filesystem 1M-blocks Used Available Use% Mounted on /dev/md/0 62 6 54 9% /root/raid_test/mount making random file of size 52 MiB 54525952 bytes (55 MB) copied, 36.6221 s, 1.5 MB/s mkdir: cannot create directory `orig/': File exists `mount/rand.file' -> `orig/rand.file' a0869de940da0e1e267991605cbcd247 orig/rand.file a0869de940da0e1e267991605cbcd247 mount/rand.file Corrupting member device n=0 1048576 bytes (1.0 MB) copied, 0.733508 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file c131379711f3e60f7aaf7e2e0632ad8d mount/rand.file Removing and re-adding (for rebuild) member device n=0 mdadm: set /dev/loop0 faulty in /dev/md0 mdadm: hot removed /dev/loop0 mdadm: re-added /dev/loop0 65408 blocks level 6, 64k chunk, algorithm 2 [4/4] [UUUU] a0869de940da0e1e267991605cbcd247 orig/rand.file a0869de940da0e1e267991605cbcd247 mount/rand.file 
Corrupting member device n=1 1048576 bytes (1.0 MB) copied, 0.717651 s, 1.5 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=1 mdadm: set /dev/loop1 faulty in /dev/md0 mdadm: hot removed /dev/loop1 mdadm: re-added /dev/loop1 65408 blocks level 6, 64k chunk, algorithm 2 [4/4] [UUUU] a0869de940da0e1e267991605cbcd247 orig/rand.file a0869de940da0e1e267991605cbcd247 mount/rand.file Corrupting member device n=2 1048576 bytes (1.0 MB) copied, 0.748158 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file d9fd81aa40636f4f19b5960582557ebc mount/rand.file Removing and re-adding (for rebuild) member device n=2 mdadm: set /dev/loop2 faulty in /dev/md0 mdadm: hot removed /dev/loop2 mdadm: re-added /dev/loop2 65408 blocks level 6, 64k chunk, algorithm 2 [4/4] [UUUU] a0869de940da0e1e267991605cbcd247 orig/rand.file a0869de940da0e1e267991605cbcd247 mount/rand.file Corrupting member device n=3 1048576 bytes (1.0 MB) copied, 0.74963 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=3 mdadm: set /dev/loop3 faulty in /dev/md0 mdadm: hot removed /dev/loop3 mdadm: re-added /dev/loop3 65408 blocks level 6, 64k chunk, algorithm 2 [4/4] [UUUU] a0869de940da0e1e267991605cbcd247 orig/rand.file a0869de940da0e1e267991605cbcd247 mount/rand.file Corrupting member device (and not repairing) n=0 1048576 bytes (1.0 MB) copied, 0.738222 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file b55fa6428949cad551953e848b7239f7 mount/rand.file Corrupting member device (and not repairing) n=1 1048576 bytes (1.0 MB) copied, 0.727818 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=2 1048576 bytes (1.0 MB) copied, 0.727676 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file md5sum: 
mount/rand.file: Input/output error Corrupting member device (and not repairing) n=3 1048576 bytes (1.0 MB) copied, 0.730324 s, 1.4 MB/s a0869de940da0e1e267991605cbcd247 orig/rand.file md5sum: mount/rand.file: Input/output error Stopping level6 array mdadm: stopped /dev/md0 Removing loop devices ## RAID10 Setting up loop devices 33554432 bytes (34 MB) copied, 0.149319 s, 225 MB/s loop0 33554432 bytes (34 MB) copied, 0.154809 s, 217 MB/s loop1 33554432 bytes (34 MB) copied, 0.14554 s, 231 MB/s loop2 33554432 bytes (34 MB) copied, 0.145711 s, 230 MB/s loop3 total 131224 drwxr-xr-x 2 root root 4096 2009-01-28 00:51 mount drwxr-xr-x 2 root root 4096 2009-01-28 00:51 orig -rw-r--r-- 1 root root 33554432 2009-01-28 00:58 rd0 -rw-r--r-- 1 root root 33554432 2009-01-28 00:58 rd1 -rw-r--r-- 1 root root 33554432 2009-01-28 00:58 rd2 -rw-r--r-- 1 root root 33554432 2009-01-28 00:58 rd3 Create array, level 10, on all four [loop] disks mdadm: array /dev/md0 started. 65408 blocks 64K chunks 2 near-copies [4/4] [UUUU] Personalities : [raid6] [raid5] [raid4] [raid1] [raid10] md0 : active raid10 loop3[3] loop2[2] loop1[1] loop0[0] 65408 blocks 64K chunks 2 near-copies [4/4] [UUUU] [===>.................] 
resync = 18.7% (12864/65408) finish=0.0min speed=12864K/sec unused devices: mke2fs 1.40.6 (09-Feb-2008) mkdir: cannot create directory `mount': File exists Filesystem 1M-blocks Used Available Use% Mounted on /dev/md/0 62 6 54 9% /root/raid_test/mount making random file of size 52 MiB 54525952 bytes (55 MB) copied, 27.6485 s, 2.0 MB/s mkdir: cannot create directory `orig/': File exists `mount/rand.file' -> `orig/rand.file' 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Corrupting member device n=0 1048576 bytes (1.0 MB) copied, 0.61701 s, 1.7 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file md5sum: mount/rand.file: Input/output error Removing and re-adding (for rebuild) member device n=0 mdadm: set /dev/loop0 faulty in /dev/md0 mdadm: hot removed /dev/loop0 mdadm: re-added /dev/loop0 65408 blocks 64K chunks 2 near-copies [4/4] [UUUU] 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Corrupting member device n=1 1048576 bytes (1.0 MB) copied, 0.494539 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Removing and re-adding (for rebuild) member device n=1 mdadm: set /dev/loop1 faulty in /dev/md0 mdadm: hot removed /dev/loop1 mdadm: re-added /dev/loop1 65408 blocks 64K chunks 2 near-copies [4/4] [UUUU] 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Corrupting member device n=2 1048576 bytes (1.0 MB) copied, 0.497395 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file a9ed9896a10893ef2cce1a4dd4383e8a mount/rand.file Removing and re-adding (for rebuild) member device n=2 mdadm: set /dev/loop2 faulty in /dev/md0 mdadm: hot removed /dev/loop2 mdadm: re-added /dev/loop2 65408 blocks 64K chunks 2 near-copies [4/4] [UUUU] 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Corrupting member device n=3 1048576 bytes 
(1.0 MB) copied, 0.499264 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Removing and re-adding (for rebuild) member device n=3 mdadm: set /dev/loop3 faulty in /dev/md0 mdadm: hot removed /dev/loop3 mdadm: re-added /dev/loop3 65408 blocks 64K chunks 2 near-copies [4/4] [UUUU] 45f00a98b5a239340d5257d459460dcd orig/rand.file 45f00a98b5a239340d5257d459460dcd mount/rand.file Corrupting member device (and not repairing) n=0 1048576 bytes (1.0 MB) copied, 0.500458 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=1 1048576 bytes (1.0 MB) copied, 0.497263 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=2 1048576 bytes (1.0 MB) copied, 0.504304 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file md5sum: mount/rand.file: Input/output error Corrupting member device (and not repairing) n=3 1048576 bytes (1.0 MB) copied, 0.498214 s, 2.1 MB/s 45f00a98b5a239340d5257d459460dcd orig/rand.file md5sum: mount/rand.file: Input/output error Stopping level10 array mdadm: stopped /dev/md0 Removing loop devices # (end of script's output) ################################################################################## # The preliminary notes from when the above script was not written # but interactive commands were used. `>' denotes a response # rather than a command. # Lack of sync and remount may have been the reason for apparent `nice' # (but unrepresentative) behaviour here. 
mkdir /root/raid_test cd /root/raid_test for n in 0 1 2 3 do dd if=/dev/zero of=rd$n bs=$((1024*1024)) count=64 losetup /dev/loop$n rd$n done mdadm --create /dev/md0 --auto=yes -l 5 -n 4 /dev/loop[0123] cat /proc/mdstat > md0 : active raid5 loop3[3] loop2[2] loop1[1] loop0[0] > 196416 blocks level 5, 64k chunk, algorithm 2 [4/4] [UUUU] mkfs.ext3 /dev/md0 mkdir mount mount -t ext3 /dev/md0 mount mkdir orig cp -a /etc /sbin /bin /lib /boot orig/ df -h mount > /dev/md/0 186M 136M 41M 77% /root/raid_test/mount rsync -a --delete -c -v orig/ mount/ > deleting lost+found/ > etc/gconf/gconf.xml.defaults/%gconf-tree-nb.xml ?? dd bs=$((1024*1024)) seek=10 if=/dev/urandom of=rd3 count=1 dd bs=$((1024*1024)) seek=20 if=/dev/urandom of=rd3 count=1 dmesg > attempt to access beyond end of device > md0: rw=0, want=1818782832, limit=392832 > EXT3-fs error (device md0): ext3_free_blocks: Freeing blocks not in datazone - block = 1869897588, count = 1 [last repeated, with varied block number, many times] > EXT3-fs error (device md0): htree_dirblock_to_tree: bad entry in directory #36870: rec_len is smaller than minimal - offset=5120, inode=0, rec_len=0, name_len=0 That was a little stupid, as dd to the file rather than to the loop device has truncated the file, even a little more extreme an abuse than had been considered before. Try again, still R5. Make it all smaller -- no real need of big arrays. [New numbers, not 0,1... are because the old one's still got unkillable processes on it.] 
----- umount -fl mount for n in 4 5 6 7 do dd if=/dev/zero of=rd$n bs=$((1024*1024)) count=32 losetup /dev/loop$n rd$n done mdadm --create /dev/md1 --auto=yes -l 5 -n 4 /dev/loop[4567] mkfs.ext3 /dev/md1 mount -t ext3 /dev/md1 mount tar orig >mount/tar [^C after a while, about 32MB, then again to mount/tar2 then mount/tar1 till full] rm -r orig mkdir orig cp -a mount/* orig/ rsync -a -c -v --delete orig/ mount/ > ./ dd bs=$((1024*1024)) seek=10 if=/dev/urandom of=/dev/loop7 count=1 dd bs=$((1024*1024)) seek=20 if=/dev/urandom of=/dev/loop7 count=1 ls -l rd* > -rw-r--r-- 1 root root 33554432 2009-01-27 18:29 rd4 > -rw-r--r-- 1 root root 33554432 2009-01-27 18:29 rd5 > -rw-r--r-- 1 root root 33554432 2009-01-27 18:29 rd6 > -rw-r--r-- 1 root root 33554432 2009-01-27 18:29 rd7 root@mlap ~/raid_test # md5sum orig/* mount/* md5sum: orig/lost+found: Is a directory 0ce64177752b4ffad0b483e27cb0df1f orig/tar b0716c52927f45ad7bd3dcc4b2f467a1 orig/tar1 afc28edf1a03c83a606d3f130664f86c orig/tar2 md5sum: mount/lost+found: Is a directory 18713c5464a3828591f35efc5a302e6c mount/tar b0716c52927f45ad7bd3dcc4b2f467a1 mount/tar1 md5sum: mount/tar2: Input/output error messages: Jan 27 18:41:24 mlap kernel: attempt to access beyond end of device Jan 27 18:41:24 mlap kernel: md1: rw=0, want=4284279638, limit=196224 Jan 27 18:41:24 mlap kernel: __ratelimit: 258 messages suppressed umount mount mdadm --stop /dev/md1 for d in /dev/loop[4567] do losetup -d $d done ------- for n in 4 5 6 7 do dd if=/dev/zero of=rd$n bs=$((1024*1024)) count=32 losetup /dev/loop$n rd$n done mdadm --create /dev/md1 --auto=yes -l 6 -n 4 /dev/loop[4567] df -h mount > /dev/md/1 62M 5.3M 54M 9% /root/raid_test/mount ls -sh orig/ > total 83M > 4.0K lost+found 31M tar 18M tar1 34M tar2 rm orig/tar2 cp -a orig/tar* mount/ md5sum {orig,mount}/t* > 0ce64177752b4ffad0b483e27cb0df1f orig/tar > b0716c52927f45ad7bd3dcc4b2f467a1 orig/tar1 > 0ce64177752b4ffad0b483e27cb0df1f mount/tar > b0716c52927f45ad7bd3dcc4b2f467a1 
mount/tar1 dd bs=$((1024*1024)) seek=10 if=/dev/urandom of=/dev/loop7 count=1 dd bs=$((1024*1024)) seek=20 if=/dev/urandom of=/dev/loop7 count=1 md5sum {orig,mount}/t* 0ce64177752b4ffad0b483e27cb0df1f orig/tar b0716c52927f45ad7bd3dcc4b2f467a1 orig/tar1 0ce64177752b4ffad0b483e27cb0df1f mount/tar b0716c52927f45ad7bd3dcc4b2f467a1 mount/tar1 mdadm --manage /dev/md1 --fail /dev/loop7 > mdadm: set /dev/loop7 faulty in /dev/md1 mdadm --manage /dev/md1 --remove /dev/loop7 > mdadm: hot removed /dev/loop7 mdadm --manage /dev/md1 --add /dev/loop7 > mdadm: re-added /dev/loop7 dd bs=$((1024*1024)) seek=10 if=/dev/urandom of=/dev/loop4 count=1 dd bs=$((1024*1024)) seek=20 if=/dev/urandom of=/dev/loop4 count=5 md5sum {orig,mount}/t* > 0ce64177752b4ffad0b483e27cb0df1f orig/tar > b0716c52927f45ad7bd3dcc4b2f467a1 orig/tar1 > 0ce64177752b4ffad0b483e27cb0df1f mount/tar > b0716c52927f45ad7bd3dcc4b2f467a1 mount/tar1 [BUT: no messages, no change in /proc/mdstat] dd bs=$((1024*1024)) seek=10 if=/dev/urandom of=/dev/loop6 count=1 dd bs=$((1024*1024)) seek=20 if=/dev/urandom of=/dev/loop6 count=5 md5sum {orig,mount}/t* 0ce64177752b4ffad0b483e27cb0df1f orig/tar b0716c52927f45ad7bd3dcc4b2f467a1 orig/tar1 0ce64177752b4ffad0b483e27cb0df1f mount/tar b0716c52927f45ad7bd3dcc4b2f467a1 mount/tar1 dd bs=$((1024*1024)) seek=10 if=/dev/urandom of=/dev/loop5 count=1 dd bs=$((1024*1024)) seek=20 if=/dev/urandom of=/dev/loop5 count=5 md5sum {orig,mount}/t* 0ce64177752b4ffad0b483e27cb0df1f orig/tar b0716c52927f45ad7bd3dcc4b2f467a1 orig/tar1 d0321a851673e6844b7cd26463e16025 mount/tar md5sum: mount/tar1: Input/output error ##################################