Monday, December 14, 2015
Are Files Appends Really Atomic?
Nice post, and thanks to the original author!
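The trick that makes this work: >> opens the file with O_APPEND, and POSIX guarantees that with O_APPEND the seek-to-end and the write happen as one atomic step, so a line that fits in a single write(2) call can't be interleaved with another process' output. You can watch this yourself with strace (a minimal sketch, assuming strace is installed; /tmp/out.tmp is just an example path):

strace -e trace=write bash -c 'echo hello >> /tmp/out.tmp'
# The whole line should land in a single syscall, e.g.:
#   write(1, "hello\n", 6) = 6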
Ref: http://www.notthewizard.com/2014/06/17/are-files-appends-really-atomic/
#!/bin/bash
#############################################################################
#
# This script aims to test/prove that you can append to a single file from
# multiple processes with buffers up to a certain size, without causing one
# process' output to corrupt the other's.
#
# The script takes one parameter, the length of the buffer. It then creates
# 20 worker processes which each write 50 lines of the specified buffer
# size to the same file. When all processes are done outputting, it tests
# the output file to ensure it is in the correct format.
#
#############################################################################
NUM_WORKERS=20
LINES_PER_WORKER=50
OUTPUT_FILE=/tmp/out.tmp
# each worker will output $LINES_PER_WORKER lines to the output file
run_worker() {
worker_num=$1
buf_len=$2
# Each line will be a specific character, multiplied by the line length.
# The character changes based on the worker number.
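# (worker_num+64 maps workers 1..20 to ASCII codes 65..84, i.e. 'A'..'T';
# the inner printf builds an octal escape that the outer printf prints)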
filler_len=$((${buf_len}-1)) # -1 -> leave room for \n
filler_char=$(printf \\$(printf '%03o' $(($worker_num+64))))
line=$(for i in $(seq 1 $filler_len); do echo -n "$filler_char"; done)
for i in $(seq 1 $LINES_PER_WORKER)
do
echo "$line" >> "$OUTPUT_FILE"
done
}
if [ "$1" = "worker" ]; then
run_worker $2 $3
exit
fi
buf_len=$1
if [ "$buf_len" = "" ]; then
echo "Buffer length not specified, defaulting to 4096"
buf_len=4096
fi
rm -f $OUTPUT_FILE
echo Launching $NUM_WORKERS worker processes
for i in $(seq 1 $NUM_WORKERS)
do
$0 worker $i $buf_len &
pids[$i]=$!
done
echo Each line will be $buf_len characters long
echo Waiting for processes to exit
for i in $(seq 1 $NUM_WORKERS)
do
wait ${pids[$i]}
done
# Now we want to test the output file. Each line should be the same letter
# repeated buf_len-1 times (remember the \n takes up one byte). If workers
# wrote over each other's lines, there would be mixed characters and/or
# longer/shorter lines.
echo Testing output file
# Make sure the file is the right size (ensures processes didn't write over
# each other's lines)
expected_file_size=$(($NUM_WORKERS * $LINES_PER_WORKER * $buf_len))
actual_file_size=$(wc -c < "$OUTPUT_FILE")
if [ "$expected_file_size" -ne "$actual_file_size" ]; then
echo Expected file size of $expected_file_size, but got $actual_file_size
else
# File size is OK, test the actual content
# Only use newer versions of grep because older ones are way too slow with
# backreferences
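# (the major and minor digits get concatenated below, e.g. grep 2.16 -> "216")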
[[ $(grep --version) =~ [^[:digit:]]*([[:digit:]]+)\.([[:digit:]]+) ]]
grep_ver="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
if [ "$grep_ver" -ge "216" ]; then
num_lines=$(grep -v "^\(.\)\1\{$((${buf_len}-2))\}$" $OUTPUT_FILE | wc -l)
else
# Scan line by line in bash, which isn't that speedy, but is good enough
# Note: Doesn't work on cygwin for lines < 255
line_length=$((${buf_len}-1))
num_lines=0
while IFS= read -r line
do
if ! [[ $line =~ ^${line:0:1}{$line_length}$ ]]; then
num_lines=$(($num_lines+1))
fi
echo -n .
done < "$OUTPUT_FILE"
echo
fi
if [ "$num_lines" -gt "0" ]; then
echo "Found $num_lines instances of corrupted lines"
else
echo "All's good! The output file had no corrupted lines. $size"
fi
fi
rm -f $OUTPUT_FILE
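To try it, save the script and run it with a line length (the name append_test.sh below is just an example); a run where each line fits in a single write should report no corruption:

$ chmod +x append_test.sh
$ ./append_test.sh 1024
Launching 20 worker processes
Each line will be 1024 characters long
Waiting for processes to exit
Testing output file
All's good! The output file had no corrupted lines.

Cranking buf_len up is how you probe for the point where appends stop being atomic on your particular system and filesystem.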