#!/bin/sh
# A script that generates incomp-[12].bz2 files.

# It's commonly believed that bzip2 always creates bit-identical compressed
# files for a given input file, provided that the same block size is used.
#
# In fact, that's not true. The output file can differ because the Reversible
# Transformation (aka BWT) is sometimes ambiguous. In some cases, for a given
# input string, it's possible to construct two or more different transforms.
#
# The simplest case is a string consisting of two identical characters, for
# instance AA. There exist exactly 2 transforms for this string, namely
# AA with bwt_idx=0 and AA with bwt_idx=1.
#
# bzip2 uses two different algorithms for block-sorting. Both of them can
# produce different transforms for the same input, resulting in slightly
# different .bz2 files. The direct reason for that is the use of non-stable
# sorting algorithms, namely shellsort and quicksort.

# The following code first creates an input string that has exactly 10
# different transforms. The string is then compressed with different work
# factors. The two resulting .bz2 files are different (at least for
# bzip2 1.0.6 and 1.0.5).

# gen_input: write the 10000-byte test input to stdout, with no trailing
# newline.
#
# The input is a 1000-byte string repeated 10 times. Each 1000-byte string
# consists of ten 100-byte records that differ only in their first byte
# (A..J) and share the same 99-byte tail.
#
# printf is used instead of `echo -n` because the behaviour of `echo -n` is
# implementation-defined under /bin/sh (some shells print a literal "-n" and
# a newline, which would corrupt the input). Explicit word lists are used
# instead of `seq`, which is not a POSIX utility.
#
# The function body is a subshell "( ... )" so that the helper variables do
# not leak into the rest of the script.
gen_input() (
  record_tail=123456789b123456789c123456789d123456789e123456789
  record_tail=${record_tail}f123456789g123456789h123456789i123456789j123456789
  for pass in 1 2 3 4 5 6 7 8 9 10; do
    for lead in A B C D E F G H I J; do
      printf '%s%s' "$lead" "$record_tail"
    done
  done
)

# First file: compress the generated input with bzip2's default settings.
gen_input | bzip2 >incomp-1.bz2

# Second file: decompress the first file and recompress the very same data
# with --exponential.
# NOTE(review): --exponential is not described in the bzip2 man page; in the
# bzip2 1.0.x sources it appears to set the work factor to 1 -- confirm for
# the bzip2 version in use.
bzcat incomp-1.bz2 | bzip2 --exponential >incomp-2.bz2