Source: Explosives in Cyberspace
Part 1: A file is compressed by including compression markers of the form
(#x#)...
where the first number tells how many characters to copy and the second is how many times to copy them. SoA(2x3)HA!
becomesAHAHAHA!
.
For part 1, we could likely just expand the actual strings. But I’m guessing part 2 is going to do something crazy recursive. So let’s just calculate the length directly.
re_compressed_block = re.compile(r'\((?P<length>\d+)x(?P<count>\d+)\)')
def decompressed_length(content):
index = 0
output_length = 0
while True:
m = re_compressed_block.search(content, pos = index)
if m:
# Content before the current block
output_length += m.start() - index
# Expanded content
length = int(m.group('length'))
count = int(m.group('count'))
output_length += length * count
# Skip past this block for the next iteration
index = m.end() + length
else:
break
output_length += len(content) - index
return output_length
if os.path.exists(args.input):
with open(args.input, 'r') as fin:
content = re.sub('\s+', '', fin.read())
else:
content = args.input
print('v1:', decompressed_length(content, 1))
Part 2: If a decompressed string contains a marker, decompress that as well. Repeat.
Called that.
re_compressed_block = re.compile(r'\((?P<length>\d+)x(?P<count>\d+)\)')
def decompressed_length(content, version):
index = 0
output_length = 0
while True:
m = re_compressed_block.search(content, pos = index)
if m:
# Content before the current block
output_length += m.start() - index
# The current block, expand recursively for version 2
length = int(m.group('length'))
count = int(m.group('count'))
if version == 1:
output_length += length * count
elif version == 2:
block = content[m.end() : m.end() + length]
expanded_block_length = decompressed_length(block, version = 2)
output_length += expanded_block_length * count
# Skip past this block for the next iteration
index = m.end() + length
else:
break
output_length += len(content) - index
return output_length
print('v1:', decompressed_length(content, 1))
print('v2:', decompressed_length(content, 2))
My output for v2 was over 10 billion, so keeping that all in memory is unlikely to have ended well…