diff --git a/Package.swift b/Package.swift index 450f42e7..b335135c 100644 --- a/Package.swift +++ b/Package.swift @@ -24,13 +24,15 @@ let package:Package = .init(name: "swift-png", ], targets: [ - .target(name: "LZ77"), + .target(name: "LZ77", dependencies: + [ + .product(name: "CRC", package: "swift-hash"), + ]), .target(name: "PNG", dependencies: [ .target(name: "LZ77"), - .product(name: "CRC", package: "swift-hash"), ]), .target(name: "PNGInspection", diff --git a/Snippets/GzipCompression.swift b/Snippets/GzipCompression.swift new file mode 100644 index 00000000..4fd131f5 --- /dev/null +++ b/Snippets/GzipCompression.swift @@ -0,0 +1,40 @@ +import LZ77 +import PNG + +let path:String = "Snippets/GzipCompression/example" + +guard +let original:[UInt8] = (System.File.Source.open(path: "\(path).gz") +{ + (source:inout System.File.Source) -> [UInt8]? in + + guard let count:Int = source.count + else + { + return nil + } + return source.read(count: count) +} ?? nil) +else +{ + fatalError("failed to open or read file '\(path).gz'") +} + +var inflator:Gzip.Inflator = .init() +try inflator.push(original[...]) + +let utf8:[UInt8] = inflator.pull() +let text:String = .init(decoding: utf8, as: Unicode.UTF8.self) + +print(text) + +var deflator:Gzip.Deflator = .init(level: 13, exponent: 15, hint: 128 << 10) + deflator.push(utf8[...], last: true) + +let _:Void? 
= System.File.Destination.open(path: "\(path).txt.gz") +{ + while let part:[UInt8] = deflator.pull() + { + $0.write(part) + } +} diff --git a/Snippets/GzipCompression/example.gz b/Snippets/GzipCompression/example.gz new file mode 100644 index 00000000..3242704a Binary files /dev/null and b/Snippets/GzipCompression/example.gz differ diff --git a/Snippets/GzipCompression/example.txt.gz b/Snippets/GzipCompression/example.txt.gz new file mode 100644 index 00000000..e6113b3e Binary files /dev/null and b/Snippets/GzipCompression/example.txt.gz differ diff --git a/Sources/LZ77/Deflator/LZ77.Deflator.swift b/Sources/LZ77/Deflator/LZ77.Deflator.swift index 8f461270..9f642c3b 100644 --- a/Sources/LZ77/Deflator/LZ77.Deflator.swift +++ b/Sources/LZ77/Deflator/LZ77.Deflator.swift @@ -8,48 +8,21 @@ extension LZ77 struct Deflator { private - var stream:Stream + var buffers:DeflatorBuffers + + public + init(format:LZ77.Format = .zlib, level:Int, exponent:Int = 15, hint:Int = 1 << 12) + { + self.buffers = .init(format: format, level: level, exponent: exponent, hint: hint) + } } } extension LZ77.Deflator { - public - init(format:LZ77.DeflateFormat = .zlib, level:Int, exponent:Int = 15, hint:Int = 1 << 12) - { - let e:Int - switch format - { - case .zlib: e = exponent - case .ios : e = 15 - } - - self.stream = .init(format: format, level: level, exponent: e, hint: hint) - self.stream.start(exponent: e) - } - public mutating func push(_ data:ArraySlice, last:Bool = false) { - // rebase input buffer - if !data.isEmpty - { - self.stream.input.enqueue(contentsOf: data) - } - guard self.stream.input.count > 4096 || last - else - { - return - } - - while let _:Void = self.stream.compress(all: last) - { - self.stream.block(final: false) - } - if last - { - self.stream.block(final: true) - self.stream.checksum() - } + self.buffers.push(data, last: last) } /// Returns a block of compressed data from this deflator, if available. 
If no compressed @@ -58,13 +31,7 @@ extension LZ77.Deflator public mutating func pull() -> [UInt8]? { - if let complete:[UInt8] = self.pop() - { - return complete - } - - let flushed:[UInt8] = self.stream.output.pull() - return flushed.isEmpty ? nil : flushed + self.buffers.pull() } /// Removes and returns a complete block of compressed data from this deflator, if @@ -72,6 +39,6 @@ extension LZ77.Deflator public mutating func pop() -> [UInt8]? { - self.stream.output.pop() + self.buffers.pop() } } diff --git a/Sources/LZ77/Deflator/LZ77.Deflator.Stream.swift b/Sources/LZ77/Deflator/LZ77.DeflatorBuffers.Stream.swift similarity index 88% rename from Sources/LZ77/Deflator/LZ77.Deflator.Stream.swift rename to Sources/LZ77/Deflator/LZ77.DeflatorBuffers.Stream.swift index 07a5095a..4931db2a 100644 --- a/Sources/LZ77/Deflator/LZ77.Deflator.Stream.swift +++ b/Sources/LZ77/Deflator/LZ77.DeflatorBuffers.Stream.swift @@ -1,89 +1,31 @@ -extension LZ77.Deflator +extension LZ77.DeflatorBuffers { @frozen @usableFromInline struct Stream { - let format:LZ77.DeflateFormat let search:LZ77.DeflatorSearch - var input:LZ77.DeflatorIn - //var queued:(run:Int, extend:Int, distance:Int)? - //var terms:[Term] - var window:LZ77.DeflatorWindow var matches:LZ77.DeflatorMatches + var window:LZ77.DeflatorWindow var output:LZ77.DeflatorOut + var input:LZ77.DeflatorIn - init(format:LZ77.DeflateFormat, level:Int, exponent:Int, hint:Int) + init(search:LZ77.DeflatorSearch, + matches:LZ77.DeflatorMatches, + window:LZ77.DeflatorWindow, + output:LZ77.DeflatorOut, + input:LZ77.DeflatorIn) { - precondition(8 ..< 16 ~= exponent, - "exponent cannot be less than 8 or greater than 15") - - switch level - { - case .min ... 
0: - self.search = .greedy(attempts: 1, goal: 6) - case 1: - self.search = .greedy(attempts: 2, goal: 8) - case 2: - self.search = .greedy(attempts: 4, goal: 10) - case 3: - self.search = .greedy(attempts: 40, goal: 24) - - case 4: - self.search = .lazy( attempts: 20, goal: 32) - case 5: - self.search = .lazy( attempts: 40, goal: 54) - case 6: - self.search = .lazy( attempts: 64, goal: 80) - case 7: - self.search = .lazy( attempts: 100, goal: 160) - - case 8: - self.search = .full( attempts: 14, goal: 20, iterations: 1) - case 9: - self.search = .full( attempts: 20, goal: 32, iterations: 2) - case 10: - self.search = .full( attempts: 30, goal: 50, iterations: 3) - case 11: - self.search = .full( attempts: 60, goal: 80, iterations: 4) - case 12: - self.search = .full( attempts: 100, goal: 133, iterations: 5) - default: - self.search = .full( attempts:.max, goal: 258, iterations: 6) - } - // match buffer is either a vector of terms, or a directed-graph - switch self.search - { - case .greedy, .lazy: - self.matches = .terms(capacity: 1 << 15) - case .full: - self.matches = .graph(capacity: 1 << 16) - } - - self.format = format - - self.input = .init() - self.window = .init(exponent: exponent) - self.output = .init(hint: hint) + self.search = search + self.matches = matches + self.window = window + self.output = output + self.input = input } } } -extension LZ77.Deflator.Stream +extension LZ77.DeflatorBuffers.Stream { - mutating - func start(exponent:Int) - { - if case .ios = self.format - { - return - } - - let unpaired:UInt16 = .init(exponent - 8) << 4 | 0x08 - let check:UInt16 = ~((unpaired << 8 | unpaired >> 8) % 31) & 31 - - self.output.append(check << 8 | unpaired, count: 16) - } - mutating func compress(all:Bool) -> Void? 
{ @@ -427,140 +369,8 @@ extension LZ77.Deflator.Stream return nil } - private mutating - func blockStart(final:Bool, runliterals:Int, distances:Int, metatree:LZ77.HuffmanTree) - { - let codelengths:[UInt16] = .init(unsafeUninitializedCapacity: 19) - { - $0.initialize(repeating: 0) - for (length, level):(UInt16, Range) in zip(1 ... 8, metatree.levels) - { - for symbol:UInt8 in metatree.symbols[level] - { - let z:Int = - [ - 3, 17, 15, 13, 11, 9, 7, 5, - 4, 6, 8, 10, 12, 14, 16, 18, - 0, 1, 2 - ][.init(symbol)] - - $0[z] = length - } - } - // max(4, _) because HCLEN cannot be less than 4 - $1 = max(4, $0.reversed().drop{ $0 == 0 }.count) - } - - self.output.append(final ? 0b10_1 : 0b10_0, count: 3) - - self.output.append(.init(runliterals - 257), count: 5) - self.output.append(.init(distances - 1), count: 5) - self.output.append(.init(codelengths.count - 4), count: 4) - for codelength:UInt16 in codelengths - { - self.output.append(codelength, count: 3) - } - } - - private mutating - func blockTables(_ metaterms:[LZ77.DeflatorTerm.Meta], semistatic:LZ77.DeflatorTables) - { - for metaterm:LZ77.DeflatorTerm.Meta in metaterms - { - let codeword:LZ77.Codeword = semistatic[meta: metaterm.symbol] - self.output.append(codeword.bits, count: codeword.length) - self.output.append(metaterm.bits, count: codeword.extra) - } - } - - private mutating - func blockCompressed(semistatic:LZ77.DeflatorTables) - { - switch self.search - { - case .greedy, .lazy: - for index:Int in self.matches.indices - { - let term:LZ77.DeflatorTerm = .init(storage: self.matches[offset: index]) - - let symbol:(runliteral:UInt16, distance:UInt8) = term.symbol - let codeword:(runliteral:LZ77.Codeword, distance:LZ77.Codeword) - - codeword.runliteral = semistatic[runliteral: symbol.runliteral] - - self.output.append(codeword.runliteral.bits, count: codeword.runliteral.length) - - if symbol.runliteral > 256 - { - // there are extra bits and a distance code to follow - let bits:(run:UInt16, distance:UInt16) = 
term.bits - - codeword.distance = semistatic[distance: symbol.distance] - - self.output.append(bits.run, count: codeword.runliteral.extra) - self.output.append(codeword.distance.bits, count: codeword.distance.length) - self.output.append(bits.distance, count: codeword.distance.extra) - } - } - - // end-of-block symbol - let end:LZ77.Codeword = semistatic[runliteral: 256] - self.output.append(end.bits, count: end.length) - - self.matches.resetTerms() - - case .full: - var index:Int = self.matches.startIndex - while index < self.matches.endIndex - { - let upstream:UInt32 = self.matches[offset: index << 5] - let count:Int = .init(upstream >> 16) - if count == 1 - { - let literal:UInt16 = .init(upstream & 0x00_00_00_ff) - let codeword:LZ77.Codeword = semistatic[runliteral: literal] - self.output.append(codeword.bits, count: codeword.length) - } - else - { - let decade:(run:UInt8, distance:UInt8) = - ( - run: LZ77.Decades[run: count], - distance: .init(truncatingIfNeeded: upstream >> 8) - ) - let offset:UInt16 = - .init(self.matches[offset: index << 5 | (2 + .init(decade.distance))] >> 16) - - let bits:(run:UInt16, distance:UInt16) = - ( - run: .init(count) - LZ77.Composites[run: decade.run].base, - distance: offset - LZ77.Composites[distance: decade.distance].base - ) - - let codeword:(run:LZ77.Codeword, distance:LZ77.Codeword) = - ( - run: semistatic[runliteral: 256 | .init(decade.run)], - distance: semistatic[distance: decade.distance] - ) - - self.output.append(codeword.run.bits, count: codeword.run.length) - self.output.append(bits.run, count: codeword.run.extra) - self.output.append(codeword.distance.bits, count: codeword.distance.length) - self.output.append(bits.distance, count: codeword.distance.extra) - } - - index += count - } - // emit end-of-block code - let end:LZ77.Codeword = semistatic[runliteral: 256] - self.output.append(end.bits, count: end.length) - - self.matches.resetGraph() - } - } - mutating - func block(final:Bool) + func writeBlock(final:Bool) 
{ let tree: ( @@ -674,21 +484,164 @@ extension LZ77.Deflator.Stream tree.meta = .init(frequencies: frequencies, limit: 7) - self.blockStart(final: final, runliterals: r, distances: d, metatree: tree.meta) + self.writeBlockMetadata(tree: tree.meta, + literals: r, + distances: d, + final: final) - let semistatic:LZ77.DeflatorTables = .init( + let tables:LZ77.DeflatorTables = .init( runliteral: tree.runliteral, distance: tree.distance, meta: tree.meta) - self.blockTables(terms, semistatic: semistatic) - self.blockCompressed(semistatic: semistatic) + self.writeBlockTables(tables, terms: terms) + self.writeBlock(with: tables) /* let dicing:LZ77.Deflator.Dicing = .init(self.terms, unit: 1 << 12) self.block(dicing.startIndex, dicing: dicing, last: last) // empty literal buffer self.terms.removeAll(keepingCapacity: true) */ } +} +extension LZ77.DeflatorBuffers.Stream +{ + private mutating + func writeBlockMetadata(tree:LZ77.HuffmanTree, + literals:Int, + distances:Int, + final:Bool) + { + let codelengths:[UInt16] = .init(unsafeUninitializedCapacity: 19) + { + $0.initialize(repeating: 0) + for (length, level):(UInt16, Range) in zip(1 ... 8, tree.levels) + { + for symbol:UInt8 in tree.symbols[level] + { + let z:Int = + [ + 3, 17, 15, 13, 11, 9, 7, 5, + 4, 6, 8, 10, 12, 14, 16, 18, + 0, 1, 2 + ][.init(symbol)] + + $0[z] = length + } + } + // max(4, _) because HCLEN cannot be less than 4 + $1 = max(4, $0.reversed().drop{ $0 == 0 }.count) + } + + self.output.append(final ? 
0b10_1 : 0b10_0, count: 3) + + self.output.append(.init(literals - 257), count: 5) + self.output.append(.init(distances - 1), count: 5) + self.output.append(.init(codelengths.count - 4), count: 4) + for codelength:UInt16 in codelengths + { + self.output.append(codelength, count: 3) + } + } + + private mutating + func writeBlockTables(_ tables:LZ77.DeflatorTables, terms:[LZ77.DeflatorTerm.Meta]) + { + for metaterm:LZ77.DeflatorTerm.Meta in terms + { + let codeword:LZ77.Codeword = tables[meta: metaterm.symbol] + self.output.append(codeword.bits, count: codeword.length) + self.output.append(metaterm.bits, count: codeword.extra) + } + } + + private mutating + func writeBlock(with tables:LZ77.DeflatorTables) + { + switch self.search + { + case .greedy, .lazy: + for index:Int in self.matches.indices + { + let term:LZ77.DeflatorTerm = .init(storage: self.matches[offset: index]) + + let symbol:(runliteral:UInt16, distance:UInt8) = term.symbol + let codeword:(runliteral:LZ77.Codeword, distance:LZ77.Codeword) + + codeword.runliteral = tables[runliteral: symbol.runliteral] + + self.output.append(codeword.runliteral.bits, count: codeword.runliteral.length) + + if symbol.runliteral > 256 + { + // there are extra bits and a distance code to follow + let bits:(run:UInt16, distance:UInt16) = term.bits + + codeword.distance = tables[distance: symbol.distance] + + self.output.append(bits.run, count: codeword.runliteral.extra) + self.output.append(codeword.distance.bits, count: codeword.distance.length) + self.output.append(bits.distance, count: codeword.distance.extra) + } + } + + // end-of-block symbol + let end:LZ77.Codeword = tables[runliteral: 256] + self.output.append(end.bits, count: end.length) + + self.matches.resetTerms() + + case .full: + var index:Int = self.matches.startIndex + while index < self.matches.endIndex + { + let upstream:UInt32 = self.matches[offset: index << 5] + let count:Int = .init(upstream >> 16) + if count == 1 + { + let literal:UInt16 = .init(upstream & 
0x00_00_00_ff) + let codeword:LZ77.Codeword = tables[runliteral: literal] + self.output.append(codeword.bits, count: codeword.length) + } + else + { + let decade:(run:UInt8, distance:UInt8) = + ( + run: LZ77.Decades[run: count], + distance: .init(truncatingIfNeeded: upstream >> 8) + ) + let offset:UInt16 = .init( + self.matches[offset: index << 5 | (2 + .init(decade.distance))] >> 16) + + let bits:(run:UInt16, distance:UInt16) = + ( + run: .init(count) - LZ77.Composites[run: decade.run].base, + distance: offset - LZ77.Composites[distance: decade.distance].base + ) + + let codeword:(run:LZ77.Codeword, distance:LZ77.Codeword) = + ( + run: tables[runliteral: 256 | .init(decade.run)], + distance: tables[distance: decade.distance] + ) + + self.output.append(codeword.run.bits, count: codeword.run.length) + self.output.append(bits.run, count: codeword.run.extra) + self.output.append(codeword.distance.bits, count: codeword.distance.length) + self.output.append(bits.distance, count: codeword.distance.extra) + } + + index += count + } + // emit end-of-block code + let end:LZ77.Codeword = tables[runliteral: 256] + self.output.append(end.bits, count: end.length) + + self.matches.resetGraph() + } + } +} +extension LZ77.DeflatorBuffers.Stream +{ /* private mutating func block(_ index:Int, dicing:LZ77.Deflator.Dicing, last:Bool) @@ -765,17 +718,18 @@ extension LZ77.Deflator.Stream } */ mutating - func checksum() + func writeLittleEndianUInt32(_ uint32:UInt32) { - if case .ios = self.format - { - return - } - // checksum is written big-endian, which means it has to go into the - // bitstream msb-first - let checksum:UInt32 = self.input.checksum().byteSwapped + self.writeBigEndianUInt32(uint32.byteSwapped) + } + + mutating + func writeBigEndianUInt32(_ uint32:UInt32) + { + let uint32:UInt32 = uint32.bigEndian + self.output.pad(to: UInt8.self) - self.output.append(.init(truncatingIfNeeded: checksum ), count: 16) - self.output.append(.init( checksum >> 16), count: 16) + 
self.output.append(.init(truncatingIfNeeded: uint32 ), count: 16) + self.output.append(.init( uint32 >> 16), count: 16) } } diff --git a/Sources/LZ77/Deflator/LZ77.DeflatorBuffers.swift b/Sources/LZ77/Deflator/LZ77.DeflatorBuffers.swift new file mode 100644 index 00000000..e1bc1b18 --- /dev/null +++ b/Sources/LZ77/Deflator/LZ77.DeflatorBuffers.swift @@ -0,0 +1,164 @@ +extension LZ77 +{ + @frozen public + struct DeflatorBuffers where Format:LZ77.FormatType + { + var stream:Stream + let format:Format + + private + init(format:Format, stream:Stream) + { + self.format = format + self.stream = stream + } + } +} +extension LZ77.DeflatorBuffers +{ + private + init(format:Format, exponent:Int, level:Int, hint:Int) + { + precondition(8 ..< 16 ~= exponent, + "exponent cannot be less than 8 or greater than 15") + + let search:LZ77.DeflatorSearch = .init(level: level) + + let matches:LZ77.DeflatorMatches + // match buffer is either a vector of terms, or a directed-graph + switch search + { + case .greedy: matches = .terms(capacity: 1 << 15) + case .lazy: matches = .terms(capacity: 1 << 15) + case .full: matches = .graph(capacity: 1 << 16) + } + + self.init(format: format, stream: .init(search: search, + matches: matches, + window: .init(exponent: exponent), + output: .init(hint: hint), + input: .init())) + } +} +extension LZ77.DeflatorBuffers +{ + init(format:LZ77.Format, level:Int, exponent:Int, hint:Int) + { + let header:LZ77.StreamHeader + + switch format + { + case .zlib: header = .init(exponent: exponent) + case .ios : header = .init(exponent: 15) + } + + self.init(format: format, + exponent: header.exponent, + level: level, + hint: hint) + + switch format + { + case .zlib: header.write(&self.stream.output) + case .ios: break + } + } + + mutating + func push(_ data:ArraySlice, last:Bool) + { + // rebase input buffer + if !data.isEmpty + { + self.stream.input.enqueue(contentsOf: data) + } + guard self.stream.input.count > 4096 || last + else + { + return + } + + while let 
_:Void = self.stream.compress(all: last) + { + self.stream.writeBlock(final: false) + } + if last + { + self.stream.writeBlock(final: true) + + if case .ios = self.format + { + return + } + // checksum is written big-endian, which means it has to go into the + // bitstream msb-first + let checksum:UInt32 = self.stream.input.checksum() + self.stream.writeBigEndianUInt32(checksum) + } + } +} +// TODO: this currently only supports one member. +extension LZ77.DeflatorBuffers +{ + init(format:Gzip.Format, level:Int, exponent:Int, hint:Int) + { + self.init(format: format, + exponent: exponent, + level: level, + hint: hint) + + let header:Gzip.StreamHeader = .init( + flag: (false, false, false, false, false), + xlen: 0) + + header.write(&self.stream.output) + } + + mutating + func push(_ data:ArraySlice, last:Bool) + { + if !data.isEmpty + { + self.stream.input.enqueue(contentsOf: data) + } + guard self.stream.input.count > 4096 || last + else + { + return + } + + while let _:Void = self.stream.compress(all: last) + { + self.stream.writeBlock(final: false) + } + if last + { + self.stream.writeBlock(final: true) + + let checksum:UInt32 = self.stream.input.checksum() + let bytes:UInt32 = self.stream.input.integral.bytes + self.stream.writeLittleEndianUInt32(checksum) + self.stream.writeLittleEndianUInt32(bytes) + } + } +} +extension LZ77.DeflatorBuffers +{ + mutating + func pull() -> [UInt8]? + { + if let complete:[UInt8] = self.pop() + { + return complete + } + + let flushed:[UInt8] = self.stream.output.pull() + return flushed.isEmpty ? nil : flushed + } + + mutating + func pop() -> [UInt8]? 
+ { + self.stream.output.pop() + } +} diff --git a/Sources/LZ77/Deflator/LZ77.DeflatorIn.swift b/Sources/LZ77/Deflator/LZ77.DeflatorIn.swift index feab8689..fbc7757b 100644 --- a/Sources/LZ77/Deflator/LZ77.DeflatorIn.swift +++ b/Sources/LZ77/Deflator/LZ77.DeflatorIn.swift @@ -7,11 +7,11 @@ extension LZ77 var startIndex:Int, endIndex:Int + private(set) + var integral:Integral private var capacity:Int private - var integral:Integral - private var storage:ManagedBuffer init() @@ -25,8 +25,8 @@ extension LZ77 // self.endIndex = 0 self.startIndex = 4 self.endIndex = 4 - self.capacity = capacity self.integral = .init() + self.capacity = capacity } } } diff --git a/Sources/LZ77/Deflator/LZ77.DeflatorSearch.swift b/Sources/LZ77/Deflator/LZ77.DeflatorSearch.swift index bc3151fc..1a6eb832 100644 --- a/Sources/LZ77/Deflator/LZ77.DeflatorSearch.swift +++ b/Sources/LZ77/Deflator/LZ77.DeflatorSearch.swift @@ -8,3 +8,28 @@ extension LZ77 case full(attempts:Int, goal:Int, iterations:Int) } } +extension LZ77.DeflatorSearch +{ + init(level:Int) + { + switch level + { + case .min ... 
0: self = .greedy(attempts: 1, goal: 6) + case 1: self = .greedy(attempts: 2, goal: 8) + case 2: self = .greedy(attempts: 4, goal: 10) + case 3: self = .greedy(attempts: 40, goal: 24) + + case 4: self = .lazy(attempts: 20, goal: 32) + case 5: self = .lazy(attempts: 40, goal: 54) + case 6: self = .lazy(attempts: 64, goal: 80) + case 7: self = .lazy(attempts: 100, goal: 160) + + case 8: self = .full(attempts: 14, goal: 20, iterations: 1) + case 9: self = .full(attempts: 20, goal: 32, iterations: 2) + case 10: self = .full(attempts: 30, goal: 50, iterations: 3) + case 11: self = .full(attempts: 60, goal: 80, iterations: 4) + case 12: self = .full(attempts: 100, goal: 133, iterations: 5) + default: self = .full(attempts: .max, goal: 258, iterations: 6) + } + } +} diff --git a/Sources/LZ77/Gzip/Gzip.Deflator.swift b/Sources/LZ77/Gzip/Gzip.Deflator.swift new file mode 100644 index 00000000..77fef54c --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.Deflator.swift @@ -0,0 +1,40 @@ +extension Gzip +{ + @frozen public + struct Deflator + { + private + var buffers:LZ77.DeflatorBuffers + + public + init(level:Int, exponent:Int = 15, hint:Int = 1 << 12) + { + self.buffers = .init(format: .gzip, level: level, exponent: exponent, hint: hint) + } + } +} +extension Gzip.Deflator +{ + public mutating + func push(_ data:ArraySlice, last:Bool = false) + { + self.buffers.push(data, last: last) + } + + /// Returns a block of gzip-compressed data from this deflator, if available. If no + /// compressed data blocks have been completed yet, this method flushes and returns the + /// incomplete block. + public mutating + func pull() -> [UInt8]? + { + self.buffers.pull() + } + + /// Removes and returns a complete block of gzip-compressed data from this deflator, if + /// available. + public mutating + func pop() -> [UInt8]? 
+ { + self.buffers.pop() + } +} diff --git a/Sources/LZ77/Gzip/Gzip.Format.Integral (ext).swift b/Sources/LZ77/Gzip/Gzip.Format.Integral (ext).swift new file mode 100644 index 00000000..65802ac6 --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.Format.Integral (ext).swift @@ -0,0 +1,32 @@ +import CRC + +extension Gzip.Format +{ + @frozen public + struct Integral + { + @usableFromInline + var crc32:CRC32 + @usableFromInline + var bytes:UInt32 + + @inlinable public + init() + { + self.crc32 = .init() + self.bytes = 0 + } + } +} +extension Gzip.Format.Integral:LZ77.StreamIntegral +{ + @inlinable public + var checksum:UInt32 { self.crc32.checksum } + + @inlinable public mutating + func update(from buffer:UnsafePointer, count:Int) + { + self.crc32.update(with: UnsafeBufferPointer.init(start: buffer, count: count)) + self.bytes += UInt32.init(count) + } +} diff --git a/Sources/LZ77/Gzip/Gzip.Format.swift b/Sources/LZ77/Gzip/Gzip.Format.swift new file mode 100644 index 00000000..6ec35e7a --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.Format.swift @@ -0,0 +1,8 @@ +extension Gzip +{ + @frozen public + enum Format:LZ77.FormatType + { + case gzip + } +} diff --git a/Sources/LZ77/Gzip/Gzip.Inflator.swift b/Sources/LZ77/Gzip/Gzip.Inflator.swift new file mode 100644 index 00000000..9d8d0b10 --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.Inflator.swift @@ -0,0 +1,58 @@ +extension Gzip +{ + @frozen public + struct Inflator + { + private + var buffers:LZ77.InflatorBuffers + private + var state:InflatorState + } +} +extension Gzip.Inflator +{ + public + init() + { + self.buffers = .init(format: .gzip) + self.state = .initial + } +} +extension Gzip.Inflator +{ + /// Pushes **compressed** data to the inflator, returning nil once a complete gzip DEFLATE + /// stream has been received. + public mutating + func push(_ data:ArraySlice) throws -> Void? 
+ { + self.buffers.stream.push(data) + + advancing: + do + { + switch try self.buffers.advance(state: self.state) + { + case .terminal?: + return nil + + case let next?: + self.state = next + continue advancing + + case nil: + return () + } + } + } + + public mutating + func pull(_ count:Int) -> [UInt8]? + { + self.buffers.stream.pull(count) + } + public mutating + func pull() -> [UInt8] + { + self.buffers.stream.pull() + } +} diff --git a/Sources/LZ77/Gzip/Gzip.InflatorState.swift b/Sources/LZ77/Gzip/Gzip.InflatorState.swift new file mode 100644 index 00000000..45946ce1 --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.InflatorState.swift @@ -0,0 +1,13 @@ +extension Gzip +{ + @frozen @usableFromInline + enum InflatorState + { + case initial + case strings(skip:Int, count:Int) + case block(LZ77.BlockState) + case checksum + case epilogue + case terminal + } +} diff --git a/Sources/LZ77/Gzip/Gzip.StreamHeader.swift b/Sources/LZ77/Gzip/Gzip.StreamHeader.swift new file mode 100644 index 00000000..7447822a --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.StreamHeader.swift @@ -0,0 +1,97 @@ +extension Gzip +{ + @frozen public + struct StreamHeader + { + let flag:(Bool, Bool, Bool, Bool, Bool) + let xlen:UInt16 + + init(flag:(Bool, Bool, Bool, Bool, Bool), xlen:UInt16) + { + self.flag = flag + self.xlen = xlen + } + } +} +extension Gzip.StreamHeader +{ + static + func read(_ input:inout LZ77.InflatorIn, from bit:inout Int) throws -> Self? 
+ { + guard bit + 80 <= input.count + else + { + return nil + } + + guard + case 0x8b_1f = input[bit] + else + { + throw Gzip.StreamHeaderError.invalidSigil + } + + switch input[bit + 16, count: 8, as: UInt8.self] + { + case 0x08: break + case let code: throw Gzip.StreamHeaderError.invalidCompressionMethod(code) + } + + let flags:UInt8 = input[bit + 24, count: 8, as: UInt8.self] + if flags & 0b1110_0000 != 0 + { + throw Gzip.StreamHeaderError.invalidFlagBits(flags) + } + + let flag:(Bool, Bool, Bool, Bool, Bool) = + ( + flags & 0x01 != 0, + flags & 0x02 != 0, + flags & 0x04 != 0, + flags & 0x08 != 0, + flags & 0x10 != 0 + ) + + // TODO: read MTIME instead of skipping over it + + if flag.1 + { + throw Gzip.StreamHeaderError._headerChecksumUnsupported + } + + if flag.2 + { + guard bit + 96 <= input.count + else + { + // We will need to reparse the header once more data is available. + return nil + } + + // This is little-endian! + let xlen:UInt16 = .init(littleEndian: input[bit + 80]) + + bit += 96 + + return .init(flag: flag, xlen: xlen) + } + else + { + bit += 80 + return .init(flag: flag, xlen: 0) + } + } + + // TODO: this is discarding all the metadata! 
+ func write(_ output:inout LZ77.DeflatorOut) + { + output.append(0x8b_1f, count: 16) + output.append(0x00_08, count: 16) + + // TODO: support MTIME + output.append(0x00_00, count: 16) + output.append(0x00_00, count: 16) + + output.append(0xff_00, count: 16) + } +} diff --git a/Sources/LZ77/Gzip/Gzip.StreamHeaderError.swift b/Sources/LZ77/Gzip/Gzip.StreamHeaderError.swift new file mode 100644 index 00000000..f06de934 --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.StreamHeaderError.swift @@ -0,0 +1,12 @@ +extension Gzip +{ + @frozen public + enum StreamHeaderError:Error, Sendable + { + case invalidSigil + case invalidCompressionMethod(UInt8) + case invalidFlagBits(UInt8) + + case _headerChecksumUnsupported + } +} diff --git a/Sources/LZ77/Gzip/Gzip.swift b/Sources/LZ77/Gzip/Gzip.swift new file mode 100644 index 00000000..6ecf33e3 --- /dev/null +++ b/Sources/LZ77/Gzip/Gzip.swift @@ -0,0 +1,4 @@ +@frozen public +enum Gzip +{ +} diff --git a/Sources/LZ77/Inflator/LZ77.InflatorTables.Meta.swift b/Sources/LZ77/Inflator/LZ77.BlockMetadata.swift similarity index 96% rename from Sources/LZ77/Inflator/LZ77.InflatorTables.Meta.swift rename to Sources/LZ77/Inflator/LZ77.BlockMetadata.swift index 3cc3fc90..96726886 100644 --- a/Sources/LZ77/Inflator/LZ77.InflatorTables.Meta.swift +++ b/Sources/LZ77/Inflator/LZ77.BlockMetadata.swift @@ -1,13 +1,13 @@ -extension LZ77.InflatorTables +extension LZ77 { @frozen @usableFromInline - struct Meta + struct BlockMetadata { private var storage:ManagedBuffer } } -extension LZ77.InflatorTables.Meta +extension LZ77.BlockMetadata { private static var size:Int diff --git a/Sources/LZ77/Inflator/LZ77.BlockState.swift b/Sources/LZ77/Inflator/LZ77.BlockState.swift new file mode 100644 index 00000000..8049d8e8 --- /dev/null +++ b/Sources/LZ77/Inflator/LZ77.BlockState.swift @@ -0,0 +1,11 @@ +extension LZ77 +{ + @frozen @usableFromInline + enum BlockState + { + case metadata + case tables (final:Bool, literals:Int, distances:Int) + case compressed 
(final:Bool, tables:InflatorTables) + case uncompressed (final:Bool, end:Int) + } +} diff --git a/Sources/LZ77/Inflator/LZ77.BlockType.swift b/Sources/LZ77/Inflator/LZ77.BlockType.swift index c85fc3ba..287ae73f 100644 --- a/Sources/LZ77/Inflator/LZ77.BlockType.swift +++ b/Sources/LZ77/Inflator/LZ77.BlockType.swift @@ -2,8 +2,8 @@ extension LZ77 { enum BlockType { - case dynamic(runliterals:Int, distances:Int) - case fixed - case bytes(Int) + case dynamic (final:Bool, literals:Int, distances:Int) + case fixed (final:Bool) + case bytes (final:Bool, count:Int) } } diff --git a/Sources/LZ77/Inflator/LZ77.DeflateHeader.swift b/Sources/LZ77/Inflator/LZ77.DeflateHeader.swift deleted file mode 100644 index cadf0705..00000000 --- a/Sources/LZ77/Inflator/LZ77.DeflateHeader.swift +++ /dev/null @@ -1,20 +0,0 @@ -extension LZ77 -{ - @frozen public - struct DeflateHeader - { - public - let exponent:Int - - @inlinable public - init(exponent:Int) - { - self.exponent = exponent - } - } -} -extension LZ77.DeflateHeader:LZ77.StreamHeader -{ - public - var window:Int { 1 << self.exponent } -} diff --git a/Sources/LZ77/Inflator/LZ77.Inflator.swift b/Sources/LZ77/Inflator/LZ77.Inflator.swift index 455291d1..ab7eab88 100644 --- a/Sources/LZ77/Inflator/LZ77.Inflator.swift +++ b/Sources/LZ77/Inflator/LZ77.Inflator.swift @@ -7,12 +7,8 @@ extension LZ77 @frozen public struct Inflator { - typealias Format = DeflateFormat - - private - let format:DeflateFormat private - var stream:Stream + var buffers:InflatorBuffers private var state:InflatorState } @@ -20,139 +16,47 @@ extension LZ77 extension LZ77.Inflator { public - init(format:LZ77.DeflateFormat = .zlib) + init(format:LZ77.Format = .zlib) { - self.format = format - self.stream = .init() - self.state = .streamStart + self.buffers = .init(format: format) + self.state = .initial } } extension LZ77.Inflator { - // returns `nil` if the stream is finished + /// Pushes **compressed** data to the inflator, returning nil once a complete zlib 
DEFLATE + /// stream has been received. public mutating func push(_ data:ArraySlice) throws -> Void? { - self.stream.input.rebase(data, pointer: &self.stream.b) - while let _:Void = try self.advance() - { - } - if case .streamEnd = self.state - { - return nil - } - else + self.buffers.stream.push(data) + + advancing: + do { - return () + switch try self.buffers.advance(state: self.state) + { + case .terminal?: + return nil + + case let next?: + self.state = next + continue advancing + + case nil: + return () + } } } + public mutating func pull(_ count:Int) -> [UInt8]? { - self.stream.output.exclude() - return self.stream.output.release(bytes: count) + self.buffers.stream.pull(count) } public mutating func pull() -> [UInt8] { - self.stream.output.exclude() - return self.stream.output.release() - } - - // returns nil if unable to advance - private mutating - func advance() throws -> Void? - { - // pool cow-exclusions here instead of checking the reference count - // on every loop iteration - self.stream.meta.exclude() - self.stream.output.exclude() - switch self.state - { - case .streamStart: - guard - let header:Format.Header = try self.format.begin(inflating: &self.stream.input, - at: &self.stream.b) - else - { - return nil - } - self.stream.output.window = header.window - self.state = .blockStart - - case .blockStart: - guard - let (final, type):(Bool, LZ77.BlockType) = try self.stream.blockStart() - else - { - return nil - } - - switch type - { - case .dynamic(runliterals: let runliterals, distances: let distances): - self.state = .blockTables(final: final, - runliterals: runliterals, distances: distances) - - case .fixed: - self.state = .blockCompressed(final: final, semistatic: .fixed) - - case .bytes(let count): - // compute endindex - let end:Int = self.stream.output.endIndex + count - self.state = .blockUncompressed(final: final, end: end) - } - - #if DUMP_LZ77_BLOCKS - print("< \(type)") - #endif - - case .blockTables(final: let final, runliterals: let 
runliterals, distances: let distances): - guard - let (runliteral, distance):(LZ77.HuffmanTree, LZ77.HuffmanTree) = - try self.stream.blockTables(runliterals: runliterals, distances: distances) - else - { - return nil - } - - self.state = .blockCompressed(final: final, - semistatic: .init(runliteral: runliteral, distance: distance)) - - case .blockUncompressed(final: let final, end: let end): - guard - let _:Void = try self.stream.blockUncompressed(end: end) - else - { - return nil - } - self.state = final ? .streamChecksum : .blockStart - - case .blockCompressed(final: let final, semistatic: let semistatic): - guard - let _:Void = try self.stream.blockCompressed(semistatic: semistatic) - else - { - return nil - } - self.state = final ? .streamChecksum : .blockStart - - case .streamChecksum: - guard - let declared:UInt32? = self.format.check(inflating: &self.stream.input, - at: &self.stream.b) - else - { - return nil - } - - try self.stream.check(declared: declared) - self.state = .streamEnd - - case .streamEnd: - return nil - } - - return () + self.buffers.stream.pull() } } diff --git a/Sources/LZ77/Inflator/LZ77.Inflator.Stream.swift b/Sources/LZ77/Inflator/LZ77.InflatorBuffers.Stream.swift similarity index 73% rename from Sources/LZ77/Inflator/LZ77.Inflator.Stream.swift rename to Sources/LZ77/Inflator/LZ77.InflatorBuffers.Stream.swift index 5c8d9593..6c079a75 100644 --- a/Sources/LZ77/Inflator/LZ77.Inflator.Stream.swift +++ b/Sources/LZ77/Inflator/LZ77.InflatorBuffers.Stream.swift @@ -1,17 +1,12 @@ -extension LZ77.Inflator +extension LZ77.InflatorBuffers { @frozen @usableFromInline - struct Stream where Integral:LZ77.StreamIntegral + struct Stream { + var output:LZ77.InflatorOut // Stream.In manages its own COW in rebase(_:pointer:) var input:LZ77.InflatorIn var b:Int - var lengths:[Int] - // Meta and Stream.Out need to have COW manually implemented with - // exclude() on each, to avoid redundant exclusions inside loops,, - // reuse the same buffer since the 
size is fixed - var meta:LZ77.InflatorTables.Meta - var output:LZ77.InflatorOut #if DUMP_LZ77_BLOCKS || DUMP_LZ77_SYMBOL_HISTOGRAM // histogram, no match can ever cost more than 17 bits per literal @@ -32,18 +27,36 @@ extension LZ77.Inflator init() { - self.b = 0 - self.input = [] - self.lengths = [] - self.meta = .init() self.output = .init() + self.input = [] + self.b = 0 } } } -extension LZ77.Inflator.Stream +extension LZ77.InflatorBuffers.Stream +{ + mutating + func push(_ data:ArraySlice) + { + self.input.rebase(data, pointer: &self.b) + } + mutating + func pull(_ count:Int) -> [UInt8]? + { + self.output.exclude() + return self.output.release(bytes: count) + } + mutating + func pull() -> [UInt8] + { + self.output.exclude() + return self.output.release() + } +} +extension LZ77.InflatorBuffers.Stream { mutating - func blockStart() throws -> (final:Bool, type:LZ77.BlockType)? + func readBlockMetadata(into metadata:inout LZ77.BlockMetadata) throws -> LZ77.BlockType? { guard self.b + 3 <= self.input.count else @@ -53,7 +66,6 @@ extension LZ77.Inflator.Stream // read block header bits let final:Bool = self.input[self.b, count: 1, as: UInt8.self] != 0 - let type:LZ77.BlockType switch self.input[self.b + 1, count: 2, as: UInt8.self] { case 0: @@ -73,12 +85,12 @@ extension LZ77.Inflator.Stream throw LZ77.DecompressionError.invalidBlockElementCountParity(l, m) } - type = .bytes(.init(l)) - self.b = boundary + 32 + self.b = boundary + 32 + return .bytes(final: final, count: .init(l)) case 1: - type = .fixed self.b += 3 + return .fixed(final: final) case 2: guard self.b + 17 <= self.input.count @@ -95,14 +107,14 @@ extension LZ77.Inflator.Stream return nil } - let runliterals:Int = 257 + self.input[self.b + 3, count: 5, as: Int.self] - let distances:Int = 1 + self.input[self.b + 8, count: 5, as: Int.self] + let literals:Int = 257 + self.input[self.b + 3, count: 5, as: Int.self] + let distances:Int = 1 + self.input[self.b + 8, count: 5, as: Int.self] // other counts don’t 
need to be checked because the number of bits // matches the acceptable range - guard 257 ... 286 ~= runliterals + guard 257 ... 286 ~= literals else { - throw LZ77.DecompressionError.invalidHuffmanRunLiteralSymbolCount(runliterals) + throw LZ77.DecompressionError.invalidHuffmanRunLiteralSymbolCount(literals) } var lengths:[Int] = .init(repeating: 0, count: 19) @@ -118,24 +130,25 @@ extension LZ77.Inflator.Stream throw LZ77.DecompressionError.invalidHuffmanCodelengthHuffmanTable } - self.meta.replace(tree: tree) + metadata.replace(tree: tree) self.b += 17 + 3 * codelengths - type = .dynamic(runliterals: runliterals, distances: distances) + return .dynamic(final: final, literals: literals, distances: distances) case let code: throw LZ77.DecompressionError.invalidBlockTypeCode(code) } - - return (final, type) } + mutating - func blockTables(runliterals:Int, distances:Int) - throws -> (runliteral:LZ77.HuffmanTree, distance:LZ77.HuffmanTree)? + func readBlockTables( + metadata:LZ77.BlockMetadata, + lengths count:(literals:Int, total:Int), + reusing lengths:inout [Int]) throws -> LZ77.InflatorTables? 
{ // code lengths form an unbroken sequence codelengths: - while self.lengths.count < runliterals + distances + while lengths.count < count.total { guard self.b < self.input.count else @@ -143,7 +156,7 @@ extension LZ77.Inflator.Stream return nil } - let meta:LZ77.Metaword = self.meta[.init(truncatingIfNeeded: self.input[self.b])] + let meta:LZ77.Metaword = metadata[.init(truncatingIfNeeded: self.input[self.b])] // if the codeword length is longer than the available input // then we know the match is invalid (due to padding 0-bits) guard self.b + meta.length <= self.input.count @@ -170,13 +183,13 @@ extension LZ77.Inflator.Stream switch meta.symbol { case 0 ..< 16: - self.lengths.append(.init(meta.symbol)) + lengths.append(.init(meta.symbol)) self.b += meta.length continue codelengths case 16: guard - let last:Int = self.lengths.last + let last:Int = lengths.last else { throw LZ77.DecompressionError.invalidHuffmanCodelengthSequence @@ -207,15 +220,15 @@ extension LZ77.Inflator.Stream let repetitions:Int = base + self.input[self.b + meta.length, count: extra, as: Int.self] - self.lengths.append(contentsOf: repeatElement(element, count: repetitions)) + lengths.append(contentsOf: repeatElement(element, count: repetitions)) self.b += meta.length + extra } defer { // important - self.lengths.removeAll(keepingCapacity: true) + lengths.removeAll(keepingCapacity: true) } - guard self.lengths.count == runliterals + distances + guard lengths.count == count.total else { throw LZ77.DecompressionError.invalidHuffmanCodelengthSequence @@ -237,20 +250,20 @@ extension LZ77.Inflator.Stream #endif guard - let runliteral:LZ77.HuffmanTree = .validate( - symbols: 0 ... 287, - lengths: self.lengths.prefix(runliterals)), - let distance:LZ77.HuffmanTree = .validate( - symbols: 0 ... 31, - normalizing: self.lengths.dropFirst(runliterals)) + let literalTree:LZ77.HuffmanTree = .validate(symbols: 0 ... 
287, + lengths: lengths.prefix(count.literals)), + let distanceTree:LZ77.HuffmanTree = .validate(symbols: 0 ... 31, + normalizing: lengths.dropFirst(count.literals)) else { throw LZ77.DecompressionError.invalidHuffmanTable } - return (runliteral, distance) + + return .init(literals: literalTree, distances: distanceTree) } + mutating - func blockCompressed(semistatic:LZ77.InflatorTables) throws -> Void? + func readBlock(with tables:LZ77.InflatorTables) throws -> Void? { while self.b < self.input.count { @@ -264,7 +277,7 @@ extension LZ77.Inflator.Stream // ------------------------- // total : 48 bits let first:UInt16 = self.input[self.b] - let runliteral:LZ77.RunLiteral = semistatic[first, as: LZ77.RunLiteral.self] + let runliteral:LZ77.RunLiteral = tables[first, as: LZ77.RunLiteral.self] if runliteral.symbol < 256 { @@ -319,17 +332,17 @@ extension LZ77.Inflator.Stream offset:(extra:Int, base:Int) ) - composite.count = semistatic.composite(decade: runliteral) + composite.count = tables.composite(decade: runliteral) let count:Int = composite.count.base &+ .init(truncatingIfNeeded: slug & ~(.max &<< composite.count.extra)) slug &>>= composite.count.extra let distance:LZ77.Distance = - semistatic[.init(truncatingIfNeeded: slug), as: LZ77.Distance.self] + tables[.init(truncatingIfNeeded: slug), as: LZ77.Distance.self] slug &>>= distance.length - composite.offset = semistatic.composite(decade: distance) + composite.offset = tables.composite(decade: distance) let offset:Int = composite.offset.base &+ .init(truncatingIfNeeded: slug & ~(.max &<< composite.offset.extra)) @@ -366,25 +379,95 @@ extension LZ77.Inflator.Stream } return nil } + mutating - func blockUncompressed(end:Int) throws -> Void? + func readBlock(upTo end:Int) -> Void? 
{ while self.output.endIndex < end { - guard self.b + 8 <= self.input.count + if let byte:UInt8 = self.readByte() + { + self.output.append(byte) + } else { return nil } - self.output.append(self.input[self.b, count: 8, as: UInt8.self]) - self.b += 8 } return () } mutating - func check(declared checksum:UInt32?) throws + func readBigEndianUInt32() -> UInt32? + { + // skip to next byte boundary, read 4 bytes + let boundary:Int = (self.b + 7) & ~7 + if boundary + 32 <= input.count + { + b = boundary + 32 + } + else + { + return nil + } + + // mrc-32 is big-endian + let bytes:(UInt32, UInt32, UInt32, UInt32) = + ( + input[boundary, count: 8, as: UInt32.self], + input[boundary + 8, count: 8, as: UInt32.self], + input[boundary + 16, count: 8, as: UInt32.self], + input[boundary + 24, count: 8, as: UInt32.self] + ) + let checksum:UInt32 = bytes.0 << 24 | + bytes.1 << 16 | + bytes.2 << 8 | + bytes.3 + + return checksum + } + + mutating + func readLittleEndianUInt32() -> UInt32? + { + self.readBigEndianUInt32()?.byteSwapped + } + + mutating + func readString() -> Void? + { + reading: + do + { + switch self.readByte() + { + case nil: return nil + case 0?: return () + case _?: continue reading + } + } + } + + @inline(__always) + mutating + func readByte() -> UInt8? + { + guard self.b + 8 <= self.input.count + else + { + return nil + } + defer + { + self.b += 8 + } + + return self.input[self.b, count: 8, as: UInt8.self] + } + + mutating + func _dumpPerfStats() { #if DUMP_LZ77_BLOCKS let efficiency:Double = self.statistics.literals.enumerated().reduce(0.0){ $0 + .init($1.0 * $1.1) } / @@ -400,20 +483,5 @@ extension LZ77.Inflator.Stream #if DUMP_LZ77_BLOCKS || DUMP_LZ77_SYMBOL_HISTOGRAM print(String.init(histogram: self.statistics.symbols, size: (29, 30), pad: 4)) #endif - - guard - let checksum:UInt32 - else - { - return // Checksum missing. 
- } - - let computed:UInt32 = self.output.checksum() - if computed != checksum - { - throw LZ77.DecompressionError.invalidStreamChecksum( - declared: checksum, - computed: computed) - } } } diff --git a/Sources/LZ77/Inflator/LZ77.InflatorBuffers.swift b/Sources/LZ77/Inflator/LZ77.InflatorBuffers.swift new file mode 100644 index 00000000..98d887f1 --- /dev/null +++ b/Sources/LZ77/Inflator/LZ77.InflatorBuffers.swift @@ -0,0 +1,231 @@ +extension LZ77 +{ + @frozen @usableFromInline + struct InflatorBuffers where Format:LZ77.FormatType + { + // Reusable buffers + var metadata:BlockMetadata + var buffer:[Int] + var stream:Stream + + let format:Format + + init(format:Format) + { + self.metadata = .init() + self.buffer = [] + self.stream = .init() + self.format = format + } + } +} +extension LZ77.InflatorBuffers +{ + private mutating + func advance(state:LZ77.BlockState) throws -> LZ77.BlockState?? + { + switch state + { + case .metadata: + if let block:LZ77.BlockType = try self.stream.readBlockMetadata( + into: &self.metadata) + { + #if DUMP_LZ77_BLOCKS + defer + { + print("< \(block)") + } + #endif + + switch block + { + case .dynamic (final: let final, literals: let l, distances: let d): + return .tables(final: final, literals: l, distances: d) + + case .fixed (final: let final): + return .compressed(final: final, tables: .fixed) + + case .bytes (final: let final, count: let count): + // compute endindex + let end:Int = self.stream.output.endIndex + count + return .uncompressed(final: final, end: end) + } + } + + case .tables(final: let final, literals: let l, distances: let d): + if let tables:LZ77.InflatorTables = try self.stream.readBlockTables( + metadata: self.metadata, + lengths: (l, l + d), + reusing: &self.buffer) + { + return .compressed(final: final, tables: tables) + } + + case .uncompressed(final: let final, end: let end): + if let _:Void = self.stream.readBlock(upTo: end) + { + return final ? 
.some(nil) : .metadata + } + + case .compressed(final: let final, tables: let tables): + if let _:Void = try self.stream.readBlock(with: tables) + { + return final ? .some(nil) : .metadata + } + } + + return .none + } +} +extension LZ77.InflatorBuffers +{ + mutating + func advance(state:LZ77.InflatorState) throws -> LZ77.InflatorState? + { + // pool cow-exclusions here instead of checking the reference count + // on every loop iteration + self.metadata.exclude() + self.stream.output.exclude() + + switch state + { + case .initial: + if case .ios = self.format + { + self.stream.output.window = 1 << 15 + return .block(.metadata) + } + else if + let header:LZ77.StreamHeader = try .read(&self.stream.input, + from: &self.stream.b) + { + self.stream.output.window = 1 << header.exponent + return .block(.metadata) + } + + case .block(let block): + if let next:LZ77.BlockState? = try self.advance(state: block) + { + return next.map { .block($0) } ?? .checksum + } + + case .checksum: + self.stream._dumpPerfStats() + + if case .ios = self.format + { + return .terminal + } + else if + let declared:UInt32 = self.stream.readBigEndianUInt32() + { + let computed:UInt32 = self.stream.output.checksum() + if computed != declared + { + throw LZ77.DecompressionError.invalidStreamChecksum( + declared: declared, + computed: computed) + } + return .terminal + } + + case .terminal: + break + } + + return nil + } +} +extension LZ77.InflatorBuffers +{ + mutating + func advance(state:Gzip.InflatorState) throws -> Gzip.InflatorState? 
+ { + // pool cow-exclusions here instead of checking the reference count + // on every loop iteration + self.metadata.exclude() + self.stream.output.exclude() + + switch state + { + case .initial: + if let header:Gzip.StreamHeader = try .read(&self.stream.input, + from: &self.stream.b) + { + self.stream.output.window = 1 << 15 + + var count:Int = 0 + + if header.flag.3 + { + count += 1 + } + if header.flag.4 + { + count += 1 + } + + guard header.xlen == 0, count == 0 + else + { + return .strings(skip: 8 * Int.init(header.xlen), count: count) + } + + return .block(.metadata) + } + + case .strings(skip: let skip, count: var count): + if skip == 0 + { + precondition(count > 0) + + if case ()? = self.stream.readString() + { + count -= 1 + } + else + { + break + } + + return count == 0 ? .block(.metadata) : .strings(skip: 0, count: count) + } + else if + self.stream.b + skip <= self.stream.input.count + { + self.stream.b += skip + return count == 0 ? .block(.metadata) : .strings(skip: 0, count: count) + } + + case .block(let block): + if let next:LZ77.BlockState? = try self.advance(state: block) + { + return next.map { .block($0) } ?? 
.checksum + } + + case .checksum: + if let declared:UInt32 = self.stream.readLittleEndianUInt32() + { + let computed:UInt32 = self.stream.output.checksum() + if computed != declared + { + throw LZ77.DecompressionError.invalidStreamChecksum( + declared: declared, + computed: computed) + } + return .epilogue + } + + case .epilogue: + if let _:UInt32 = self.stream.readLittleEndianUInt32() + { + return .terminal + } + + case .terminal: + preconditionFailure("Attempted to advance past terminal state!") + } + + return nil + } +} diff --git a/Sources/LZ77/Inflator/LZ77.InflatorIn.swift b/Sources/LZ77/Inflator/LZ77.InflatorIn.swift index f3b4140c..c33ba537 100644 --- a/Sources/LZ77/Inflator/LZ77.InflatorIn.swift +++ b/Sources/LZ77/Inflator/LZ77.InflatorIn.swift @@ -137,7 +137,13 @@ extension LZ77.InflatorIn } } - /// Returns bits in the low end of the returned integer. + /// Returns bits in the low end of the returned integer. The maximum meaningful bit `count` + /// is 16. + /// + /// The best way to think about the bit order is to imagine the bitstream as a single, + /// arbitrary-precision integer. This means if you load a slice of the integer into a + /// ``UInt16``, the most-significant bits in the result will correspond to the bits that + /// appear later in the bitstream.
/// /// ```text /// { b.15, b.14, b.13, b.12, b.11, b.10, b.9, b.8, b.7, b.6, b.5, b.4, b.3, b.2, b.1, b.0 } diff --git a/Sources/LZ77/Inflator/LZ77.InflatorState.swift b/Sources/LZ77/Inflator/LZ77.InflatorState.swift index d7fe9f52..da7c55a7 100644 --- a/Sources/LZ77/Inflator/LZ77.InflatorState.swift +++ b/Sources/LZ77/Inflator/LZ77.InflatorState.swift @@ -3,12 +3,9 @@ extension LZ77 @frozen @usableFromInline enum InflatorState { - case streamStart - case blockStart - case blockTables(final:Bool, runliterals:Int, distances:Int) - case blockUncompressed(final:Bool, end:Int) - case blockCompressed(final:Bool, semistatic:InflatorTables) - case streamChecksum - case streamEnd + case initial + case block(LZ77.BlockState) + case checksum + case terminal } } diff --git a/Sources/LZ77/Inflator/LZ77.InflatorTables.swift b/Sources/LZ77/Inflator/LZ77.InflatorTables.swift index c2be5097..575b5db1 100644 --- a/Sources/LZ77/Inflator/LZ77.InflatorTables.swift +++ b/Sources/LZ77/Inflator/LZ77.InflatorTables.swift @@ -64,11 +64,11 @@ extension LZ77 } extension LZ77.InflatorTables { - init(runliteral:LZ77.HuffmanTree, distance:LZ77.HuffmanTree) + init(literals:LZ77.HuffmanTree, distances:LZ77.HuffmanTree) { let start:Int = 256 + MemoryLayout.stride * 64 - let offset:Int = start + MemoryLayout.stride * runliteral.size.z - let size:Int = offset + MemoryLayout.stride * distance.size.z + let offset:Int = start + MemoryLayout.stride * literals.size.z + let size:Int = offset + MemoryLayout.stride * distances.size.z self.storage = .create(minimumCapacity: size){ _ in () } self.storage.withUnsafeMutablePointerToElements { @@ -83,18 +83,18 @@ extension LZ77.InflatorTables } // write huffman tables (base + start).withMemoryRebound(to: LZ77.RunLiteral.self, - capacity: runliteral.size.z) + capacity: literals.size.z) { - runliteral.table(initializing: $0) + literals.table(initializing: $0) } (base + offset).withMemoryRebound(to: LZ77.Distance.self, - capacity: distance.size.z) + capacity: 
distances.size.z) { - distance.table(initializing: $0) + distances.table(initializing: $0) } } - self.fence = (runliteral: runliteral.size.n, distance: distance.size.n) + self.fence = (runliteral: literals.size.n, distance: distances.size.n) self.offset = offset } @@ -166,5 +166,5 @@ extension LZ77.InflatorTables } static - let fixed:Self = .init(runliteral: .runliteral, distance: .distance) + let fixed:Self = .init(literals: .runliteral, distances: .distance) } diff --git a/Sources/LZ77/Inflator/LZ77.StreamHeader.swift b/Sources/LZ77/Inflator/LZ77.StreamHeader.swift new file mode 100644 index 00000000..bf6bfcab --- /dev/null +++ b/Sources/LZ77/Inflator/LZ77.StreamHeader.swift @@ -0,0 +1,63 @@ +extension LZ77 +{ + struct StreamHeader + { + let exponent:Int + + init(exponent:Int) + { + self.exponent = exponent + } + } +} +extension LZ77.StreamHeader +{ + static + func read(_ input:inout LZ77.InflatorIn, from bit:inout Int) throws -> Self? + { + // read stream header + guard bit + 16 <= input.count + else + { + return nil + } + + switch input[bit + 0, count: 4, as: UInt8.self] + { + case 0x08: break + case let code: throw LZ77.StreamHeaderError.invalidCompressionMethod(code) + } + + let e:Int = input[bit + 4, count: 4, as: Int.self] + + guard e < 8 + else + { + throw LZ77.StreamHeaderError.invalidWindowSize(exponent: e + 8) + } + + let flags:Int = input[bit + 8, count: 8, as: Int.self] + guard (e << 12 | 8 << 8 + flags) % 31 == 0 + else + { + throw LZ77.StreamHeaderError.invalidCheckBits + } + guard flags & 0x20 == 0 + else + { + throw LZ77.StreamHeaderError.unexpectedDictionary + } + + bit += 16 + + return .init(exponent: 8 + e) + } + + func write(_ output:inout LZ77.DeflatorOut) + { + let unpaired:UInt16 = .init(self.exponent - 8) << 4 | 0x08 + let check:UInt16 = ~((unpaired << 8 | unpaired >> 8) % 31) & 31 + + output.append(check << 8 | unpaired, count: 16) + } +} diff --git a/Sources/LZ77/Inflator/LZ77.DeflateHeaderError.swift 
b/Sources/LZ77/Inflator/LZ77.StreamHeaderError.swift similarity index 95% rename from Sources/LZ77/Inflator/LZ77.DeflateHeaderError.swift rename to Sources/LZ77/Inflator/LZ77.StreamHeaderError.swift index e6c863c8..69b20d1c 100644 --- a/Sources/LZ77/Inflator/LZ77.DeflateHeaderError.swift +++ b/Sources/LZ77/Inflator/LZ77.StreamHeaderError.swift @@ -3,7 +3,7 @@ extension LZ77 /// Errors that can occur when decompressing a DEFLATE stream embedded in the ‘zlib’ wrapper /// format. public - enum DeflateHeaderError:Error, Equatable + enum StreamHeaderError:Error, Equatable { /// A compressed data stream had an invalid compression method code. /// diff --git a/Sources/LZ77/Wrappers/LZ77.DeflateFormat.swift b/Sources/LZ77/Wrappers/LZ77.DeflateFormat.swift deleted file mode 100644 index ee840cea..00000000 --- a/Sources/LZ77/Wrappers/LZ77.DeflateFormat.swift +++ /dev/null @@ -1,102 +0,0 @@ -extension LZ77 -{ - @available(*, deprecated, renamed: "DeflateFormat") - public - typealias Format = DeflateFormat - - @frozen public - enum DeflateFormat - { - case zlib - case ios - } -} -extension LZ77.DeflateFormat:LZ77.StreamFormat -{ - public - typealias Integral = LZ77.MRC32 - - public - func begin(inflating input:inout LZ77.InflatorIn, - at bit:inout Int) throws -> LZ77.DeflateHeader? 
- { - if case .ios = self - { - return .init(exponent: 15) - } - - // read stream header - guard bit + 16 <= input.count - else - { - return nil - } - - switch input[bit + 0, count: 4, as: UInt8.self] - { - case 8: - break - case let code: - throw LZ77.DeflateHeaderError.invalidCompressionMethod(code) - } - - let e:Int = input[bit + 4, count: 4, as: Int.self] - - guard e < 8 - else - { - throw LZ77.DeflateHeaderError.invalidWindowSize(exponent: e + 8) - } - - let flags:Int = input[bit + 8, count: 8, as: Int.self] - guard (e << 12 | 8 << 8 + flags) % 31 == 0 - else - { - throw LZ77.DeflateHeaderError.invalidCheckBits - } - guard flags & 0x20 == 0 - else - { - throw LZ77.DeflateHeaderError.unexpectedDictionary - } - - bit += 16 - - return .init(exponent: 8 + e) - } - - public - func check(inflating input:inout LZ77.InflatorIn, at bit:inout Int) -> UInt32?? - { - if case .ios = self - { - return .some(nil) - } - - // skip to next byte boundary, read 4 bytes - let boundary:Int = (bit + 7) & ~7 - if boundary + 32 <= input.count - { - bit = boundary + 32 - } - else - { - return .none - } - - // mrc-32 is big-endian - let bytes:(UInt32, UInt32, UInt32, UInt32) = - ( - input[boundary, count: 8, as: UInt32.self], - input[boundary + 8, count: 8, as: UInt32.self], - input[boundary + 16, count: 8, as: UInt32.self], - input[boundary + 24, count: 8, as: UInt32.self] - ) - let checksum:UInt32 = bytes.0 << 24 | - bytes.1 << 16 | - bytes.2 << 8 | - bytes.3 - - return .some(checksum) - } -} diff --git a/Sources/LZ77/Wrappers/LZ77.Format.swift b/Sources/LZ77/Wrappers/LZ77.Format.swift new file mode 100644 index 00000000..b1c113fa --- /dev/null +++ b/Sources/LZ77/Wrappers/LZ77.Format.swift @@ -0,0 +1,54 @@ +extension LZ77 +{ + @available(*, deprecated, renamed: "Format") + public + typealias DeflateFormat = Format + + @frozen public + enum Format + { + case zlib + case ios + } +} +extension LZ77.Format:LZ77.FormatType +{ + public + typealias Integral = LZ77.MRC32 +} +extension 
LZ77.Format +{ + func check(inflating input:inout LZ77.InflatorIn, at bit:inout Int) -> UInt32?? + { + if case .ios = self + { + return .some(nil) + } + + // skip to next byte boundary, read 4 bytes + let boundary:Int = (bit + 7) & ~7 + if boundary + 32 <= input.count + { + bit = boundary + 32 + } + else + { + return .none + } + + // mrc-32 is big-endian + let bytes:(UInt32, UInt32, UInt32, UInt32) = + ( + input[boundary, count: 8, as: UInt32.self], + input[boundary + 8, count: 8, as: UInt32.self], + input[boundary + 16, count: 8, as: UInt32.self], + input[boundary + 24, count: 8, as: UInt32.self] + ) + let checksum:UInt32 = bytes.0 << 24 | + bytes.1 << 16 | + bytes.2 << 8 | + bytes.3 + + return .some(checksum) + } +} diff --git a/Sources/LZ77/Wrappers/LZ77.FormatType.swift b/Sources/LZ77/Wrappers/LZ77.FormatType.swift new file mode 100644 index 00000000..8838359a --- /dev/null +++ b/Sources/LZ77/Wrappers/LZ77.FormatType.swift @@ -0,0 +1,13 @@ +extension LZ77 +{ + public + typealias FormatType = _LZ77FormatType +} +public +protocol _LZ77FormatType +{ + associatedtype Integral:LZ77.StreamIntegral + + // func begin(inflating input:inout LZ77.InflatorIn, at bit:inout Int) throws -> Header? + // func check(inflating input:inout LZ77.InflatorIn, at bit:inout Int) -> UInt32?? +} diff --git a/Sources/LZ77/Wrappers/LZ77.StreamFormat.swift b/Sources/LZ77/Wrappers/LZ77.StreamFormat.swift deleted file mode 100644 index f9499fe3..00000000 --- a/Sources/LZ77/Wrappers/LZ77.StreamFormat.swift +++ /dev/null @@ -1,14 +0,0 @@ -extension LZ77 -{ - public - typealias StreamFormat = _LZ77StreamFormat -} -public -protocol _LZ77StreamFormat -{ - associatedtype Integral:LZ77.StreamIntegral - associatedtype Header:LZ77.StreamHeader - - func begin(inflating input:inout LZ77.InflatorIn, at bit:inout Int) throws -> Header? - func check(inflating input:inout LZ77.InflatorIn, at bit:inout Int) -> UInt32?? 
-} diff --git a/Sources/LZ77/Wrappers/LZ77.StreamHeader.swift b/Sources/LZ77/Wrappers/LZ77.StreamHeader.swift deleted file mode 100644 index 155010c0..00000000 --- a/Sources/LZ77/Wrappers/LZ77.StreamHeader.swift +++ /dev/null @@ -1,10 +0,0 @@ -extension LZ77 -{ - public - typealias StreamHeader = _LZ77StreamHeader -} -public -protocol _LZ77StreamHeader -{ - var window:Int { get } -} diff --git a/Sources/PNG/Decoding/PNG.Decoder.swift b/Sources/PNG/Decoding/PNG.Decoder.swift index dba9ed71..a02f65f4 100644 --- a/Sources/PNG/Decoding/PNG.Decoder.swift +++ b/Sources/PNG/Decoding/PNG.Decoder.swift @@ -33,7 +33,7 @@ extension PNG.Decoder self.pass = interlaced ? 0 : nil self.continue = () - let format:LZ77.DeflateFormat + let format:LZ77.Format switch standard { case .common: format = .zlib diff --git a/Sources/PNG/Encoding/PNG.Encoder.swift b/Sources/PNG/Encoding/PNG.Encoder.swift index d1ea088b..f17d3cd1 100644 --- a/Sources/PNG/Encoding/PNG.Encoder.swift +++ b/Sources/PNG/Encoding/PNG.Encoder.swift @@ -24,7 +24,7 @@ extension PNG.Encoder self.row = nil self.pass = interlaced ? .subimage(0) : .image - let format:LZ77.DeflateFormat + let format:LZ77.Format switch standard { case .common: format = .zlib diff --git a/Sources/PNG/LZ77.DeflateHeaderError (ext).swift b/Sources/PNG/LZ77.DeflateHeaderError (ext).swift index 518a1a45..5c45aa9e 100644 --- a/Sources/PNG/LZ77.DeflateHeaderError (ext).swift +++ b/Sources/PNG/LZ77.DeflateHeaderError (ext).swift @@ -1,6 +1,6 @@ import LZ77 -extension LZ77.DeflateHeaderError:PNG.Error +extension LZ77.StreamHeaderError:PNG.Error { /// The string `"Stream header error"`. public static