From 93ad782af6977cb25d43d50f4d05f5f53a7a0b69 Mon Sep 17 00:00:00 2001 From: taylorswift Date: Thu, 1 Feb 2024 04:07:32 +0000 Subject: [PATCH] fix byte order bug in gzip header parser, add gzip compression example --- Snippets/GzipCompression.swift | 18 +++++++++++++++--- Snippets/GzipCompression/example.txt.gz | Bin 0 -> 4828 bytes Sources/LZ77/Gzip/Gzip.Deflator.swift | 2 +- Sources/LZ77/Gzip/Gzip.StreamHeader.swift | 10 +++++----- Sources/LZ77/Inflator/LZ77.InflatorIn.swift | 8 +++++++- 5 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 Snippets/GzipCompression/example.txt.gz diff --git a/Snippets/GzipCompression.swift b/Snippets/GzipCompression.swift index 129198cb..4fd131f5 100644 --- a/Snippets/GzipCompression.swift +++ b/Snippets/GzipCompression.swift @@ -4,7 +4,7 @@ import PNG let path:String = "Snippets/GzipCompression/example" guard -let gzipped:[UInt8] = (System.File.Source.open(path: "\(path).gz") +let original:[UInt8] = (System.File.Source.open(path: "\(path).gz") { (source:inout System.File.Source) -> [UInt8]? in @@ -21,8 +21,20 @@ else } var inflator:Gzip.Inflator = .init() -try inflator.push(gzipped[...]) +try inflator.push(original[...]) -let text:String = .init(decoding: inflator.pull(), as: Unicode.UTF8.self) +let utf8:[UInt8] = inflator.pull() +let text:String = .init(decoding: utf8, as: Unicode.UTF8.self) print(text) + +var deflator:Gzip.Deflator = .init(level: 13, exponent: 15, hint: 128 << 10) + deflator.push(utf8[...], last: true) + +let _:Void? 
= System.File.Destination.open(path: "\(path).txt.gz") +{ + while let part:[UInt8] = deflator.pull() + { + $0.write(part) + } +} diff --git a/Snippets/GzipCompression/example.txt.gz b/Snippets/GzipCompression/example.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6113b3e817ce6de90c1d91caebdf30977166316 GIT binary patch literal 4828 zcmV<25+m&&iwFP!00000|CCkBjvGe|yz47?Tmu8aa^A@)**HJ|8!)gyeCBj>G=zJa zN}`@jy~u%no1^Bbki65-YM z$1|=yI$yNT^r4iIQieMo0ORQe)Jz`w6ChmH!01~K2TX-s4Vd6cjX(1hv@-qT zn(2z-sy(HH|58B#zcj$xTQ{H&Qj^m@J;5)MCKY%d*^Sn|d5Ynxw-Ga7Ddb|^|= zr)S~y$P9qg}7(Hmp0#nZWfq4{l~v!oAgZnV3&vQ(Y=#&F+Y+Eaq}l0MvG8j7j2j49=3 zBvNx*r3niq#$b6t$aKJjg<#-7Aj!US*8n-I7SFDaZ;jWYz-nc11~0Bo6pJ4I?&7eD zb8~*gqXtzaNxUD624bHumhK#l2_Jc?6VPL!Sl}QzYiK)|N3yP#Y!se!`mcg%NZMJh zeWu?l#S%`Pnqkw0A=r&`@vK|*NGID~t(CLSy1!pqD_0}AcKHlANC+}>+<*U+V|{H} z^V;yHdZJuI$uK0D03)FHY^%3O2c80HReNh2e!AeY6xJ40C{65V#!Bf#nmF-A^&G}$ip|V58@*6$EufJZM*7N}TX6YunpcxTICYcE?h)iUYWIovk%V{oBCiQwMp!G`K-*{ znt@5y=>nu#0`AQNp*LhPGkJ8P^Zls47&%ot*Ry33pH}-dWGzL0v^|bEhDh2$JKLqP z?kTo7ef8tJAJ#eZVMv^e7iQr{6G_TxPo6s4TH}f%=eOEa)pnEfRNsOJ7_@^jJWR@d z3t_MSs;W=sj9{R<_M=h=BkJ0<-Qnihf(Bw3jZ5QA7m1qM2Y*8^tsB@ry+U3a$dpU# z6FEn10^a%%o?ub<+F%NO^a~7^ z(VDd+EWhM3Tx{UDviC&YbKa&IYo9xq3H<-DceTYp_NnWrO-TE%CQ()W`FP?yg^x<- zd#qBP)$l~(9u=4Z2x4Zp-cuWnR8u}%oT3t=T9{rd)g+n^c%lg-ry{w07DI7ii$U$P z_GcJAO7Est*oFfs)0@SJ73mnB zvP0Kp9*$B~qpZtH%rO0Py|QaCl1}C@nCZjDgA~|^nS9_o2& z{)kT)cJ-^^Dm(-P4I4{$N{^3b1V%kU)rG7e)(Rn9JwHgS0G7gc% z^?urvVyT)^!}Shx!fvx|kR@rT0I|ff)%98azugEo)eafvK6ti!>qJ6VyA9r8XFT8O zjH}~9Ref}B-Uyc5e{nD_Cso9-%jV|d1rv2Km8!#){I*aD9$*EFL(qoqX0UWJLn zP$yM}Xt#3T!iRtV{%6LjOJ9)U?INA$uc=k_yRcS4RhJTaz6}<=HeXRUi#-Uvz6&B* z|B-Gry^-Ta^sZl_;~J0~yX*1pCbt2SAPZ!3$Rx;_EwR-ctH};Y+3EcH;8n5Yq9w&+ zQ%WS9Cr-FB+Yp0K>Y)0b^k)e=vPAS-@`63jN6UYq~NhlDd4b-*s|${ zdd#DWGZ|Z*O&pPGR8_?pJs};`u98E2hYEX(5vWUmbgKisWAt4i?iS#b%ZkAPpj)4L z9Wg@Bjv}16f~*1l+0rGldTpc<8$)Dk{jh~@fyAbHI6Tx=yDD0mM)bw;0(w-ct6!Fn zh`v;Yexn9Fg2k?(gvJnB=UQQFqObjxtx^l)02gY!x6MBbRjs2=@Cc1$YU>=&?B1p% 
zlGeVZgVEK8W6;1!FYD4_S{dv5vukUm7uT{Am6?JVqq3qltaW%n5zQ~U*oG?;FQ+*o zneIcbeW8To(+3Z6UsgocPgKsh9#Bz-aIP|Hp8&ec#`jiGE&G~4RI~vlIVucM9(Ga< z$#0yHm|Gi3`WF+C;)%gh>@Pi=oT21?yAx8{4rE@=e<=TTEn{!-6FM)SaPvSM>ZAd| z?!#~jUNqd=Cm&vXdos+*IB)uXj;42DD<6P9KQpWnANl+Wy#J1CQz76SLIT)k5~6rP zkkD+}lO-SRl%mpnWm3ZV!CpM*!4O4R_;H-vDA*)uzpInZhR^>wyx3=mv4r#h%ck-? zc#b|e`q7@vbYf{?H$nciQTZq=L*$ansK>%IXC$P4vW97StRt)apGWQX&f2=fI^<}> z(J*+sWM84?%or^+E_evt(=Bc+Z%%%q?%+2ol4?ceM;PqaTlE=T=B zcai2C^hKiAm8|r~c2oX}(MsV`!q2peW(Srov2-=gmChHVdqi;uR`FeykU7B2i-D)+94;oip3=S}424>D+A&s_ z6I+A|hrCMYRa)fGvu5kc-y{~8yWLTh)=+eI9?G`^%#`6>DtM)9XFu7*RyF(Vrgano z@9uh+CyogxM=M`dkvM=e_tS|17cxHhF*rX86YtjCIVlY(g%ah7VyxI68X2ONo|_HO z)T$P_M#tsxPv5tLxuw>ekcAgkg7kYd{S#P>(7tYYXnzaQ2Hb`|#fP-Ib<(Ya^7))i zA3vcWP?xZX$#kl{v&r9kEK zhz2Cp@SbHlF=~ITZZxx zO}WiJ+jvjQijDR$q*VTLhwx`dY{Zyhi!v@P$myoKl=vlr>1l+07tVood(-11`g4h5 z+sooDDe`ea4vXn7LwGJzDGAXEV8Bx~VP(`vu!mwbfSK6I;}0DaTK9e0I-PKRGl% zp?(&m10DRc0G+1UDh;Iw-uN{8JDS=C9*LH=>NAEpFBnQ->yz_?G$|2IBW56mPTEPp z7MvbozKRv>wgy;acV`vFymbWhf{AajfCmYaFR?!xpScBlay*)%qnxzyArPVmn>PG{ zvm_ie!lctvYnF=UU$dDUBSz`tN@2Gnn;IVYa>m%q=2Q?}f-UmnLU|@ffE||;-}#W= z)&_ToAM?d(+6gOFZ-iSc&@m``;~3!eA(klvoYy4%t_}lPLk{h=c*7 zmEb95R2dXeBO@@3GMU)B0ezaOT&zbgsoc_FE@rgSPeWCV!`S$q=gB&Je+r2Z?#boU z;TLT;hA;pqc1%>I9IjV0+K!UwoGqiBIdbi6rqk@n+#sY*2&`1Bg41f5`O_e!D4Yw> z;6^K7fC{qA3fn<^k?IU7a(Sif%oeZCt1CX)nVjtkY@3662!#yRpdlM^e<^rkFuu<5 z#sw^LxxyPuTUgmuT|MX?(qvwiVj|H7O3Xw11t>*VpcoxxQ87G8oANk}bm{dOm8tAn zN+IH0Qv1~PF{dbIlV%s@j`oVZGBd$zcx{RELqh8I#1tIo0V%|{jKNPFQ<10T}&9~w%N@-|vCBUo8 z&}^LCsN-?i)8zVmA5Qmlmr{Zx#6}Qrqn_eWAHPP%EJj(3A{LIwP8!LPf7EF2(ERSu z{^n9|r^$F5=2Z7L>y~YSyEZF`1NnO!J;W^3+Lz^xrjh@j z`BRwvAaq^TxJlW*m^ds1g;B`Pc{MtK=CLPMQ;pMaZG|3uJ!j6dY!vy>WlCq7hL+LO zH5$OofIMC%i$;fFZjc-i5oVXjE95mlUyhbFv8d6Hwo6m{rY_oT7v#dut! 
ztTwnk-S}hAOv|Be>o(h*uui59cc)bxiy5sq7psqmO-hpl+K~XNng+BYoVN5LBS3{g z;ZZLZS|pMsrRB-=MkU)FY4?-vG5T?5lHa9oYMMzCT{R<=UH)HN@3nk)EVz6BABa|W zB`l=;57MELcR#oHYw2Yfx4*c5jU=;v)1{I>{#&^jtRM^nLG<3Q@Tdn9@#s~05TX?P z0wGmHqbY6d_sb;NbZIZ`2W-NUT^QbE@L=;DoJInvgFzq;2$Di(Jrz_6@g=m!_FgmE z1K1nr7I*yJ7rkxX-nacp%1^vjxb7vdm6Atskb*IJoSvzC5PQApNRqcdKMAXGa+dBV zV9srbS(SCk-P&S)qyM2U=kj(hiCG8wYwH198&p4#&n~JTMuu4&!, last:Bool = false) { self.buffers.push(data, last: last) diff --git a/Sources/LZ77/Gzip/Gzip.StreamHeader.swift b/Sources/LZ77/Gzip/Gzip.StreamHeader.swift index 7e0d896b..7447822a 100644 --- a/Sources/LZ77/Gzip/Gzip.StreamHeader.swift +++ b/Sources/LZ77/Gzip/Gzip.StreamHeader.swift @@ -25,7 +25,7 @@ extension Gzip.StreamHeader } guard - case 0x1f_8b = input[bit, count: 16, as: UInt16.self] + case 0x8b_1f = input[bit] else { throw Gzip.StreamHeaderError.invalidSigil @@ -69,7 +69,7 @@ extension Gzip.StreamHeader } // This is little-endian! - let xlen:UInt16 = input[bit + 80, count: 16, as: UInt16.self].byteSwapped + let xlen:UInt16 = .init(littleEndian: input[bit + 80]) bit += 96 @@ -85,13 +85,13 @@ extension Gzip.StreamHeader // TODO: this is discarding all the metadata! func write(_ output:inout LZ77.DeflatorOut) { - output.append(0x1f_8b, count: 16) - output.append(0x08_00, count: 16) + output.append(0x8b_1f, count: 16) + output.append(0x00_08, count: 16) // TODO: support MTIME output.append(0x00_00, count: 16) output.append(0x00_00, count: 16) - output.append(0x00_ff, count: 16) + output.append(0xff_00, count: 16) } } diff --git a/Sources/LZ77/Inflator/LZ77.InflatorIn.swift b/Sources/LZ77/Inflator/LZ77.InflatorIn.swift index f3b4140c..c33ba537 100644 --- a/Sources/LZ77/Inflator/LZ77.InflatorIn.swift +++ b/Sources/LZ77/Inflator/LZ77.InflatorIn.swift @@ -137,7 +137,13 @@ extension LZ77.InflatorIn } } - /// Returns bits in the low end of the returned integer. + /// Returns bits in the low end of the returned integer. The maximum meaningful bit `count` + /// is 16. 
+ /// + /// The best way to think about the bit order is to imagine the bitstream as a single, + /// arbitrary-precision integer. This means if you load a slice of the integer into a + /// ``UInt16``, the most-significant bits in the result will correspond to the bits that + /// appear later in the bitstream. /// /// ```text /// { b.15, b.14, b.13, b.12, b.11, b.10, b.9, b.8, b.7, b.6, b.5, b.4, b.3, b.2, b.1, b.0 }