From b6b9d728b101e16fc495f430d6a3386bda1f8c5d Mon Sep 17 00:00:00 2001 From: Akhil Indurti Date: Wed, 27 Dec 2023 21:15:48 -0800 Subject: [PATCH] add support for unicode 15.0.0 --- .gitignore | 4 +- flake.lock | 61 +++++++++++++++++++++++++++++ flake.nix | 22 +++++++++++ testdata/gendata.py | 3 +- testdata/xid_continue15.0.0.txt.gz | Bin 0 -> 2647 bytes testdata/xid_start15.0.0.txt.gz | Bin 0 -> 2477 bytes unicodeTestVersion_114.go | 1 + unicodeTestVersion_116.go | 3 +- unicodeTestVersion_121.go | 6 +++ 9 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 testdata/xid_continue15.0.0.txt.gz create mode 100644 testdata/xid_start15.0.0.txt.gz create mode 100644 unicodeTestVersion_121.go diff --git a/.gitignore b/.gitignore index 3236373..d252b85 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ !go.sum !*.md !*.go -!*.py \ No newline at end of file +!*.py +!*.lock +!*.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..897df41 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1701680307, + "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1703438236, + "narHash": "sha256-aqVBq1u09yFhL7bj1/xyUeJjzr92fXVvQSSEx6AdB1M=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "5f64a12a728902226210bf01d25ec6cbb9d9265b", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..1063b87 --- /dev/null +++ b/flake.nix @@ -0,0 +1,22 @@ +{ + description = "A flake that sets up the devShell"; + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + let supportedSystems = [ "aarch64-linux" "i686-linux" "x86_64-linux" ]; + in flake-utils.lib.eachSystem supportedSystems (system: + let + pkgs = import nixpkgs { + inherit system; + overlays = [ ]; + }; + pp = ps: with ps; [ regex ]; + in { + devShell = pkgs.mkShell { + buildInputs = with pkgs; [ go (python312.withPackages pp) ]; + }; + }); +} diff --git a/testdata/gendata.py b/testdata/gendata.py index 1f07482..7059f24 100644 --- a/testdata/gendata.py +++ b/testdata/gendata.py @@ -1,6 +1,7 @@ # This script generates testdata for particular version of unicode. # Used Python 3.8.7 && regex==2020.2.20 to generate cases for unicode 12.1.0. # Used Python 3.9.1 && regex>2020.2.20 to generate cases for unicode 13.0.0. +# Used Python 3.12.1 && regex==2023.8.8 to generate cases for unicode 15.0.0. import unicodedata import regex @@ -10,7 +11,7 @@ def gendata(category): f = gzip.open(f'{category}{unicodedata.unidata_version}.txt.gz', 'wb') for r in range(sys.maxunicode+1): - matched = bool(regex.match(f'\p{{{category}}}', chr(r))) + matched = bool(regex.match(f'\\p{{{category}}}', chr(r))) if matched: f.write(b'T') else: diff --git a/testdata/xid_continue15.0.0.txt.gz b/testdata/xid_continue15.0.0.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..c353c233cbf3ff1955f6f14e863865f2ae9c694f GIT binary patch literal 2647 zcmeHHdr;F?7EWautXM3w@=zerRRk;#EznQ{iBnK!5n+@TLIXq^0!Bh2Pcb}FtSiWK zXe?oPMH5h49w{pr0)gNP1c(6)kv9Yg;jugd#E=I`ek*pJ+1c5d{kt>0XXe~9ckXxY zcfWhjf#mJqZ>Z15?gLVBQD_X2kc1;7W6!!fpLu6VsY#&qY2(7$V~0UQ)6-@xK(oJ4 zGy*wxXC!5y?^1L`tfBgc{DP;hoVo}kiGxbzo?2L8zElAsb~Z=2n_hr*7_(;%e@Af~ z47sQ_t>L*0BMH=g54(NJK5mS8#$R!hDEsXG?c}HD;ko_?DR1hX%&BXSEs#A!)E6dv-Ar?rI+%Z?y^0EPyfoG2qDsH;Z)Np9V1$lzbqu0wa2*^)LpuX#q3P+08JIYVtHY2-4YIdpFWPGk2Gu6A{LTshI z3TjztDJxz+w-mk3Hw+~>(6G1!?c3_Jhlt3-Z=7vrMtBSoNm&>d<@+VO#Qkw|qCTR# zF5yUhlHf?s&puceil5A5EY^%kkA{n5!V)3|LoJPpyvt-2f6{fjrI(-^3AbzE;-djc zt%ZyKXluBx*4z?ke0&4dV?s#HvG5>LN#3>I{`j%eOCXw2jC%}?2gp2!ey z_mdN`k7~R3Nyx?y^1Wt zhYebK_f{n$>#e&S4NnyRL{Zd)UNhRIf%l-I)!zSCG z0Whl^WCMoA0{z*SB@s=p6u{z+J|)mllWBiL1BXC+qcfugFnAyRmplJjDvnuJv-oeB zf~pgto@#Np@wmda+UsFV7ba&d8H`X5t>ASuM0;|_yagU&pRqgNwyPn9kw1r})DWBQ zl?Bl7DleL6l`eSI$HX}ZuX6=-t8MemW`e-y8)oYNIMuodgHS)J=X~^U4x0vR&g>LB zeoTtsu$-(A{&MpbM(?@2<=>tM!DudD(2TAkfiB{0cx-qY%SJ3zct-SBaICXZ;c4#o zEawU1EI(Q=nUoVAgODFslt%A7`#Zo7=kLT`v2SN>kP*<#7QyV&qJ()}EIQ}5i0*#T zER7s>P*z<(dq!QnC2L$VQ^E;lbQ>knmtGLwRLg$(iVUB=^&o*GTChl)NeY~5)YWHL zW_m&%LSD|SGB0s6M@#e5vHHq4_gMpVq`Q#34*Oi&ZTD8|fOYX_LKl+~#>gW{meP2R zyrLORo+B^F`_#uRe&X6Y!Ye*i^|;ZS!A-$c6GhM+ZsiTY;RSJEGuKy%EYDY1(--eN zj^8NJpkhVY{Xt(BAiiCdz#7=>fhuXsgn9?Bm4io4)-ETcn9JveR2kmGU$HB?FLq70 z7S1HnlLQ{FTGfP}eLKAR3=`%BSU7FI*d_8%K##T|G@btz0&MA()aF3-iX`bQBi$GJ z<2RdiiY$p3f7G`p`^_*tXgk)U(X*5!7D7{*B}%d*y#E^A1;R-}fPT5lQ9RWmeKqwr2)MkY!rno5G!rX^yl?P}Z2 zypT~QCaV>ulqHsim}Vwk&{#DuU`m7rAKz+Ivokxh|L$KsXXc#q%zMr|=ltFm&YU-I zmM$Yq4~!2#M2MjtjHcQ<*w{@EYCIJp@17rB=xhpU?gUKU=|~>?h~yVPu3l}oc1(mr z$am_=13QgbEw{=C8!awrFGW8l6(AF@aqW?F7St*I$-XBgKY!BEVK(bfy6F8rEM`BU zCjUMz+`5{BOTeQV@ihe>3@P=nD|;snw`tq-itf2G6Hb@Krc^J9Ai&P~QED6XJuU-- zv>lcv^8MsicgNv<+q2DmNfYC2l~2QPY(JkXD0%L*IV+^iIZU`qEUEN`3c2Ql=W*I6 zVCOl$-_fPz!^rG{NvBrE5FJnO%hh#E-fLm>p$f|}PnGdXyFKwYBykDeNa(5yTFZt8 z@Xy$8D@3@vmOUi!+Y=M%i8EsDACYRVug!aBZPXlxow^xP>_q1|-f+?7>m5=!=g7Rp z;~x@DaOm3^uWz-s>{PAPzvf(n9bqTp-0KTd{EN(3p=9Z-Ntu2M2~#xN=php+JFmqFMrR+SAL3Y6TeX2MZrcZnp;B2F~V#& z`F)-`arfBHGX;ia8}a6hRuPZdLeqxCS~WK}czBy$$Q^v!(DXmPXbT~1kW*+G^u;Dq z`l-(!ApFrWM-OiVNs9r%QP=xa{Rqy`(R&W3dtaT)Pt2;J!A9~XY1?m7#?WoOLKzQl zu`~U47{Mzw2PMT>8KPGusPsJ>6_lQv$acYv16L8rH1i;Am!iva+)A^bW< zb-+XZ$8d?~N6KQ?APN8^MHy z6WW5+6@-9|2YX;oe^6Pf$#Io-@c}P7%oGfy0dL7k=_LNrZ&XmnEw!t#rQnM|7qt}G zR6D6wz39bB1*z7^&IQ8SsXNn=Wq-x#rr>rzSEs&CK3<{x7bY% zpSG(B^s|LWppgfYoUGDR=>tAM{lk>?U9$z-TC*~};k~D5(%cYGny@$85CR#24jSzj z@sfq}AEug37f+{5v{-ceYgLaecgG)b8CUj+X`F?EYOQTG!y6h%b7MYb3}d2Rfxwye zsceaU=cE?@`AV;238=4*>b7zWjO_o~q3`F=&+`|$hlB`xPhMOX$sQYB(-cV8SlUC9 zX(EHBl~G>sKNblynm%W^-feFU0JT#nPo*aG`=|`Dz3P51o@hE}GrwTD{E%~B%(K@o zGs~A&VTW>S_D0Nw1PcYSWgaC1Q?F}>i{$nk*blmmBbnHMk!w*x2)|)x@$mdG?7YdWmY2UP$I5F=!XfPAoP*OaTJJ|W| b>>2w_-w1po@V^k~5-lDg?(&2{j3IvmBJK}& literal 0 HcmV?d00001 diff --git a/unicodeTestVersion_114.go b/unicodeTestVersion_114.go index b28670f..1060aeb 100644 --- a/unicodeTestVersion_114.go +++ b/unicodeTestVersion_114.go @@ -1,3 +1,4 @@ +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package xid diff --git a/unicodeTestVersion_116.go b/unicodeTestVersion_116.go index 7372553..fc77c35 100644 --- a/unicodeTestVersion_116.go +++ b/unicodeTestVersion_116.go @@ -1,4 +1,5 @@ -// +build go1.16 +//go:build go1.16 && !go1.21 +// +build go1.16,!go1.21 package xid diff --git a/unicodeTestVersion_121.go b/unicodeTestVersion_121.go new file mode 100644 index 0000000..bbe777f --- /dev/null +++ b/unicodeTestVersion_121.go @@ -0,0 +1,6 @@ +//go:build go1.21 +// +build go1.21 + +package xid + +const unicodeTestVersion = "15.0.0"