From 48f4dde6f6715d0576314e6469b50333b2fd0a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Sun, 31 Dec 2023 02:21:30 +0100 Subject: [PATCH] WIP --- LibYAML/XS.xs | 18 ++++++++++++++++-- LibYAML/etc/perl_libyaml.c | 5 +++-- t/10.basic.t | 16 ++++++++++------ 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/LibYAML/XS.xs b/LibYAML/XS.xs index 0e0a0d5..a9e8ab7 100644 --- a/LibYAML/XS.xs +++ b/LibYAML/XS.xs @@ -124,6 +124,7 @@ emit_string_events(AV *perl_events, HV *options) yaml_emitter_t emitter; SV **val; SV *yaml = newSVpvn("", 0); + int unicode = 1; XCPT_TRY_START { @@ -140,7 +141,14 @@ emit_string_events(AV *perl_events, HV *options) } yaml_emitter_set_output(&emitter, &append_output, (void *) yaml); yaml_emitter_set_canonical(&emitter, 0); - yaml_emitter_set_unicode(&emitter, 0); + yaml_emitter_set_unicode(&emitter, 1); + val = hv_fetch(options, "unicode", 7, TRUE); + if (val && SvOK(*val) && SvTRUE(*val)) { + unicode = 1; + } + else if (val && SvOK(*val) && ! SvTRUE(*val)) { + unicode = 0; + } emit_events(&emitter, perl_events); @@ -155,7 +163,13 @@ emit_string_events(AV *perl_events, HV *options) } if (yaml) { - SvUTF8_off(yaml); + if (unicode) { + SvUTF8_off(yaml); + } + else { + (void)sv_utf8_decode(yaml); + SvUTF8_on(yaml); + } } RETVAL = yaml; diff --git a/LibYAML/etc/perl_libyaml.c b/LibYAML/etc/perl_libyaml.c index 16f201c..9d4a510 100644 --- a/LibYAML/etc/perl_libyaml.c +++ b/LibYAML/etc/perl_libyaml.c @@ -324,7 +324,7 @@ perl_to_libyaml_event(yaml_emitter_t *emitter, HV *perl_event) } if (strEQ(type, "stream_start_event")) { - ok = yaml_stream_start_event_initialize(&event, 0); + ok = yaml_stream_start_event_initialize(&event, YAML_UTF8_ENCODING); } else if (strEQ(type, "stream_end_event")) { ok = yaml_stream_end_event_initialize(&event); @@ -385,9 +385,10 @@ perl_to_libyaml_event(yaml_emitter_t *emitter, HV *perl_event) else { croak("%s\n", "scalar value not defined"); } + ok = yaml_scalar_event_initialize( &event, anchor_name, tag_name, - (unsigned char *) scalar_value, strlen(scalar_value), plain_implicit, quoted_implicit, style); + (unsigned char *) scalar_value, len, plain_implicit, quoted_implicit, style); } else if (strEQ(type, "alias_event")) { val = hv_fetch(perl_event, "value", 5, TRUE); diff --git a/t/10.basic.t b/t/10.basic.t index 6255dea..79ef3d2 100644 --- a/t/10.basic.t +++ b/t/10.basic.t @@ -269,16 +269,20 @@ subtest unicode => sub { my $ev = []; $yaml = "- ö"; YAML::LibYAML::API::parse_string_events($yaml, $ev); + use Devel::Peek; my $value = encode_utf8 $ev->[3]->{value}; cmp_ok($value, 'eq', "ö", "utf8 parse"); $ev->[3]->{value} = decode_utf8 "ä"; - my $dump = YAML::LibYAML::API::emit_string_events($ev); - cmp_ok($dump, '=~', qr{- "\\xE4"}i, "utf8 emit"); - - $ev->[3]->{value} = "\303\274 \303\300"; - $dump = YAML::LibYAML::API::emit_string_events($ev); - cmp_ok($dump, '=~', qr{- "\\xC3\\xBC \\xC3\\xC0"}i, "binary emit"); + Dump $ev->[3]->{value}; + my $dump = YAML::LibYAML::API::emit_string_events($ev, { unicode => 0 }); + Dump $dump; + cmp_ok($dump, '=~', qr{- \xe4}i, "utf8 decoded emit"); + +# $ev->[3]->{value} = "\303\274 \303\300"; + $dump = YAML::LibYAML::API::emit_string_events($ev, { unicode => 1 }); + cmp_ok($dump, '=~', qr{- ä}i, "utf8 emit"); +# cmp_ok($dump, '=~', qr{- "\\xC3\\xBC \\xC3\\xC0"}i, "binary emit"); }; subtest indent => sub {