From 10759787a05a031870ca215e2229c607dd12fbc5 Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Thu, 31 Oct 2024 17:57:06 -0700 Subject: [PATCH 1/7] Support Time.new timestamp argument and error messaging closes #3693 --- spec/ruby/core/time/new_spec.rb | 20 +++++ spec/tags/core/time/new_tags.txt | 9 --- src/main/ruby/truffleruby/core/time.rb | 6 +- .../core/truffle/time_operations.rb | 74 +++++++++++++++++++ 4 files changed, 99 insertions(+), 10 deletions(-) diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index d686355270c7..9c95586606a3 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -511,6 +511,16 @@ def zone.local_to_utc(t) Time.new("2021-12-25 00:00:00", in: "-01:00").to_s.should == "2021-12-25 00:00:00 -0100" end + it "returns Time of Jan 1 for string with just year" do + Time.new("2021").should == Time.new(2021, 1, 1) + Time.new("2021").zone.should == Time.new(2021, 1, 1).zone + Time.new("2021").utc_offset.should == Time.new(2021, 1, 1).utc_offset + end + + it "returns Time of Jan 1 for string with just year in timezone specified with in keyword argument" do + Time.new("2021", in: "+17:00").to_s.should == "2021-01-01 00:00:00 +1700" + end + it "converts precision keyword argument into Integer if is not nil" do obj = Object.new def obj.to_int; 3; end @@ -544,6 +554,10 @@ def obj.to_int; 3; end -> { Time.new("2020-12-25 00 +09:00") }.should raise_error(ArgumentError, "missing min part: 00 ") + + -> { + Time.new("2020-12-25") + }.should raise_error(ArgumentError, 'no time information') end it "raises ArgumentError if subsecond is missing after dot" do @@ -641,6 +655,12 @@ def obj.to_int; 3; end Time.new("2021-11-31 00:00:60 +09:00".encode("utf-32le")) }.should raise_error(ArgumentError, "time string should have ASCII compatible encoding") end + + it "raises ArgumentError if string doesn't start with year" do + -> { + Time.new("a\nb") + }.should raise_error(ArgumentError, "can't parse: \"a\\nb\"") + end end end end diff --git a/spec/tags/core/time/new_tags.txt b/spec/tags/core/time/new_tags.txt index d66b21b0d997..04c1c547bdb5 100644 --- a/spec/tags/core/time/new_tags.txt +++ b/spec/tags/core/time/new_tags.txt @@ -20,15 +20,6 @@ fails:Time.new with a timezone argument subject's class implements .find_timezon fails:Time.new with a timezone argument subject's class implements .find_timezone method calls .find_timezone to build a time object if passed zone name as a timezone argument fails:Time.new with a timezone argument subject's class implements .find_timezone method does not call .find_timezone if passed any not string/numeric/timezone timezone argument fails:Time.new with a timezone argument :in keyword argument could be a timezone object -fails:Time.new with a timezone argument Time.new with a String argument parses an ISO-8601 like format fails:Time.new with a timezone argument Time.new with a String argument accepts precision keyword argument and truncates specified digits of sub-second part -fails:Time.new with a timezone argument Time.new with a String argument returns Time in timezone specified in the String argument -fails:Time.new with a timezone argument Time.new with a String argument returns Time in timezone specified in the String argument even if the in keyword argument provided -fails:Time.new with a timezone argument Time.new with a String argument returns Time in timezone specified with in keyword argument if timezone isn't provided in the String argument fails:Time.new with a timezone argument Time.new with a String argument converts precision keyword argument into Integer if is not nil fails:Time.new with a timezone argument Time.new with a String argument raise TypeError is can't convert precision keyword argument into Integer -fails:Time.new with a timezone argument Time.new with a String argument raises ArgumentError if part of time string is missing -fails:Time.new with a timezone argument Time.new with a String argument raises ArgumentError if subsecond is missing after dot -fails:Time.new with a timezone argument Time.new with a String argument raises ArgumentError if String argument is not in the supported format -fails:Time.new with a timezone argument Time.new with a String argument raises ArgumentError if date/time parts values are not valid -fails:Time.new with a timezone argument Time.new with a String argument raises ArgumentError if string has not ascii-compatible encoding diff --git a/src/main/ruby/truffleruby/core/time.rb b/src/main/ruby/truffleruby/core/time.rb index cac22a892135..394ec6fcc07d 100644 --- a/src/main/ruby/truffleruby/core/time.rb +++ b/src/main/ruby/truffleruby/core/time.rb @@ -403,15 +403,19 @@ def at(sec, sub_sec = undefined, unit = undefined, **kwargs) time end - def new(year = undefined, month = nil, day = nil, hour = nil, minute = nil, second = nil, utc_offset = nil, **options) + def new(year = undefined, month = undefined, day = nil, hour = nil, minute = nil, second = nil, utc_offset = nil, **options) if utc_offset && options[:in] raise ArgumentError, 'timezone argument given as positional and keyword arguments' end utc_offset ||= options[:in] + month_undefined = Primitive.undefined?(month) + month = nil if month_undefined if Primitive.undefined?(year) utc_offset ? self.now.getlocal(utc_offset) : self.now + elsif Primitive.is_a?(year, String) && month_undefined + Truffle::TimeOperations.new_from_string(self, year, **options) elsif Primitive.nil? utc_offset Truffle::TimeOperations.compose(self, :local, year, month, day, hour, minute, second) elsif utc_offset == :std diff --git a/src/main/ruby/truffleruby/core/truffle/time_operations.rb b/src/main/ruby/truffleruby/core/truffle/time_operations.rb index 6c2a63afc0a8..0657fb159846 100644 --- a/src/main/ruby/truffleruby/core/truffle/time_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/time_operations.rb @@ -91,5 +91,79 @@ def self.compose(time_class, utc_offset, p1, p2 = nil, p3 = nil, p4 = nil, p5 = Primitive.time_s_from_array(time_class, sec, min, hour, mday, month, year, nsec, is_dst, is_utc, utc_offset) end + + def self.new_from_string(time_class, str, **options) + require 'strscan' + + raise ArgumentError, 'time string should have ASCII compatible encoding' unless str.encoding.ascii_compatible? + + scanner = StringScanner.new(str) + year = scanner.scan(/\d+/) + raise ArgumentError, "can't parse: #{str.inspect}" if Primitive.nil?(year) + raise ArgumentError, "year must be 4 or more digits: #{year}" if year.length < 4 + + return self.compose(time_class, self.utc_offset_for_compose(options[:in]), year) if scanner.eos? + + month = self.scan_for_two_digits(scanner, 'mon', '-', 1..12) + mday = self.scan_for_two_digits(scanner, 'mday', '-', 1..31) + + # Just focus on the time part now. + scanner.string = scanner.rest + hour = self.scan_for_two_digits(scanner, 'hour', /[ T]/, 0..23, 'no time information', true) + min = self.scan_for_two_digits(scanner, 'min', ':', 0..59, true, true) + sec = self.scan_for_two_digits(scanner, 'sec', ':', 0..59, true) + + if scanner.scan(/\.(\d*)/) + usec = scanner.captures[0] + raise ArgumentError, "subsecond expected after dot: #{scanner.pre_match[1..]}. " if usec == '' + end + + utc_offset = options[:in] + unless scanner.eos? + scanner.skip(/\s+/) + if scanner.match?(/\S+/) + # An offset provided in the string overrides any passed in via `in:`. + utc_offset = scanner.matched + end + end + + self.compose(time_class, self.utc_offset_for_compose(utc_offset), year, month, mday, hour, min, sec, usec) + end + + def self.scan_for_two_digits(scanner, name, separator, range = nil, not_found_msg = nil, check_fraction = false) + digits = if scanner.scan(/#{separator}(\d+)/) + scanner.captures[0] + elsif Primitive.true?(not_found_msg) + raise ArgumentError, "missing #{name} part: #{scanner.string[1...scanner.pos]} " + elsif not_found_msg + raise ArgumentError, not_found_msg + end + + if digits.to_s.size != 2 + after = " after '#{separator}'" if separator == ':' || separator == '-' + raise ArgumentError, "two digits #{name} is expected#{after}: #{ "#{scanner.matched}#{scanner.rest}"[0..10] }" + end + + num = digits.to_i + if range && !range.include?(num) + raise ArgumentError, "#{name} out of range" + end + + if check_fraction && scanner.peek(1) == '.' + raise ArgumentError, "fraction #{name} is not supported: #{ "#{scanner.pre_match}#{scanner.matched}"[1..] }." + end + + num + end + + def self.utc_offset_for_compose(utc_offset) + if Primitive.nil?(utc_offset) + :local + elsif Time.send(:utc_offset_in_utc?, utc_offset) + :utc + else + Truffle::Type.coerce_to_utc_offset(utc_offset) + end + end end end From f9084b94ad36a3ed3a55770be959ade4e000d8d9 Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Fri, 1 Nov 2024 10:07:21 -0700 Subject: [PATCH 2/7] Add fast-path Time.new(string) parsing for well-formed timestamps Co-authored-by: Kevin Menard --- spec/ruby/core/time/new_spec.rb | 2 ++ .../truffleruby/core/truffle/time_operations.rb | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index 9c95586606a3..05df05ef4d60 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -485,6 +485,8 @@ def zone.local_to_utc(t) Time.new("2020-12-25 00:56:17 +0900").should == t Time.new("2020-12-25 00:57:47 +090130").should == t Time.new("2020-12-25T00:56:17+09:00").should == t + + Time.new("2020-12-25T00:56:17.123456+09:00").should == Time.utc(2020, 12, 24, 15, 56, 17, 123456) end it "accepts precision keyword argument and truncates specified digits of sub-second part" do diff --git a/src/main/ruby/truffleruby/core/truffle/time_operations.rb b/src/main/ruby/truffleruby/core/truffle/time_operations.rb index 0657fb159846..4ccb93efdf11 100644 --- a/src/main/ruby/truffleruby/core/truffle/time_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/time_operations.rb @@ -93,10 +93,19 @@ def self.compose(time_class, utc_offset, p1, p2 = nil, p3 = nil, p4 = nil, p5 = end def self.new_from_string(time_class, str, **options) - require 'strscan' - raise ArgumentError, 'time string should have ASCII compatible encoding' unless str.encoding.ascii_compatible? + # Fast path for well-formed strings. + # Specify the acceptable range for each component in the regexp so that if + # any value is out of range the match will fail and fall through to the + # scanner code below. + if match = str.match(/\A(\d{4,5})(?:-(0[0-9]|1[012])-(0[0-9]|[12][0-9]|3[01])[ T]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(?:\.(\d+))?\s*(\S+)?)?\z/) + year, month, mday, hour, min, sec, usec, offset = match.captures + return self.compose(time_class, self.utc_offset_for_compose(offset || options[:in]), year, month, mday, hour, min, sec, usec) + end + + require 'strscan' + scanner = StringScanner.new(str) year = scanner.scan(/\d+/) raise ArgumentError, "can't parse: #{str.inspect}" if Primitive.nil?(year) From f5619326b603d562f2a80209344f4a73ee5e420c Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Fri, 1 Nov 2024 12:23:43 -0700 Subject: [PATCH 3/7] Manage string position manually to remove StringScanner --- spec/ruby/core/time/new_spec.rb | 6 ++ .../core/truffle/time_operations.rb | 72 +++++++++++-------- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index 05df05ef4d60..e95dec1ba2d4 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -663,6 +663,12 @@ def obj.to_int; 3; end Time.new("a\nb") }.should raise_error(ArgumentError, "can't parse: \"a\\nb\"") end + + it "raises ArgumentError if string has extra characters after offset" do + -> { + Time.new("2021-11-31 00:00:59 +09:00 abc") + }.should raise_error(ArgumentError, "can't parse at: abc") + end end end end diff --git a/src/main/ruby/truffleruby/core/truffle/time_operations.rb b/src/main/ruby/truffleruby/core/truffle/time_operations.rb index 4ccb93efdf11..44f5108bf389 100644 --- a/src/main/ruby/truffleruby/core/truffle/time_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/time_operations.rb @@ -104,62 +104,74 @@ def self.new_from_string(time_class, str, **options) return self.compose(time_class, self.utc_offset_for_compose(offset || options[:in]), year, month, mday, hour, min, sec, usec) end - require 'strscan' + # raise ArgumentError, "can't parse: #{str.inspect}" - scanner = StringScanner.new(str) - year = scanner.scan(/\d+/) + len = str.length + state = [1, 0] + year = str.match(/^(\d+)/)&.captures&.first raise ArgumentError, "can't parse: #{str.inspect}" if Primitive.nil?(year) raise ArgumentError, "year must be 4 or more digits: #{year}" if year.length < 4 - return self.compose(time_class, self.utc_offset_for_compose(options[:in]), year) if scanner.eos? + return self.compose(time_class, self.utc_offset_for_compose(options[:in]), year) if len == year.size - month = self.scan_for_two_digits(scanner, 'mon', '-', 1..12) - mday = self.scan_for_two_digits(scanner, 'mday', '-', 1..31) + state[1] += year.size + month = self.scan_for_two_digits(state, str, 'mon', '-', 1..12) + mday = self.scan_for_two_digits(state, str, 'mday', '-', 1..31) # Just focus on the time part now. - scanner.string = scanner.rest - hour = self.scan_for_two_digits(scanner, 'hour', /[ T]/, 0..23, 'no time information', true) - min = self.scan_for_two_digits(scanner, 'min', ':', 0..59, true, true) - sec = self.scan_for_two_digits(scanner, 'sec', ':', 0..59, true) - - if scanner.scan(/\.(\d*)/) - usec = scanner.captures[0] - raise ArgumentError, "subsecond expected after dot: #{scanner.pre_match[1..]}. " if usec == '' + state[0] = state[1] + 1 + hour = self.scan_for_two_digits(state, str, 'hour', /[ T]/, 0..23, 'no time information', true) + min = self.scan_for_two_digits(state, str, 'min', ':', 0..59, true, true) + sec = self.scan_for_two_digits(state, str, 'sec', ':', 0..60, true) + + if match = str[state[1]].match(/\.(\d*)/) + usec = match.captures[0] + raise ArgumentError, "subsecond expected after dot: #{str[state[0]..state[1]]} " if usec == '' end utc_offset = options[:in] - unless scanner.eos? - scanner.skip(/\s+/) - if scanner.match?(/\S+/) + unless len == state[1] + if match = str[state[1]..].match(/\s*(\S+)/) # An offset provided in the string overrides any passed in via `in:`. - utc_offset = scanner.matched + utc_offset = match.captures[0] + state[1] += match[0].size end end + raise ArgumentError, "can't parse at:#{str[state[1]..]}" if str.length > state[1] + self.compose(time_class, self.utc_offset_for_compose(utc_offset), year, month, mday, hour, min, sec, usec) end - def self.scan_for_two_digits(scanner, name, separator, range = nil, not_found_msg = nil, check_fraction = false) - digits = if scanner.scan(/#{separator}(\d+)/) - scanner.captures[0] - elsif Primitive.true?(not_found_msg) - raise ArgumentError, "missing #{name} part: #{scanner.string[1...scanner.pos]} " - elsif not_found_msg - raise ArgumentError, not_found_msg - end + def self.scan_for_two_digits(state, str, name, separator, range = nil, not_found_msg = nil, check_fraction = false) + index = state[1] + if str.length > index && str[index].match?(separator) && str[(index + 1)].match?(/\d/) + subindex = index + 1 + while str.length > subindex && str[subindex].match?(/\d/) + subindex += 1 + end + digits = str[(index + 1)...subindex] + elsif Primitive.true?(not_found_msg) + raise ArgumentError, "missing #{name} part: #{str[state[0]..index]}" + elsif not_found_msg + raise ArgumentError, not_found_msg + end - if digits.to_s.size != 2 + if digits.size != 2 after = " after '#{separator}'" if separator == ':' || separator == '-' - raise ArgumentError, "two digits #{name} is expected#{after}: #{ "#{scanner.matched}#{scanner.rest}"[0..10] }" + raise ArgumentError, "two digits #{name} is expected#{after}: #{ str[index..(index + 10)] }" end + # Advance index. + index = state[1] = state[1] + 3 + num = digits.to_i if range && !range.include?(num) raise ArgumentError, "#{name} out of range" end - if check_fraction && scanner.peek(1) == '.' - raise ArgumentError, "fraction #{name} is not supported: #{ "#{scanner.pre_match}#{scanner.matched}"[1..] }." + if check_fraction && str[index] == '.' + raise ArgumentError, "fraction #{name} is not supported: #{ str[state[0]..index] }" end num From b2665f8293488fa33575405120379c5429f855ba Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Fri, 1 Nov 2024 13:50:37 -0700 Subject: [PATCH 4/7] Relax Time.new(string) error message specs; just raise if not well-formed --- CHANGELOG.md | 1 + spec/ruby/core/time/new_spec.rb | 50 ++++++------- .../core/truffle/time_operations.rb | 75 +------------------ 3 files changed, 28 insertions(+), 98 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af3ae69e55be..df5b12c4c604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Compatibility: * Set `RbConfig::CONFIG['archincludedir']` (#3396, @andrykonchin). * Support the index/length arguments for the string argument to `String#bytesplice` added in 3.3 (#3656, @rwstauner). * Implement `rb_str_strlen()` (#3697, @Th3-M4jor). +* Support `Time.new` with String argument and error when invalid (#3693, @rwstauner). Performance: diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index e95dec1ba2d4..9f0c647c3d9c 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -551,105 +551,105 @@ def obj.to_int; 3; end it "raises ArgumentError if part of time string is missing" do -> { Time.new("2020-12-25 00:56 +09:00") - }.should raise_error(ArgumentError, "missing sec part: 00:56 ") + }.should raise_error(ArgumentError, /missing sec part: 00:56 |can't parse:/) -> { Time.new("2020-12-25 00 +09:00") - }.should raise_error(ArgumentError, "missing min part: 00 ") + }.should raise_error(ArgumentError, /missing min part: 00 |can't parse:/) -> { Time.new("2020-12-25") - }.should raise_error(ArgumentError, 'no time information') + }.should raise_error(ArgumentError, /no time information|can't parse:/) end it "raises ArgumentError if subsecond is missing after dot" do -> { Time.new("2020-12-25 00:56:17. +0900") - }.should raise_error(ArgumentError, "subsecond expected after dot: 00:56:17. ") + }.should raise_error(ArgumentError, /subsecond expected after dot: 00:56:17. |can't parse:/) end it "raises ArgumentError if String argument is not in the supported format" do -> { Time.new("021-12-25 00:00:00.123456 +09:00") - }.should raise_error(ArgumentError, "year must be 4 or more digits: 021") + }.should raise_error(ArgumentError, /year must be 4 or more digits: 021|can't parse:/) -> { Time.new("2020-012-25 00:56:17 +0900") - }.should raise_error(ArgumentError, /\Atwo digits mon is expected after [`']-': -012-25 00:\z/) + }.should raise_error(ArgumentError, /\Atwo digits mon is expected after [`']-': -012-25 00:\z|can't parse:/) -> { Time.new("2020-2-25 00:56:17 +0900") - }.should raise_error(ArgumentError, /\Atwo digits mon is expected after [`']-': -2-25 00:56\z/) + }.should raise_error(ArgumentError, /\Atwo digits mon is expected after [`']-': -2-25 00:56\z|can't parse:/) -> { Time.new("2020-12-215 00:56:17 +0900") - }.should raise_error(ArgumentError, /\Atwo digits mday is expected after [`']-': -215 00:56:\z/) + }.should raise_error(ArgumentError, /\Atwo digits mday is expected after [`']-': -215 00:56:\z|can't parse:/) -> { Time.new("2020-12-25 000:56:17 +0900") - }.should raise_error(ArgumentError, "two digits hour is expected: 000:56:17 ") + }.should raise_error(ArgumentError, /two digits hour is expected: 000:56:17 |can't parse:/) -> { Time.new("2020-12-25 0:56:17 +0900") - }.should raise_error(ArgumentError, "two digits hour is expected: 0:56:17 +0") + }.should raise_error(ArgumentError, /two digits hour is expected: 0:56:17 \+0|can't parse:/) -> { Time.new("2020-12-25 00:516:17 +0900") - }.should raise_error(ArgumentError, /\Atwo digits min is expected after [`']:': :516:17 \+09\z/) + }.should raise_error(ArgumentError, /\Atwo digits min is expected after [`']:': :516:17 \+09\z|can't parse:/) -> { Time.new("2020-12-25 00:6:17 +0900") - }.should raise_error(ArgumentError, /\Atwo digits min is expected after [`']:': :6:17 \+0900\z/) + }.should raise_error(ArgumentError, /\Atwo digits min is expected after [`']:': :6:17 \+0900\z|can't parse:/) -> { Time.new("2020-12-25 00:56:137 +0900") - }.should raise_error(ArgumentError, /\Atwo digits sec is expected after [`']:': :137 \+0900\z/) + }.should raise_error(ArgumentError, /\Atwo digits sec is expected after [`']:': :137 \+0900\z|can't parse:/) -> { Time.new("2020-12-25 00:56:7 +0900") - }.should raise_error(ArgumentError, /\Atwo digits sec is expected after [`']:': :7 \+0900\z/) + }.should raise_error(ArgumentError, /\Atwo digits sec is expected after [`']:': :7 \+0900\z|can't parse:/) -> { Time.new("2020-12-25 00:56. +0900") - }.should raise_error(ArgumentError, "fraction min is not supported: 00:56.") + }.should raise_error(ArgumentError, /fraction min is not supported: 00:56\.|can't parse:/) -> { Time.new("2020-12-25 00. +0900") - }.should raise_error(ArgumentError, "fraction hour is not supported: 00.") + }.should raise_error(ArgumentError, /fraction hour is not supported: 00\.|can't parse:/) end it "raises ArgumentError if date/time parts values are not valid" do -> { Time.new("2020-13-25 00:56:17 +09:00") - }.should raise_error(ArgumentError, "mon out of range") + }.should raise_error(ArgumentError, /mon out of range|can't parse:/) -> { Time.new("2020-12-32 00:56:17 +09:00") - }.should raise_error(ArgumentError, "mday out of range") + }.should raise_error(ArgumentError, /mday out of range|can't parse:/) -> { Time.new("2020-12-25 25:56:17 +09:00") - }.should raise_error(ArgumentError, "hour out of range") + }.should raise_error(ArgumentError, /hour out of range|can't parse:/) -> { Time.new("2020-12-25 00:61:17 +09:00") - }.should raise_error(ArgumentError, "min out of range") + }.should raise_error(ArgumentError, /min out of range|can't parse:/) -> { Time.new("2020-12-25 00:56:61 +09:00") - }.should raise_error(ArgumentError, "sec out of range") + }.should raise_error(ArgumentError, /sec out of range|can't parse:/) -> { Time.new("2020-12-25 00:56:17 +23:59:60") - }.should raise_error(ArgumentError, "utc_offset out of range") + }.should raise_error(ArgumentError, /utc_offset out of range|can't parse:/) -> { Time.new("2020-12-25 00:56:17 +24:00") - }.should raise_error(ArgumentError, "utc_offset out of range") + }.should raise_error(ArgumentError, /utc_offset out of range|can't parse:/) -> { Time.new("2020-12-25 00:56:17 +23:61") - }.should raise_error(ArgumentError, '"+HH:MM", "-HH:MM", "UTC" or "A".."I","K".."Z" expected for utc_offset: +23:61') + }.should raise_error(ArgumentError, /#{Regexp.escape('"+HH:MM", "-HH:MM", "UTC" or "A".."I","K".."Z" expected for utc_offset: +23:61')}|can't parse:/) end it "raises ArgumentError if string has not ascii-compatible encoding" do @@ -667,7 +667,7 @@ def obj.to_int; 3; end it "raises ArgumentError if string has extra characters after offset" do -> { Time.new("2021-11-31 00:00:59 +09:00 abc") - }.should raise_error(ArgumentError, "can't parse at: abc") + }.should raise_error(ArgumentError, /can't parse.+ abc/) end end end diff --git a/src/main/ruby/truffleruby/core/truffle/time_operations.rb b/src/main/ruby/truffleruby/core/truffle/time_operations.rb index 44f5108bf389..487fb0a6431f 100644 --- a/src/main/ruby/truffleruby/core/truffle/time_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/time_operations.rb @@ -97,84 +97,13 @@ def self.new_from_string(time_class, str, **options) # Fast path for well-formed strings. # Specify the acceptable range for each component in the regexp so that if - # any value is out of range the match will fail and fall through to the - # scanner code below. + # any value is out of range the match will fail and fall through. if match = str.match(/\A(\d{4,5})(?:-(0[0-9]|1[012])-(0[0-9]|[12][0-9]|3[01])[ T]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(?:\.(\d+))?\s*(\S+)?)?\z/) year, month, mday, hour, min, sec, usec, offset = match.captures return self.compose(time_class, self.utc_offset_for_compose(offset || options[:in]), year, month, mday, hour, min, sec, usec) end - # raise ArgumentError, "can't parse: #{str.inspect}" - - len = str.length - state = [1, 0] - year = str.match(/^(\d+)/)&.captures&.first - raise ArgumentError, "can't parse: #{str.inspect}" if Primitive.nil?(year) - raise ArgumentError, "year must be 4 or more digits: #{year}" if year.length < 4 - - return self.compose(time_class, self.utc_offset_for_compose(options[:in]), year) if len == year.size - - state[1] += year.size - month = self.scan_for_two_digits(state, str, 'mon', '-', 1..12) - mday = self.scan_for_two_digits(state, str, 'mday', '-', 1..31) - - # Just focus on the time part now. - state[0] = state[1] + 1 - hour = self.scan_for_two_digits(state, str, 'hour', /[ T]/, 0..23, 'no time information', true) - min = self.scan_for_two_digits(state, str, 'min', ':', 0..59, true, true) - sec = self.scan_for_two_digits(state, str, 'sec', ':', 0..60, true) - - if match = str[state[1]].match(/\.(\d*)/) - usec = match.captures[0] - raise ArgumentError, "subsecond expected after dot: #{str[state[0]..state[1]]} " if usec == '' - end - - utc_offset = options[:in] - unless len == state[1] - if match = str[state[1]..].match(/\s*(\S+)/) - # An offset provided in the string overrides any passed in via `in:`. - utc_offset = match.captures[0] - state[1] += match[0].size - end - end - - raise ArgumentError, "can't parse at:#{str[state[1]..]}" if str.length > state[1] - - self.compose(time_class, self.utc_offset_for_compose(utc_offset), year, month, mday, hour, min, sec, usec) - end - - def self.scan_for_two_digits(state, str, name, separator, range = nil, not_found_msg = nil, check_fraction = false) - index = state[1] - if str.length > index && str[index].match?(separator) && str[(index + 1)].match?(/\d/) - subindex = index + 1 - while str.length > subindex && str[subindex].match?(/\d/) - subindex += 1 - end - digits = str[(index + 1)...subindex] - elsif Primitive.true?(not_found_msg) - raise ArgumentError, "missing #{name} part: #{str[state[0]..index]}" - elsif not_found_msg - raise ArgumentError, not_found_msg - end - - if digits.size != 2 - after = " after '#{separator}'" if separator == ':' || separator == '-' - raise ArgumentError, "two digits #{name} is expected#{after}: #{ str[index..(index + 10)] }" - end - - # Advance index. - index = state[1] = state[1] + 3 - - num = digits.to_i - if range && !range.include?(num) - raise ArgumentError, "#{name} out of range" - end - - if check_fraction && str[index] == '.' - raise ArgumentError, "fraction #{name} is not supported: #{ str[state[0]..index] }" - end - - num + raise ArgumentError, "can't parse: #{str.inspect}" end def self.utc_offset_for_compose(utc_offset) From 4e1847519212094b60b464c73ad7b4ff92928400 Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Mon, 4 Nov 2024 13:38:31 -0700 Subject: [PATCH 5/7] Simplify regexp and out of range specs for Time.new(String) --- spec/ruby/core/time/new_spec.rb | 14 +++++++------- .../truffleruby/core/truffle/time_operations.rb | 4 +--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index 9f0c647c3d9c..370863153644 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -621,31 +621,31 @@ def obj.to_int; 3; end it "raises ArgumentError if date/time parts values are not valid" do -> { Time.new("2020-13-25 00:56:17 +09:00") - }.should raise_error(ArgumentError, /mon out of range|can't parse:/) + }.should raise_error(ArgumentError, /(mon|argument) out of range/) -> { Time.new("2020-12-32 00:56:17 +09:00") - }.should raise_error(ArgumentError, /mday out of range|can't parse:/) + }.should raise_error(ArgumentError, /(mday|argument) out of range/) -> { Time.new("2020-12-25 25:56:17 +09:00") - }.should raise_error(ArgumentError, /hour out of range|can't parse:/) + }.should raise_error(ArgumentError, /(hour|argument) out of range/) -> { Time.new("2020-12-25 00:61:17 +09:00") - }.should raise_error(ArgumentError, /min out of range|can't parse:/) + }.should raise_error(ArgumentError, /(min|argument) out of range/) -> { Time.new("2020-12-25 00:56:61 +09:00") - }.should raise_error(ArgumentError, /sec out of range|can't parse:/) + }.should raise_error(ArgumentError, /(sec|argument) out of range/) -> { Time.new("2020-12-25 00:56:17 +23:59:60") - }.should raise_error(ArgumentError, /utc_offset out of range|can't parse:/) + }.should raise_error(ArgumentError, /(utc_offset|argument) out of range/) -> { Time.new("2020-12-25 00:56:17 +24:00") - }.should raise_error(ArgumentError, /utc_offset out of range|can't parse:/) + }.should raise_error(ArgumentError, /(utc_offset|argument) out of range/) -> { Time.new("2020-12-25 00:56:17 +23:61") diff --git a/src/main/ruby/truffleruby/core/truffle/time_operations.rb b/src/main/ruby/truffleruby/core/truffle/time_operations.rb index 487fb0a6431f..c142e4a8abc3 100644 --- a/src/main/ruby/truffleruby/core/truffle/time_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/time_operations.rb @@ -96,9 +96,7 @@ def self.new_from_string(time_class, str, **options) raise ArgumentError, 'time string should have ASCII compatible encoding' unless str.encoding.ascii_compatible? # Fast path for well-formed strings. - # Specify the acceptable range for each component in the regexp so that if - # any value is out of range the match will fail and fall through. - if match = str.match(/\A(\d{4,5})(?:-(0[0-9]|1[012])-(0[0-9]|[12][0-9]|3[01])[ T]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(?:\.(\d+))?\s*(\S+)?)?\z/) + if match = str.match(/\A(\d{4,5})(?:-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?\s*(\S+)?)?\z/) year, month, mday, hour, min, sec, usec, offset = match.captures return self.compose(time_class, self.utc_offset_for_compose(offset || options[:in]), year, month, mday, hour, min, sec, usec) end From fb8c1afb7d610032778291a4186f8199e54690b8 Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Mon, 4 Nov 2024 13:53:02 -0700 Subject: [PATCH 6/7] Use named captures and extended regexp syntax to improve readability --- .../ruby/truffleruby/core/truffle/time_operations.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/ruby/truffleruby/core/truffle/time_operations.rb b/src/main/ruby/truffleruby/core/truffle/time_operations.rb index c142e4a8abc3..47152a79e346 100644 --- a/src/main/ruby/truffleruby/core/truffle/time_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/time_operations.rb @@ -96,8 +96,16 @@ def self.new_from_string(time_class, str, **options) raise ArgumentError, 'time string should have ASCII compatible encoding' unless str.encoding.ascii_compatible? # Fast path for well-formed strings. - if match = str.match(/\A(\d{4,5})(?:-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?\s*(\S+)?)?\z/) - year, month, mday, hour, min, sec, usec, offset = match.captures + if /\A (?\d{4,5}) + (?: + - (?\d{2}) + - (? \d{2}) + [ T] (? \d{2}) + : (? \d{2}) + : (? \d{2}) + (?:\. (? \d+) )? + \s* (?\S+)? + )?\z/x =~ str return self.compose(time_class, self.utc_offset_for_compose(offset || options[:in]), year, month, mday, hour, min, sec, usec) end From 9ec446e10b8c5e97e336af41af7c3a913cce0371 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Tue, 5 Nov 2024 12:58:19 +0100 Subject: [PATCH 7/7] Add version guard for spec failing on 3.2.2 --- spec/ruby/core/time/new_spec.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index 370863153644..66f16adf9135 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -556,10 +556,14 @@ def obj.to_int; 3; end -> { Time.new("2020-12-25 00 +09:00") }.should raise_error(ArgumentError, /missing min part: 00 |can't parse:/) + end - -> { - Time.new("2020-12-25") - }.should raise_error(ArgumentError, /no time information|can't parse:/) + ruby_version_is "3.2.3" do + it "raises ArgumentError if the time part is missing" do + -> { + Time.new("2020-12-25") + }.should raise_error(ArgumentError, /no time information|can't parse:/) + end end it "raises ArgumentError if subsecond is missing after dot" do