New Upstream Release - ruby-js-regex

Ready changes

Summary

Merged new upstream version 3.11.0 (previously 3.8.0).

Diff

diff --git a/debian/changelog b/debian/changelog
index 9658e6d..f846d4f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+ruby-js-regex (3.11.0-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Thu, 17 Aug 2023 20:42:40 -0000
+
 ruby-js-regex (3.8.0-1) unstable; urgency=medium
 
   * New upstream version 3.8.0
diff --git a/js_regex.gemspec b/js_regex.gemspec
index 2783d14..5c4e243 100644
--- a/js_regex.gemspec
+++ b/js_regex.gemspec
@@ -2,16 +2,16 @@
 # This file has been automatically generated by gem2tgz #
 #########################################################
 # -*- encoding: utf-8 -*-
-# stub: js_regex 3.8.0 ruby lib
+# stub: js_regex 3.11.0 ruby lib
 
 Gem::Specification.new do |s|
   s.name = "js_regex".freeze
-  s.version = "3.8.0"
+  s.version = "3.11.0"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib".freeze]
   s.authors = ["Janosch M\u00FCller".freeze]
-  s.date = "2022-09-25"
+  s.date = "2023-03-23"
   s.description = "JsRegex converts Ruby's native regular expressions for JavaScript, taking care of various incompatibilities and returning warnings for unsolvable differences.".freeze
   s.email = ["janosch84@gmail.com".freeze]
   s.files = ["lib/js_regex.rb".freeze, "lib/js_regex/conversion.rb".freeze, "lib/js_regex/converter.rb".freeze, "lib/js_regex/converter/anchor_converter.rb".freeze, "lib/js_regex/converter/assertion_converter.rb".freeze, "lib/js_regex/converter/backreference_converter.rb".freeze, "lib/js_regex/converter/base.rb".freeze, "lib/js_regex/converter/conditional_converter.rb".freeze, "lib/js_regex/converter/context.rb".freeze, "lib/js_regex/converter/escape_converter.rb".freeze, "lib/js_regex/converter/freespace_converter.rb".freeze, "lib/js_regex/converter/group_converter.rb".freeze, "lib/js_regex/converter/keep_converter.rb".freeze, "lib/js_regex/converter/literal_converter.rb".freeze, "lib/js_regex/converter/meta_converter.rb".freeze, "lib/js_regex/converter/property_converter.rb".freeze, "lib/js_regex/converter/property_map.csv".freeze, "lib/js_regex/converter/set_converter.rb".freeze, "lib/js_regex/converter/subexpression_converter.rb".freeze, "lib/js_regex/converter/type_converter.rb".freeze, "lib/js_regex/converter/unsupported_token_converter.rb".freeze, "lib/js_regex/error.rb".freeze, "lib/js_regex/node.rb".freeze, "lib/js_regex/second_pass.rb".freeze, "lib/js_regex/target.rb".freeze, "lib/js_regex/version.rb".freeze]
@@ -27,11 +27,11 @@ Gem::Specification.new do |s|
 
   if s.respond_to? :add_runtime_dependency then
     s.add_runtime_dependency(%q<character_set>.freeze, ["~> 1.4"])
-    s.add_runtime_dependency(%q<regexp_parser>.freeze, ["~> 2.5"])
+    s.add_runtime_dependency(%q<regexp_parser>.freeze, [">= 2.6.2", "< 3.0.0"])
     s.add_runtime_dependency(%q<regexp_property_values>.freeze, ["~> 1.0"])
   else
     s.add_dependency(%q<character_set>.freeze, ["~> 1.4"])
-    s.add_dependency(%q<regexp_parser>.freeze, ["~> 2.5"])
+    s.add_dependency(%q<regexp_parser>.freeze, [">= 2.6.2", "< 3.0.0"])
     s.add_dependency(%q<regexp_property_values>.freeze, ["~> 1.0"])
   end
 end
diff --git a/lib/js_regex.rb b/lib/js_regex.rb
index a889f59..d6f16aa 100644
--- a/lib/js_regex.rb
+++ b/lib/js_regex.rb
@@ -30,14 +30,17 @@ class JsRegex
     "/#{source.empty? ? '(?:)' : source}/#{options}"
   end
 
+  # @raise JsRegex::ConversionError
   def self.new!(ruby_regex, **kwargs)
-    js_regex = new(ruby_regex, **kwargs)
-    if js_regex.warnings.any?
-      raise StandardError.new(
-        "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
-        js_regex.warnings.join("\n")
-      ).extend(JsRegex::Error)
-    end
-    js_regex
+    new(ruby_regex, fail_fast: true, **kwargs)
   end
+
+  def self.compatible?(ruby_regex, **kwargs)
+    new!(ruby_regex, **kwargs)
+    true
+  rescue ConversionError
+    false
+  end
+
+  ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
 end
diff --git a/lib/js_regex/conversion.rb b/lib/js_regex/conversion.rb
index 3e94479..7ee3b6d 100644
--- a/lib/js_regex/conversion.rb
+++ b/lib/js_regex/conversion.rb
@@ -13,20 +13,21 @@ class JsRegex
     require_relative 'target'
 
     class << self
-      def of(input, options: nil, target: Target::ES2009)
+      def of(input, options: nil, target: Target::ES2009, fail_fast: false)
         target                       = Target.cast(target)
-        source, warnings, extra_opts = convert_source(input, target)
+        source, warnings, extra_opts = convert_source(input, target, fail_fast)
         options_string               = convert_options(input, options, extra_opts)
         [source, options_string, warnings, target]
       end
 
       private
 
-      def convert_source(input, target)
+      def convert_source(input, target, fail_fast)
         tree = Regexp::Parser.parse(input)
         context = Converter::Context.new(
           case_insensitive_root: tree.i?,
           target:                target,
+          fail_fast:             fail_fast,
         )
         converted_tree = Converter.convert(tree, context)
         final_tree = SecondPass.call(converted_tree)
diff --git a/lib/js_regex/converter/backreference_converter.rb b/lib/js_regex/converter/backreference_converter.rb
index d0f925c..30d8d8d 100644
--- a/lib/js_regex/converter/backreference_converter.rb
+++ b/lib/js_regex/converter/backreference_converter.rb
@@ -41,14 +41,19 @@ class JsRegex
       end
 
       def convert_call
-        if expression.respond_to?(:number) && expression.number.equal?(0)
-          return warn_of_unsupported_feature('whole-pattern recursion')
+        if context.recursions(expression) >= 5
+          warn_of("Recursion for '#{expression}' curtailed at 5 levels")
+          return ''
         end
+
+        context.count_recursion(expression)
         context.increment_local_capturing_group_count
         target_copy = expression.referenced_expression.unquantified_clone
         # avoid "Duplicate capture group name" error in JS
         target_copy.token = :capture if target_copy.is?(:named, :group)
-        convert_expression(target_copy)
+        result = convert_expression(target_copy)
+        # wrap in group if it is a full-pattern recursion
+        expression.reference == 0 ? Node.new('(?:', result, ')') : result
       end
     end
   end
diff --git a/lib/js_regex/converter/base.rb b/lib/js_regex/converter/base.rb
index aaa4521..f3e890e 100644
--- a/lib/js_regex/converter/base.rb
+++ b/lib/js_regex/converter/base.rb
@@ -63,7 +63,11 @@ class JsRegex
       end
 
       def warn_of(text)
-        context.warnings << text
+        if context.fail_fast
+          raise ConversionError, text.sub(/^Dropped /, '')
+        else
+          context.warnings << text
+        end
       end
 
       def drop
diff --git a/lib/js_regex/converter/context.rb b/lib/js_regex/converter/context.rb
index 946fb99..167cc9d 100644
--- a/lib/js_regex/converter/context.rb
+++ b/lib/js_regex/converter/context.rb
@@ -8,14 +8,17 @@ class JsRegex
     class Context
       attr_reader :capturing_group_count,
                   :case_insensitive_root,
+                  :fail_fast,
                   :in_atomic_group,
                   :warnings
 
-      def initialize(case_insensitive_root: false, target: nil)
+      def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
         self.added_capturing_groups_after_group = Hash.new(0)
         self.capturing_group_count = 0
-        self.warnings = []
+        self.fail_fast = fail_fast
+        self.recursions_per_expression = {}
         self.required_options_hash = {}
+        self.warnings = []
 
         self.case_insensitive_root = case_insensitive_root
         self.target = target
@@ -39,6 +42,10 @@ class JsRegex
         required_options_hash['u'] = true
       end
 
+      def u?
+        required_options_hash['u']
+      end
+
       def required_options
         required_options_hash.keys
       end
@@ -62,6 +69,18 @@ class JsRegex
         capture_group
       end
 
+      def recursions(exp)
+        recursions_per_expression[recursion_id(exp)] || 0
+      end
+
+      def count_recursion(exp)
+        recursions_per_expression[recursion_id(exp)] = recursions(exp) + 1
+      end
+
+      def recursion_id(exp)
+        [exp.class, exp.starts_at]
+      end
+
       # takes and returns 1-indexed group positions.
       # new is different from old if capturing groups were added in between.
       def new_capturing_group_position(old_position)
@@ -79,11 +98,13 @@ class JsRegex
       private
 
       attr_accessor :added_capturing_groups_after_group,
+                    :recursions_per_expression,
                     :required_options_hash,
                     :target
 
       attr_writer :capturing_group_count,
                   :case_insensitive_root,
+                  :fail_fast,
                   :in_atomic_group,
                   :warnings
 
diff --git a/lib/js_regex/converter/literal_converter.rb b/lib/js_regex/converter/literal_converter.rb
index 9a0df0e..9ef1386 100644
--- a/lib/js_regex/converter/literal_converter.rb
+++ b/lib/js_regex/converter/literal_converter.rb
@@ -6,11 +6,12 @@ class JsRegex
     # Template class implementation.
     #
     class LiteralConverter < JsRegex::Converter::Base
-      class << self
-        ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
+      ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
+      LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
 
+      class << self
         def convert_data(data, context)
-          if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
+          if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
             if context.enable_u_option
               escape_incompatible_bmp_literals(data)
             else
@@ -23,7 +24,7 @@ class JsRegex
 
         def convert_astral_data(data)
           data.each_char.each_with_object(Node.new) do |char, node|
-            if char =~ ASTRAL_PLANE_CODEPOINT_PATTERN
+            if char.ord > 0xFFFF
               node << surrogate_substitution_for(char)
             else
               node << escape_incompatible_bmp_literals(char)
@@ -31,8 +32,12 @@ class JsRegex
           end
         end
 
+        ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
+          .merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
+          .merge('/' => '\\/')
+
         def escape_incompatible_bmp_literals(data)
-          data.gsub('/', '\\/').gsub(/[\f\n\r\t]/) { |lit| Regexp.escape(lit) }
+          data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
         end
 
         private
diff --git a/lib/js_regex/converter/set_converter.rb b/lib/js_regex/converter/set_converter.rb
index bf50a16..da68385 100644
--- a/lib/js_regex/converter/set_converter.rb
+++ b/lib/js_regex/converter/set_converter.rb
@@ -17,54 +17,73 @@ class JsRegex
       private
 
       def convert_data
-        return pass_through_with_escaping if directly_compatible?
+        simple_conversion || full_recalculation
+      end
 
-        content = CharacterSet.of_expression(expression)
-        if expression.case_insensitive? && !context.case_insensitive_root
-          content = content.case_insensitive
-        elsif !expression.case_insensitive? && context.case_insensitive_root
-          warn_of_unsupported_feature('nested case-sensitive set')
-        end
+      def simple_conversion
+        return false if casefolding_needed?
 
-        if context.es_2015_or_higher?
-          context.enable_u_option if content.astral_part?
-          content.to_s(format: 'es6', in_brackets: true)
-        else
-          content.to_s_with_surrogate_ranges
+        result = "[#{'^' if expression.negative?}".dup
+
+        expression.expressions.each do |subexp|
+          return false unless (child_res = simple_convert_child(subexp))
+
+          result << child_res.to_s
         end
-      end
 
-      def directly_compatible?
-        all_children_directly_compatible? && !casefolding_needed?
+        result << ']'
       end
 
-      def all_children_directly_compatible?
-        # note that #each_expression is recursive
-        expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
+      def casefolding_needed?
+        expression.case_insensitive? ^ context.case_insensitive_root
       end
 
-      def child_directly_compatible?(exp)
+      def simple_convert_child(exp)
         case exp.type
         when :literal
-          # surrogate pair substitution needed on ES2009 if astral
-          exp.text.ord <= 0xFFFF || context.enable_u_option
+          return false if !context.u? &&
+            exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
+            !context.enable_u_option
+
+          LiteralConverter.escape_incompatible_bmp_literals(exp.text)
         when :set
-          # conversion needed for nested sets, intersections
-          exp.token.equal?(:range)
+          # full conversion is needed for nested sets and intersections
+          exp.token.equal?(:range) && exp.expressions.map do |op|
+            simple_convert_child(op) or return false
+          end.join('-')
         when :type
-          TypeConverter.directly_compatible?(exp)
+          TypeConverter.directly_compatible?(exp, context) &&
+            exp.text
         when :escape
-          EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
+          return exp.text if SET_SPECIFIC_ESCAPES_PATTERN.match?(exp.text)
+
+          case exp.token
+          when *CONVERTIBLE_ESCAPE_TOKENS
+            EscapeConverter.new.convert(exp, context)
+          when :literal
+            exp.char.ord <= 0xFFFF &&
+              LiteralConverter.escape_incompatible_bmp_literals(exp.char)
+          end
         end
       end
 
-      def casefolding_needed?
-        expression.case_insensitive? ^ context.case_insensitive_root
-      end
+      SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/
+      CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
+        EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
 
-      def pass_through_with_escaping
-        string = expression.to_s(:base)
-        LiteralConverter.escape_incompatible_bmp_literals(string)
+      def full_recalculation
+        content = CharacterSet.of_expression(expression)
+        if expression.case_insensitive? && !context.case_insensitive_root
+          content = content.case_insensitive
+        elsif !expression.case_insensitive? && context.case_insensitive_root
+          warn_of_unsupported_feature('nested case-sensitive set')
+        end
+        if context.es_2015_or_higher?
+          context.enable_u_option if content.astral_part?
+          content.to_s(format: 'es6', in_brackets: true)
+        else
+          content.to_s_with_surrogate_ranges
+        end
       end
     end
   end
diff --git a/lib/js_regex/converter/type_converter.rb b/lib/js_regex/converter/type_converter.rb
index 0cd4837..037095e 100644
--- a/lib/js_regex/converter/type_converter.rb
+++ b/lib/js_regex/converter/type_converter.rb
@@ -6,13 +6,14 @@ class JsRegex
     # Template class implementation.
     #
     class TypeConverter < JsRegex::Converter::Base
-      HEX_EXPANSION           = '[0-9A-Fa-f]'
-      NONHEX_EXPANSION        = '[^0-9A-Fa-f]'
-      ES2018_HEX_EXPANSION    = '\p{AHex}'
-      ES2018_NONHEX_EXPANSION = '\P{AHex}'
-      LINEBREAK_EXPANSION     = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
+      HEX_EXPANSION              = '[0-9A-Fa-f]'
+      NONHEX_EXPANSION           = '[^0-9A-Fa-f]'
+      ES2018_HEX_EXPANSION       = '\p{AHex}'
+      ES2018_NONHEX_EXPANSION    = '\P{AHex}'
+      ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
+      LINEBREAK_EXPANSION        = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
 
-      def self.directly_compatible?(expression)
+      def self.directly_compatible?(expression, _context = nil)
         case expression.token
         when :space, :nonspace
           !expression.ascii_classes?
@@ -28,6 +29,7 @@ class JsRegex
         when :hex then hex_expansion
         when :nonhex then nonhex_expansion
         when :linebreak then LINEBREAK_EXPANSION
+        when :xgrapheme then xgrapheme
         when :digit, :space, :word
           return pass_through if self.class.directly_compatible?(expression)
           set_substitution
@@ -68,6 +70,14 @@ class JsRegex
       def character_set
         CharacterSet.of_expression(expression)
       end
+
+      def xgrapheme
+        if context.es_2018_or_higher? && context.enable_u_option
+          ES2018_XGRAPHEME_EXPANSION
+        else
+          warn_of_unsupported_feature
+        end
+      end
     end
   end
 end
diff --git a/lib/js_regex/target.rb b/lib/js_regex/target.rb
index 648535a..e2a1a03 100644
--- a/lib/js_regex/target.rb
+++ b/lib/js_regex/target.rb
@@ -8,7 +8,7 @@ class JsRegex
     def self.cast(arg)
       return ES2009 if arg.nil?
 
-      normalized_arg = arg.to_s.upcase
+      normalized_arg = arg.to_s.upcase.sub(/^(ECMASCRIPT|ES|JAVASCRIPT|JS)? ?/, 'ES')
       return normalized_arg if SUPPORTED.include?(normalized_arg)
 
       raise ArgumentError.new(
diff --git a/lib/js_regex/version.rb b/lib/js_regex/version.rb
index 68ffdad..68a4bef 100644
--- a/lib/js_regex/version.rb
+++ b/lib/js_regex/version.rb
@@ -1,3 +1,3 @@
 class JsRegex
-  VERSION = '3.8.0'
+  VERSION = '3.11.0'
 end

More details

Full run details

Historical runs