New Upstream Release - ruby-js-regex
Ready changes
Summary
Merged new upstream version: 3.11.0 (was: 3.8.0).
Diff
diff --git a/debian/changelog b/debian/changelog
index 9658e6d..f846d4f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+ruby-js-regex (3.11.0-1) UNRELEASED; urgency=low
+
+ * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk> Thu, 17 Aug 2023 20:42:40 -0000
+
ruby-js-regex (3.8.0-1) unstable; urgency=medium
* New upstream version 3.8.0
diff --git a/js_regex.gemspec b/js_regex.gemspec
index 2783d14..5c4e243 100644
--- a/js_regex.gemspec
+++ b/js_regex.gemspec
@@ -2,16 +2,16 @@
# This file has been automatically generated by gem2tgz #
#########################################################
# -*- encoding: utf-8 -*-
-# stub: js_regex 3.8.0 ruby lib
+# stub: js_regex 3.11.0 ruby lib
Gem::Specification.new do |s|
s.name = "js_regex".freeze
- s.version = "3.8.0"
+ s.version = "3.11.0"
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
s.require_paths = ["lib".freeze]
s.authors = ["Janosch M\u00FCller".freeze]
- s.date = "2022-09-25"
+ s.date = "2023-03-23"
s.description = "JsRegex converts Ruby's native regular expressions for JavaScript, taking care of various incompatibilities and returning warnings for unsolvable differences.".freeze
s.email = ["janosch84@gmail.com".freeze]
s.files = ["lib/js_regex.rb".freeze, "lib/js_regex/conversion.rb".freeze, "lib/js_regex/converter.rb".freeze, "lib/js_regex/converter/anchor_converter.rb".freeze, "lib/js_regex/converter/assertion_converter.rb".freeze, "lib/js_regex/converter/backreference_converter.rb".freeze, "lib/js_regex/converter/base.rb".freeze, "lib/js_regex/converter/conditional_converter.rb".freeze, "lib/js_regex/converter/context.rb".freeze, "lib/js_regex/converter/escape_converter.rb".freeze, "lib/js_regex/converter/freespace_converter.rb".freeze, "lib/js_regex/converter/group_converter.rb".freeze, "lib/js_regex/converter/keep_converter.rb".freeze, "lib/js_regex/converter/literal_converter.rb".freeze, "lib/js_regex/converter/meta_converter.rb".freeze, "lib/js_regex/converter/property_converter.rb".freeze, "lib/js_regex/converter/property_map.csv".freeze, "lib/js_regex/converter/set_converter.rb".freeze, "lib/js_regex/converter/subexpression_converter.rb".freeze, "lib/js_regex/converter/type_converter.rb".freeze, "lib/js_regex/converter/unsupported_token_converter.rb".freeze, "lib/js_regex/error.rb".freeze, "lib/js_regex/node.rb".freeze, "lib/js_regex/second_pass.rb".freeze, "lib/js_regex/target.rb".freeze, "lib/js_regex/version.rb".freeze]
@@ -27,11 +27,11 @@ Gem::Specification.new do |s|
if s.respond_to? :add_runtime_dependency then
s.add_runtime_dependency(%q<character_set>.freeze, ["~> 1.4"])
- s.add_runtime_dependency(%q<regexp_parser>.freeze, ["~> 2.5"])
+ s.add_runtime_dependency(%q<regexp_parser>.freeze, [">= 2.6.2", "< 3.0.0"])
s.add_runtime_dependency(%q<regexp_property_values>.freeze, ["~> 1.0"])
else
s.add_dependency(%q<character_set>.freeze, ["~> 1.4"])
- s.add_dependency(%q<regexp_parser>.freeze, ["~> 2.5"])
+ s.add_dependency(%q<regexp_parser>.freeze, [">= 2.6.2", "< 3.0.0"])
s.add_dependency(%q<regexp_property_values>.freeze, ["~> 1.0"])
end
end
diff --git a/lib/js_regex.rb b/lib/js_regex.rb
index a889f59..d6f16aa 100644
--- a/lib/js_regex.rb
+++ b/lib/js_regex.rb
@@ -30,14 +30,17 @@ class JsRegex
"/#{source.empty? ? '(?:)' : source}/#{options}"
end
+ # @raise JsRegex::ConversionError
def self.new!(ruby_regex, **kwargs)
- js_regex = new(ruby_regex, **kwargs)
- if js_regex.warnings.any?
- raise StandardError.new(
- "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
- js_regex.warnings.join("\n")
- ).extend(JsRegex::Error)
- end
- js_regex
+ new(ruby_regex, fail_fast: true, **kwargs)
end
+
+ def self.compatible?(ruby_regex, **kwargs)
+ new!(ruby_regex, **kwargs)
+ true
+ rescue ConversionError
+ false
+ end
+
+ ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
end
diff --git a/lib/js_regex/conversion.rb b/lib/js_regex/conversion.rb
index 3e94479..7ee3b6d 100644
--- a/lib/js_regex/conversion.rb
+++ b/lib/js_regex/conversion.rb
@@ -13,20 +13,21 @@ class JsRegex
require_relative 'target'
class << self
- def of(input, options: nil, target: Target::ES2009)
+ def of(input, options: nil, target: Target::ES2009, fail_fast: false)
target = Target.cast(target)
- source, warnings, extra_opts = convert_source(input, target)
+ source, warnings, extra_opts = convert_source(input, target, fail_fast)
options_string = convert_options(input, options, extra_opts)
[source, options_string, warnings, target]
end
private
- def convert_source(input, target)
+ def convert_source(input, target, fail_fast)
tree = Regexp::Parser.parse(input)
context = Converter::Context.new(
case_insensitive_root: tree.i?,
target: target,
+ fail_fast: fail_fast,
)
converted_tree = Converter.convert(tree, context)
final_tree = SecondPass.call(converted_tree)
diff --git a/lib/js_regex/converter/backreference_converter.rb b/lib/js_regex/converter/backreference_converter.rb
index d0f925c..30d8d8d 100644
--- a/lib/js_regex/converter/backreference_converter.rb
+++ b/lib/js_regex/converter/backreference_converter.rb
@@ -41,14 +41,19 @@ class JsRegex
end
def convert_call
- if expression.respond_to?(:number) && expression.number.equal?(0)
- return warn_of_unsupported_feature('whole-pattern recursion')
+ if context.recursions(expression) >= 5
+ warn_of("Recursion for '#{expression}' curtailed at 5 levels")
+ return ''
end
+
+ context.count_recursion(expression)
context.increment_local_capturing_group_count
target_copy = expression.referenced_expression.unquantified_clone
# avoid "Duplicate capture group name" error in JS
target_copy.token = :capture if target_copy.is?(:named, :group)
- convert_expression(target_copy)
+ result = convert_expression(target_copy)
+ # wrap in group if it is a full-pattern recursion
+ expression.reference == 0 ? Node.new('(?:', result, ')') : result
end
end
end
diff --git a/lib/js_regex/converter/base.rb b/lib/js_regex/converter/base.rb
index aaa4521..f3e890e 100644
--- a/lib/js_regex/converter/base.rb
+++ b/lib/js_regex/converter/base.rb
@@ -63,7 +63,11 @@ class JsRegex
end
def warn_of(text)
- context.warnings << text
+ if context.fail_fast
+ raise ConversionError, text.sub(/^Dropped /, '')
+ else
+ context.warnings << text
+ end
end
def drop
diff --git a/lib/js_regex/converter/context.rb b/lib/js_regex/converter/context.rb
index 946fb99..167cc9d 100644
--- a/lib/js_regex/converter/context.rb
+++ b/lib/js_regex/converter/context.rb
@@ -8,14 +8,17 @@ class JsRegex
class Context
attr_reader :capturing_group_count,
:case_insensitive_root,
+ :fail_fast,
:in_atomic_group,
:warnings
- def initialize(case_insensitive_root: false, target: nil)
+ def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
self.added_capturing_groups_after_group = Hash.new(0)
self.capturing_group_count = 0
- self.warnings = []
+ self.fail_fast = fail_fast
+ self.recursions_per_expression = {}
self.required_options_hash = {}
+ self.warnings = []
self.case_insensitive_root = case_insensitive_root
self.target = target
@@ -39,6 +42,10 @@ class JsRegex
required_options_hash['u'] = true
end
+ def u?
+ required_options_hash['u']
+ end
+
def required_options
required_options_hash.keys
end
@@ -62,6 +69,18 @@ class JsRegex
capture_group
end
+ def recursions(exp)
+ recursions_per_expression[recursion_id(exp)] || 0
+ end
+
+ def count_recursion(exp)
+ recursions_per_expression[recursion_id(exp)] = recursions(exp) + 1
+ end
+
+ def recursion_id(exp)
+ [exp.class, exp.starts_at]
+ end
+
# takes and returns 1-indexed group positions.
# new is different from old if capturing groups were added in between.
def new_capturing_group_position(old_position)
@@ -79,11 +98,13 @@ class JsRegex
private
attr_accessor :added_capturing_groups_after_group,
+ :recursions_per_expression,
:required_options_hash,
:target
attr_writer :capturing_group_count,
:case_insensitive_root,
+ :fail_fast,
:in_atomic_group,
:warnings
diff --git a/lib/js_regex/converter/literal_converter.rb b/lib/js_regex/converter/literal_converter.rb
index 9a0df0e..9ef1386 100644
--- a/lib/js_regex/converter/literal_converter.rb
+++ b/lib/js_regex/converter/literal_converter.rb
@@ -6,11 +6,12 @@ class JsRegex
# Template class implementation.
#
class LiteralConverter < JsRegex::Converter::Base
- class << self
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
+ LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
+ class << self
def convert_data(data, context)
- if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
+ if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
if context.enable_u_option
escape_incompatible_bmp_literals(data)
else
@@ -23,7 +24,7 @@ class JsRegex
def convert_astral_data(data)
data.each_char.each_with_object(Node.new) do |char, node|
- if char =~ ASTRAL_PLANE_CODEPOINT_PATTERN
+ if char.ord > 0xFFFF
node << surrogate_substitution_for(char)
else
node << escape_incompatible_bmp_literals(char)
@@ -31,8 +32,12 @@ class JsRegex
end
end
+ ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
+ .merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
+ .merge('/' => '\\/')
+
def escape_incompatible_bmp_literals(data)
- data.gsub('/', '\\/').gsub(/[\f\n\r\t]/) { |lit| Regexp.escape(lit) }
+ data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
end
private
diff --git a/lib/js_regex/converter/set_converter.rb b/lib/js_regex/converter/set_converter.rb
index bf50a16..da68385 100644
--- a/lib/js_regex/converter/set_converter.rb
+++ b/lib/js_regex/converter/set_converter.rb
@@ -17,54 +17,73 @@ class JsRegex
private
def convert_data
- return pass_through_with_escaping if directly_compatible?
+ simple_conversion || full_recalculation
+ end
- content = CharacterSet.of_expression(expression)
- if expression.case_insensitive? && !context.case_insensitive_root
- content = content.case_insensitive
- elsif !expression.case_insensitive? && context.case_insensitive_root
- warn_of_unsupported_feature('nested case-sensitive set')
- end
+ def simple_conversion
+ return false if casefolding_needed?
- if context.es_2015_or_higher?
- context.enable_u_option if content.astral_part?
- content.to_s(format: 'es6', in_brackets: true)
- else
- content.to_s_with_surrogate_ranges
+ result = "[#{'^' if expression.negative?}".dup
+
+ expression.expressions.each do |subexp|
+ return false unless (child_res = simple_convert_child(subexp))
+
+ result << child_res.to_s
end
- end
- def directly_compatible?
- all_children_directly_compatible? && !casefolding_needed?
+ result << ']'
end
- def all_children_directly_compatible?
- # note that #each_expression is recursive
- expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
+ def casefolding_needed?
+ expression.case_insensitive? ^ context.case_insensitive_root
end
- def child_directly_compatible?(exp)
+ def simple_convert_child(exp)
case exp.type
when :literal
- # surrogate pair substitution needed on ES2009 if astral
- exp.text.ord <= 0xFFFF || context.enable_u_option
+ return false if !context.u? &&
+ exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
+ !context.enable_u_option
+
+ LiteralConverter.escape_incompatible_bmp_literals(exp.text)
when :set
- # conversion needed for nested sets, intersections
- exp.token.equal?(:range)
+ # full conversion is needed for nested sets and intersections
+ exp.token.equal?(:range) && exp.expressions.map do |op|
+ simple_convert_child(op) or return false
+ end.join('-')
when :type
- TypeConverter.directly_compatible?(exp)
+ TypeConverter.directly_compatible?(exp, context) &&
+ exp.text
when :escape
- EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
+ return exp.text if SET_SPECIFIC_ESCAPES_PATTERN.match?(exp.text)
+
+ case exp.token
+ when *CONVERTIBLE_ESCAPE_TOKENS
+ EscapeConverter.new.convert(exp, context)
+ when :literal
+ exp.char.ord <= 0xFFFF &&
+ LiteralConverter.escape_incompatible_bmp_literals(exp.char)
+ end
end
end
- def casefolding_needed?
- expression.case_insensitive? ^ context.case_insensitive_root
- end
+ SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/
+ CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
+ EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
- def pass_through_with_escaping
- string = expression.to_s(:base)
- LiteralConverter.escape_incompatible_bmp_literals(string)
+ def full_recalculation
+ content = CharacterSet.of_expression(expression)
+ if expression.case_insensitive? && !context.case_insensitive_root
+ content = content.case_insensitive
+ elsif !expression.case_insensitive? && context.case_insensitive_root
+ warn_of_unsupported_feature('nested case-sensitive set')
+ end
+ if context.es_2015_or_higher?
+ context.enable_u_option if content.astral_part?
+ content.to_s(format: 'es6', in_brackets: true)
+ else
+ content.to_s_with_surrogate_ranges
+ end
end
end
end
diff --git a/lib/js_regex/converter/type_converter.rb b/lib/js_regex/converter/type_converter.rb
index 0cd4837..037095e 100644
--- a/lib/js_regex/converter/type_converter.rb
+++ b/lib/js_regex/converter/type_converter.rb
@@ -6,13 +6,14 @@ class JsRegex
# Template class implementation.
#
class TypeConverter < JsRegex::Converter::Base
- HEX_EXPANSION = '[0-9A-Fa-f]'
- NONHEX_EXPANSION = '[^0-9A-Fa-f]'
- ES2018_HEX_EXPANSION = '\p{AHex}'
- ES2018_NONHEX_EXPANSION = '\P{AHex}'
- LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
+ HEX_EXPANSION = '[0-9A-Fa-f]'
+ NONHEX_EXPANSION = '[^0-9A-Fa-f]'
+ ES2018_HEX_EXPANSION = '\p{AHex}'
+ ES2018_NONHEX_EXPANSION = '\P{AHex}'
+ ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
+ LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
- def self.directly_compatible?(expression)
+ def self.directly_compatible?(expression, _context = nil)
case expression.token
when :space, :nonspace
!expression.ascii_classes?
@@ -28,6 +29,7 @@ class JsRegex
when :hex then hex_expansion
when :nonhex then nonhex_expansion
when :linebreak then LINEBREAK_EXPANSION
+ when :xgrapheme then xgrapheme
when :digit, :space, :word
return pass_through if self.class.directly_compatible?(expression)
set_substitution
@@ -68,6 +70,14 @@ class JsRegex
def character_set
CharacterSet.of_expression(expression)
end
+
+ def xgrapheme
+ if context.es_2018_or_higher? && context.enable_u_option
+ ES2018_XGRAPHEME_EXPANSION
+ else
+ warn_of_unsupported_feature
+ end
+ end
end
end
end
diff --git a/lib/js_regex/target.rb b/lib/js_regex/target.rb
index 648535a..e2a1a03 100644
--- a/lib/js_regex/target.rb
+++ b/lib/js_regex/target.rb
@@ -8,7 +8,7 @@ class JsRegex
def self.cast(arg)
return ES2009 if arg.nil?
- normalized_arg = arg.to_s.upcase
+ normalized_arg = arg.to_s.upcase.sub(/^(ECMASCRIPT|ES|JAVASCRIPT|JS)? ?/, 'ES')
return normalized_arg if SUPPORTED.include?(normalized_arg)
raise ArgumentError.new(
diff --git a/lib/js_regex/version.rb b/lib/js_regex/version.rb
index 68ffdad..68a4bef 100644
--- a/lib/js_regex/version.rb
+++ b/lib/js_regex/version.rb
@@ -1,3 +1,3 @@
class JsRegex
- VERSION = '3.8.0'
+ VERSION = '3.11.0'
end
More details
Historical runs
- unsatisfied-apt-dependencies: Unsatisfied APT dependencies: ruby-regexp-parser:amd64 (>= 2.6.2)
- unsatisfied-apt-dependencies: Unsatisfied APT dependencies: ruby-regexp-parser:amd64 (>= 2.6.2)
- run-disappeared: Jenkins job https://jenkins.debian.net/job/janitor-worker/837713/ has disappeared
- push-failed: Failed to push result branch: Connection closed: Connection closed early. The remote server unexpectedly closed the connection.
- success: Merged new upstream version 3.7.1