New Upstream Release - nim-regex

Ready changes

Summary

Merged new upstream version: 0.20.2+ds (was: 0.19.0).

Resulting package

Built on 2023-05-20T11:56 (took 5m1s)

The resulting binary packages can be installed (if you have the apt repository enabled) by running one of:

apt install -t fresh-releases nim-regex-dev

Lintian Result

Diff

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..d8d6143
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,22 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  test:
+    name: Nim ${{ matrix.nim }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # xxx trim to on use latest in each branch eg 1.0.10, 1.2.8 etc
+        nim: [1.0.0, 1.0.2, 1.0.4, 1.0.10, 1.2.0, 1.2.2, 1.2.4, 1.2.6, 1.2.8, 1.4.0, 1.4.2, 1.4.4]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Run Tests
+      run: |
+        docker pull nimlang/nim:${{ matrix.nim }}
+        docker run --rm -v `pwd`:/usr/src/app -w /usr/src/app nimlang/nim:${{ matrix.nim }} /bin/bash -c "nimble install -y; nimble test"
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 62f5efb..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-services:
-  - docker
-env:
-  - NIM=0.19.6
-  - NIM=0.20.0
-  - NIM=0.20.2
-  - NIM=1.0.0
-  - NIM=1.0.2
-  - NIM=1.0.4
-  - NIM=1.2.0
-  - NIM=1.2.2
-  - NIM=1.2.4
-  - NIM=1.2.6
-before_install:
-  - docker pull nimlang/nim:$NIM
-script:
-  - docker run --rm -v `pwd`:/usr/src/app -w /usr/src/app nimlang/nim:$NIM /bin/bash -c "nimble install -y; nimble test"
-notifications:
-  email:
-    on_failure: never
-    on_success: never
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50cac3a..cfeb30e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,28 @@
+v0.19.0
+==================
+
+* Adds support for unbounded lookaround assertions
+* Fix: parsing `{n,m}` repetitions is less strict;
+  `{}`, `{abc}`, etc are parsed as characters. This is
+  closer to PCRE, but it won't allow error prone instances
+  such as missing brackets: `{123`.
+* Fix: double repetitions: `**`, `++`, `*+`, `???`, `{n}*`, `{n}+`,
+  and other combinations are no longer allowed. The `++` PCRE hack
+  is not allowed, as it won't work the same way anyway.
+
+v0.18.0
+==================
+
+* Adds `escapeRe(string): string` function
+* Removed `unicodeplus` dependency
+
+v0.17.1
+==================
+
+* Fix: regression related to repetitions,
+  and lonely assertions; issue #83
+* Fix: make it compile with ARC; thanks to @timotheecour
+
 v0.17.0
 ==================
 
@@ -64,7 +89,7 @@ v0.13
 ==================
 
 * Add `groupFirstCapture`, `groupLastCapture`, and
-  `group(1): seq[string]` (thaks to @xmonader)
+  `group(1): seq[string]` (thanks to @xmonader)
 * Add Nim 1.0.0 to CI
 * Drop Nim 0.18 support
 * Fix nested captures with repetition range; issue #46
diff --git a/README.md b/README.md
index 0c3862a..041b84a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Regex
 
-[![Build Status](https://img.shields.io/travis/nitely/nim-regex/master.svg?style=flat-square)](https://travis-ci.org/nitely/nim-regex)
+[![Build Status](https://img.shields.io/github/actions/workflow/status/nitely/nim-regex/ci.yml?style=flat-square)](https://github.com/nitely/nim-regex/actions?query=workflow%3ACI)
 [![licence](https://img.shields.io/github/license/nitely/nim-regex.svg?style=flat-square)](https://raw.githubusercontent.com/nitely/nim-regex/master/LICENSE)
 
 A library for parsing, compiling, and executing regular expressions.
@@ -13,6 +13,7 @@ Features:
 * Unicode level-1 support
 * Descriptive error messages
 * Supports matching at compile-time (Nim +0.20)
+* PCRE syntax and semantics
 
 ## Install
 
@@ -22,7 +23,7 @@ nimble install regex
 
 # Compatibility
 
-Nim +0.19.0
+Nim +1.0.0
 
 ## Docs
 
@@ -34,6 +35,10 @@ Nim +0.19.0
 nimble test
 ```
 
+## Debugging
+
+Compile with `-d:regexDotDir:.` to generate [dot files](https://en.wikipedia.org/wiki/DOT_(graph_description_language)) of the regexes (NFAs) within the nim file. A dot file can be viewed in [Graphviz](https://dreampuf.github.io/GraphvizOnline/). Requires Nim +1.2.
+
 ## LICENSE
 
 MIT
diff --git a/bench/bench.nim b/bench/bench.nim
index 8de4d9b..b38d135 100644
--- a/bench/bench.nim
+++ b/bench/bench.nim
@@ -27,6 +27,13 @@ benchRelative(regex_sol, m):
     discard regex.match(text, pattern4, m2)
   doNotOptimizeAway(m2)
 
+benchRelative(regex_macro_sol, m):
+  var d: bool
+  for i in 0 ..< m:
+    regex.match text, regex.rex"\w*sol\w*":
+      d = true
+  doNotOptimizeAway(d)
+
 var dummyTextNums = """650-253-0001"""
 
 var pattern_nums = re.re"^[0-9]+-[0-9]+-[0-9]+$"
@@ -45,6 +52,13 @@ benchRelative(regex_nums, m):
     discard regex.match(dummyTextNums, n_pattern_nums, m2)
   doNotOptimizeAway(m2)
 
+benchRelative(regex_macro_nums, m):
+  var d: bool
+  for i in 0 ..< m:
+    regex.match text, regex.rex"[0-9]+-[0-9]+-[0-9]+":
+      d = true
+  doNotOptimizeAway(d)
+
 var pattern_nums2 = re.re"^[0-9]+..*$"
 
 bench(re_nums2, m):
@@ -61,15 +75,22 @@ benchRelative(regex_nums2, m):
     discard regex.match(dummyTextNums, n_pattern_nums2, m3)
   doNotOptimizeAway(m3)
 
-var lits_find_re = re.re"do|re|mi|fa|sol"
-
-bench(re_lits_find, m):
-  var d: int
+benchRelative(regex_macro_nums2, m):
+  var d: bool
   for i in 0 ..< m:
-    d = re.find(text, lits_find_re)
+    regex.match text, regex.rex"[0-9]+..*":
+      d = true
   doNotOptimizeAway(d)
 
 when false:  # XXX remove
+  var lits_find_re = re.re"do|re|mi|fa|sol"
+
+  bench(re_lits_find, m):
+    var d: int
+    for i in 0 ..< m:
+      d = re.find(text, lits_find_re)
+    doNotOptimizeAway(d)
+
   const lits_find = regex.re"do|re|mi|fa|sol"
 
   benchRelative(regex_lits_find, m):
@@ -92,7 +113,7 @@ bench(re_email_find_all, m):
 
 const email_find_all = regex.re"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"
 
-benchRelative(email_find_all, m):
+benchRelative(regex_email_find_all, m):
   var d = 0
   for i in 0 ..< m:
     for _ in regex.findAll(bench_text, email_find_all):
@@ -112,7 +133,7 @@ bench(re_uri_find_all, m):
 
 const uri_find_all = regex.re"[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"
 
-benchRelative(uri_find_all, m):
+benchRelative(regex_uri_find_all, m):
   var d = 0
   for i in 0 ..< m:
     for _ in regex.findAll(bench_text, uri_find_all):
@@ -132,7 +153,7 @@ bench(re_ip_find_all, m):
 
 const ip_find_all = regex.re"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"
 
-benchRelative(ip_find_all, m):
+benchRelative(regex_ip_find_all, m):
   var d = 0
   for i in 0 ..< m:
     for _ in regex.findAll(bench_text, ip_find_all):
diff --git a/debian/changelog b/debian/changelog
index 92bdf1d..2df6b57 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+nim-regex (0.20.2+ds-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+  * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Sat, 20 May 2023 11:52:05 -0000
+
 nim-regex (0.17.0+ds-2) unstable; urgency=medium
 
   * Added some sanity to the package.
diff --git a/regex.nimble b/regex.nimble
index 327d2e2..6d0a5da 100644
--- a/regex.nimble
+++ b/regex.nimble
@@ -1,15 +1,14 @@
 # Package
 
-version = "0.17.0"
+version = "0.20.2"
 author = "Esteban Castro Borsani (@nitely)"
 description = "Linear time regex matching"
 license = "MIT"
 srcDir = "src"
 skipDirs = @["tests", "bench", "docs"]
 
-requires "nim >= 0.19.6"
+requires "nim >= 1.0.0"
 requires "unicodedb >= 0.7.2"
-requires "unicodeplus >= 0.5.0"
 
 task test, "Test":
   exec "nim c -r -o:bin/regex src/regex.nim"
@@ -23,7 +22,8 @@ task test, "Test":
   # js target should work in older versions, but
   # the docker image for CI has it since Nim 1.0.4,
   # so I'll only test it there
-  when (NimMajor, NimMinor, NimPatch) >= (1, 0, 4):
+  when (NimMajor, NimMinor, NimPatch) >= (1, 0, 4) and
+      (NimMajor, NimMinor) != (1, 4):  # issue #88
     exec "nim js -r src/regex.nim"
     exec "nim js -r tests/tests.nim"
     exec "nim js -r -d:forceRegexAtRuntime tests/tests.nim"
diff --git a/src/regex.nim b/src/regex.nim
index 4db61ff..98d1cf8 100644
--- a/src/regex.nim
+++ b/src/regex.nim
@@ -6,7 +6,7 @@ the regular expression. So, it can handle
 input from untrusted users. The syntax is similar to PCRE
 but lacks a few features that can not be implemented
 while keeping the space/time complexity guarantees,
-i.e.: backreferences and look-around assertions.
+ex: backreferences.
 
 Syntax
 ******
@@ -150,13 +150,15 @@ Lookaround Assertions
 #####################
 
 .. code-block::
-  (?=x)   A positive lookahead assertion
-  (?!x)   A negative lookahead assertion
-  (?<=x)  A positive lookbehind assertion
-  (?<!x)  A negative lookbehind assertion
+  (?=regex)   A positive lookahead assertion
+  (?!regex)   A negative lookahead assertion
+  (?<=regex)  A positive lookbehind assertion
+  (?<!regex)  A negative lookbehind assertion
 
-Lookaround assertions are limited to a single character
-at the moment.
+Any regex expression is a valid lookaround; groups
+are captured as well. Beware, lookarounds containing
+repetitions (``*``, ``+``, and ``{n,}``) may run in
+polynomial time.
 
 Examples
 ********
@@ -237,7 +239,7 @@ the scope, and it contains the submatches for every capture group.
     :test:
     var matched = false
     let text = "[my link](https://example.com)"
-    match text, rex"\[([a-z ]*)\]\((https?://[^)]+)\)":
+    match text, rex"\[([^\]]+)\]\((https?://[^)]+)\)":
       doAssert matches == @["my link", "https://example.com"]
       matched = true
     doAssert matched
@@ -249,11 +251,10 @@ import std/sequtils
 import std/unicode
 from std/strutils import addf
 
-import ./regex/nodetype
+import ./regex/types
 import ./regex/common
 import ./regex/compiler
 import ./regex/nfatype
-import ./regex/nfa
 import ./regex/nfafindall
 import ./regex/nfamatch
 when not defined(noRegexOpt):
@@ -508,50 +509,11 @@ func match*(
     doAssert "abcd".match(re"abcd", m)
     doAssert not "abcd".match(re"abc", m)
 
-  const f: MatchFlags = {}
-  result = matchImpl(s, pattern, m, f, start)
+  result = matchImpl(s, pattern, m, start)
 
 func match*(s: string, pattern: Regex): bool {.inline, raises: [].} =
   var m: RegexMatch
-  result = matchImpl(s, pattern, m, {mfNoCaptures})
-
-template containsImpl: untyped {.dirty.} =
-  const f = {mfShortestMatch, mfFindMatch, mfNoCaptures}
-  var m: RegexMatch
-  result = matchImpl(s, pattern, m, f)
-
-func contains*(s: string, pattern: Regex): bool {.inline, raises: [].} =
-  ##  search for the pattern anywhere
-  ##  in the string. It returns as soon
-  ##  as there is a match, even when the
-  ##  expression has repetitions
-  runnableExamples:
-    doAssert re"bc" in "abcd"
-    doAssert re"(23)+" in "23232"
-    doAssert re"^(23)+$" notin "23232"
-
-  containsImpl()
-
-template findImpl: untyped {.dirty.} =
-  matchImpl(s, pattern, m, {mfFindMatch}, start)
-
-func find*(
-  s: string,
-  pattern: Regex,
-  m: var RegexMatch,
-  start = 0
-): bool {.inline, raises: [].} =
-  ## search through the string looking for the first
-  ## location where there is a match
-  runnableExamples:
-    var m: RegexMatch
-    doAssert "abcd".find(re"bc", m) and
-      m.boundaries == 1 .. 2
-    doAssert not "abcd".find(re"de", m)
-    doAssert "2222".find(re"(22)*", m) and
-      m.group(0) == @[0 .. 1, 2 .. 3]
-
-  findImpl()
+  result = matchImpl(s, pattern, m)
 
 template runeIncAt(s: string, n: var int) =
   ## increment ``n`` up to
@@ -660,6 +622,42 @@ func findAndCaptureAll*(
   for m in s.findAll(pattern):
     result.add s[m.boundaries]
 
+# XXX find shortest match; disable captures
+func contains*(s: string, pattern: Regex): bool {.inline, raises: [].} =
+  ##  search for the pattern anywhere in the string
+  runnableExamples:
+    doAssert re"bc" in "abcd"
+    doAssert re"(23)+" in "23232"
+    doAssert re"^(23)+$" notin "23232"
+
+  for _ in findAllBounds(s, pattern):
+    return true
+  return false
+
+func find*(
+  s: string,
+  pattern: Regex,
+  m: var RegexMatch,
+  start = 0
+): bool {.inline, raises: [].} =
+  ## search through the string looking for the first
+  ## location where there is a match
+  runnableExamples:
+    var m: RegexMatch
+    doAssert "abcd".find(re"bc", m) and
+      m.boundaries == 1 .. 2
+    doAssert not "abcd".find(re"de", m)
+    doAssert "2222".find(re"(22)*", m) and
+      m.group(0) == @[0 .. 1, 2 .. 3]
+
+  m.clear()
+  for m2 in findAll(s, pattern, start):
+    m.captures.add m2.captures
+    m.namedGroups = m2.namedGroups
+    m.boundaries = m2.boundaries
+    return true
+  return false
+
 iterator split*(s: string, sep: Regex): string {.inline, raises: [].} =
   ## return not matched substrings
   runnableExamples:
@@ -731,26 +729,25 @@ func startsWith*(
   runnableExamples:
     doAssert "abc".startsWith(re"\w")
     doAssert not "abc".startsWith(re"\d")
-  var m: RegexMatch
-  result = matchImpl(s, pattern, m, {mfShortestMatch, mfNoCaptures}, start)
 
-template endsWithImpl: untyped {.dirty.} =
-  result = false
-  var
-    m: RegexMatch
-    i = 0
-  while i < s.len:
-    result = matchImpl(s, pattern, m, {mfNoCaptures}, i)
-    if result: return
-    s.runeIncAt(i)
+  startsWithImpl(s, pattern, start)
 
+# XXX use findAll and check last match bounds
 func endsWith*(s: string, pattern: Regex): bool {.inline, raises: [].} =
   ## return whether the string
   ## ends with the pattern or not
   runnableExamples:
     doAssert "abc".endsWith(re"\w")
     doAssert not "abc".endsWith(re"\d")
-  endsWithImpl()
+
+  result = false
+  var
+    m: RegexMatch
+    i = 0
+  while i < s.len:
+    result = match(s, pattern, m, i)
+    if result: return
+    s.runeIncAt(i)
 
 func flatCaptures(
   result: var seq[string],
@@ -831,12 +828,15 @@ func replace*(
     if limit > 0 and j == limit: break
   result.addsubstr(s, i)
 
+when not defined(nimHasEffectsOf):
+  {.pragma: effectsOf.}
+
 func replace*(
   s: string,
   pattern: Regex,
   by: proc (m: RegexMatch, s: string): string,
   limit = 0
-): string {.inline, raises: [].} =
+): string {.inline, raises: [], effectsOf: by.} =
   ## Replace matched substrings.
   ##
   ## If ``limit`` is given, at most ``limit``
@@ -870,7 +870,28 @@ func isInitialized*(re: Regex): bool {.inline, raises: [].} =
     re = re"foo"
     doAssert re.isInitialized
 
-  re.nfa.len > 0
+  re.nfa.s.len > 0
+
+func escapeRe*(s: string): string {.raises: [].} =
+  ## Escape special regex characters in ``s``
+  ## so that it can be matched verbatim
+  # The special char list is the same as re.escape
+  # in Python 3.7
+  #
+  # utf-8 ascii code-points cannot be part of multi-byte
+  # code-points, so we can read/match byte by byte
+  result = ""
+  for c in s:
+    case c
+    of ' ', '#', '$', '&', '(',
+        ')', '*', '+', '-', '.',
+        '?', '[', '\\', ']', '^',
+        '{', '|', '}', '~', char(9),
+        char(10), char(11), char(12), char(13):
+      result.add '\\'
+      result.add c
+    else:
+      result.add c
 
 proc toString(
   pattern: Regex,
@@ -884,7 +905,7 @@ proc toString(
     result = "[...]"
     return
   visited.incl(nIdx)
-  let n = pattern.nfa[nIdx]
+  let n = pattern.nfa.s[nIdx]
   result = "["
   result.add($n)
   for nn in n.next:
@@ -901,13 +922,14 @@ proc toString(pattern: Regex): string {.used.} =
 when isMainModule:
   import ./regex/parser
   import ./regex/exptransformation
+  import ./regex/dotgraph
 
   func toAtoms(s: string): string =
     var groups: GroupsCapture
     let atoms = s
       .parse
       .toAtoms(groups)
-    result = atoms.toString
+    result = atoms.s.toString
 
   func toNfaStr(s: string): string =
     result = re(s).toString
@@ -945,9 +967,7 @@ when isMainModule:
   doAssert r"a{0,0}".toNfaStr == r"a".toNfaStr
   doAssert r"a{1,2}".toNfaStr == r"aa?".toNfaStr
   doAssert r"a{2,4}".toNfaStr == r"aaa?a?".toNfaStr
-  doAssert r"a{,10}".toNfaStr == r"a?a?a?a?a?a?a?a?a?a?".toNfaStr
   doAssert r"a{0,10}".toNfaStr == r"a?a?a?a?a?a?a?a?a?a?".toNfaStr
-  doAssert r"a{,}".toNfaStr == r"a*".toNfaStr
   doAssert r"(a(b)){2}".toNfaStr == r"(a(b))(a(b))".toNfaStr
 
   # tascii_set
@@ -1085,6 +1105,40 @@ when isMainModule:
   doAssert match("abcabcabc", re"(?:(?:abc)){3}")
   doAssert match("abcabcabc", re"((abc)){3}")
 
+  doAssert match("", re"|")
+  doAssert match("a", re"a|")
+  doAssert match("", re"a|")
+  doAssert(not match("b", re"a|"))
+  doAssert match("b", re"|b")
+  doAssert match("", re"|b")
+  doAssert(not match("a", re"|b"))
+  doAssert match("", re"(|)")
+  doAssert match("a", re"(a|)")
+  doAssert match("", re"(a|)")
+  doAssert(not match("b", re"(a|)"))
+  doAssert match("b", re"(|b)")
+  doAssert match("", re"(|b)")
+  doAssert(not match("a", re"(|b)"))
+  doAssert match("", re"||")
+
+  doAssert match(" ", re"(?x)     (?-x) ")
+  doAssert match("aa", re"((?x)   a    )a")
+  doAssert match(" ", re"((?x)     ) ")
+  doAssert match(" ", re"(?x:(?x)     ) ")
+  doAssert match(" ", re"((?x:)) ")
+  doAssert match("A", re"(?xi)     a")
+  doAssert(not match("A", re"((?xi))     a"))
+  doAssert(not match("A", re"(?xi:(?xi)     )a"))
+
+  doAssert graph(re"^a+$") == """digraph graphname {
+    0 [label="q0";color=blue];
+    1 [label="q1";color=black];
+    2 [label="q2";color=blue];
+    0 -> 1 [label="a, {^}, i=0"];
+    1 -> 1 [label="a, i=0"];1 -> 2 [label="{eoe}, {$}, i=1"];
+}
+"""
+
   # subset of tests.nim
   proc raisesMsg(pattern: string): string =
     try:
@@ -1145,6 +1199,59 @@ when isMainModule:
     doAssert findAllBounds("#foo://#", re"[\w]+://") == @[1 .. 6]
     doAssert findAllBounds("abc\nabc\na", re"(?m)^a") ==
       @[0 .. 0, 4 .. 4, 8 .. 8]
+    doAssert match("ab", re"a(?=b)\w")
+    doAssert(not match("ab", re"a(?=x)\w"))
+    doAssert match("ab", re"\w(?<=a)b")
+    doAssert(not match("ab", re"\w(?<=x)b"))
+    doAssert match("aaab", re".*(?<=^(\w*?)(\w*?)(\w??)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3], @[4 .. 3]]
+    doAssert match("aaab", re".*(?<=^(\w*?)(\w*)(\w??)$)", m) and
+      m.captures == @[@[0 .. -1], @[0 .. 3], @[4 .. 3]]
+    doAssert match("aaab", re"(\w*)(\w??)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    doAssert match("aaab", re".*(?<=^(\w*)(\w??)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    doAssert match("aaab", re".*(?<=^(\w*)(\w?)$)", m) and
+      m.captures == @[@[0 .. 2], @[3 .. 3]]
+    doAssert match("aaab", re".*(?<=^(\d)\w+|(\w)\w{3}|(\w)\w{3}|(\w)\w+$)", m) and
+      m.captures == @[@[], @[0 .. 0], @[], @[]]
+    doAssert match("aaab", re".*(?<=^(\d)\w+|(\d)\w{3}|(\w)\w{3}|(\w)\w+$)", m) and
+      m.captures == @[@[], @[], @[0 .. 0], @[]]
+    doAssert match("aaab", re".*(?<=^(\d)\w+|(\d)\w{3}|(\d)\w{3}|(\w)\w+$)", m) and
+      m.captures == @[@[], @[], @[], @[0 .. 0]]
+    doAssert(not match("ab", re"\w(?<=a(?=b(?<=a)))b"))
+    doAssert(not match("ab", re"\w(?<=a(?<=a(?=b(?<=a))))b"))
+    doAssert match("ab", re"\w(?<=a(?=b(?<=b)))b")
+    doAssert match("ab", re"\w(?<=a(?<=a(?=b(?<=b))))b")
+    doAssert findAllBounds(r"1abab", re"(?<=\d\w*)ab") ==
+      @[1 .. 2, 3 .. 4]
+    doAssert findAllBounds(r"abab", re"(?<=\d\w*)ab").len == 0
+    doAssert findAllBounds(r"abab1", re"ab(?=\w*\d)") ==
+      @[0 .. 1, 2 .. 3]
+    doAssert findAllBounds(r"abab", re"ab(?=\w*\d)").len == 0
+    doAssert match("aΪ", re"a(?=Ϊ)\w")
+    doAssert match("Ϊb", re"Ϊ(?=b)\w")
+    doAssert match("弢Ⓐ", re"弢(?=Ⓐ)\w")
+    doAssert match("aΪ", re"\w(?<=a)Ϊ")
+    doAssert match("Ϊb", re"\w(?<=Ϊ)b")
+    doAssert match("弢Ⓐ", re"\w(?<=弢)Ⓐ")
+    block:  # Follows Nim re's behaviour
+      doAssert match("abc", re"(?<=a)bc", m, start = 1)
+      doAssert(not match("abc", re"(?<=x)bc", m, start = 1))
+      doAssert(not match("abc", re"^bc", m, start = 1))
+    doAssert startsWith("abc", re"b", start = 1)
+    doAssert startsWith("abc", re"(?<=a)b", start = 1)
+    doAssert startsWith("abc", re"b", start = 1)
+    doAssert(not startsWith("abc", re"(?<=x)b", start = 1))
+    doAssert(not startsWith("abc", re"^b", start = 1))
+    doAssert(not match("ab", re"ab(?=x)"))
+    doAssert(not match("ab", re"(?<=x)ab"))
+    doAssert match("ab", re"(?<=^)ab")
+    doAssert match("ab", re"ab(?=$)")
+    doAssert match("abcdefg", re"\w+(?<=(ab)(?=(cd)))\w+", m) and
+      m.captures == @[@[0 .. 1], @[2 .. 3]]
+    doAssert match("abcdefg", re"\w+(?<=(ab)(?=(cd)(?<=(cd))))\w+", m) and
+      m.captures == @[@[0 .. 1], @[2 .. 3], @[2 .. 3]]
     when canUseMacro:
       block:
         var m = false
@@ -1196,5 +1303,11 @@ when isMainModule:
           doAssert matches == @["my link", "https://example.com"]
           matched = true
         doAssert matched
+      block:
+        var matched = false
+        match "abcdefg", rex"\w+(?<=(ab)(?=(cd)))\w+":
+          doAssert matches == @["ab", "cd"]
+          matched = true
+        doAssert matched
 
   echo "ok regex.nim"
diff --git a/src/regex/common.nim b/src/regex/common.nim
index 5d796eb..9dfc5d6 100644
--- a/src/regex/common.nim
+++ b/src/regex/common.nim
@@ -35,6 +35,26 @@ proc `<=`*(x, y: Rune): bool =
 proc cmp*(x, y: Rune): int =
   x.int - y.int
 
+func bwRuneAt*(s: string, n: int): Rune =
+  ## Take rune ending at ``n``
+  doAssert n >= 0
+  doAssert n <= s.len-1
+  var n = n
+  while n > 0 and s[n].ord shr 6 == 0b10:
+    dec n
+  fastRuneAt(s, n, result, false)
+
+template bwFastRuneAt*(
+  s: string, n: var int, result: var Rune
+): untyped =
+  ## Take rune ending at ``n``
+  doAssert n > 0
+  doAssert n <= s.len
+  dec n
+  while n > 0 and s[n].ord shr 6 == 0b10:
+    dec n
+  fastRuneAt(s, n, result, false)
+
 proc `%%`*(
   formatstr: string,
   a: openArray[string]
diff --git a/src/regex/compiler.nim b/src/regex/compiler.nim
index 48d72e2..6552eae 100644
--- a/src/regex/compiler.nim
+++ b/src/regex/compiler.nim
@@ -1,23 +1,27 @@
 import ./parser
 import ./exptransformation
+import ./types
 import ./nfatype
 import ./nfa
 import ./litopt
+when defined(regexDotDir):
+  import ./dotgraph
 
 func reImpl*(s: string): Regex {.inline.} =
   var groups: GroupsCapture
   let rpn = s
     .parse
     .transformExp(groups)
-  var transitions: Transitions
-  let nfa = rpn.nfa2(transitions)
+  let nfa = rpn.nfa2()
   let opt = rpn.litopt2()
   result = Regex(
     nfa: nfa,
-    transitions: transitions,
     groupsCount: groups.count,
     namedGroups: groups.names,
     litOpt: opt)
+  when defined(regexDotDir) and (NimMajor, NimMinor) >= (1, 2):
+    const regexDotDir {.strdefine.} = ""
+    graphToFile(result, regexDotDir)
 
 func reCt*(s: string): Regex {.compileTime.} =
   reImpl(s)
diff --git a/src/regex/dotgraph.nim b/src/regex/dotgraph.nim
new file mode 100644
index 0000000..a1ddd61
--- /dev/null
+++ b/src/regex/dotgraph.nim
@@ -0,0 +1,49 @@
+import std/strutils
+when (NimMajor, NimMinor) >= (1, 2):
+  import std/hashes
+  import std/os
+
+import ./nfatype
+import ./types
+
+func color(n: Node): string =
+  case n.kind
+  of matchableKind: "black"
+  else: "blue"
+
+func graph*(nfa: Nfa): string =
+  result = "digraph graphname {\n"
+  let tab = "    "
+  for i, n in pairs nfa.s:
+    result.add tab
+    result.add($i & " [label=\"q" & $i & "\";color=" & n.color & "];")
+    result.add '\n'
+  for i, n in pairs nfa.s:
+    if n.next.len == 0:
+      continue
+    result.add tab
+    for i2, n2 in pairs n.next:
+      var t = ""
+      if nfa.t.allZ[i][i2] > -1:
+        for i3, z in pairs nfa.t.z[nfa.t.allZ[i][i2]]:
+          if i3 > 0: t &= ", "
+          t &= $z
+        t = ", {" & t & "}"
+      let label = ($nfa.s[n2] & t & ", i=" & $i2).replace(r"\", r"\\")
+      result.add($i & " -> " & $n2 & " [label=\"" & label & "\"];")
+    result.add '\n'
+  result.add "}\n"
+
+func graph*(regex: Regex): string =
+  result = graph(regex.nfa)
+
+when (NimMajor, NimMinor) >= (1, 2):
+  func graphToFile*(regex: Regex, dir: string) =
+    {.noSideEffect.}:
+      if dir.len > 0:
+        let content = graph(regex)
+        let fname = $hash(content) & ".dot"
+        try:
+          writeFile(dir / fname, content)
+        except IOError:
+          debugEcho "write file error"
diff --git a/src/regex/exptransformation.nim b/src/regex/exptransformation.nim
index a0cceb7..7bf013f 100644
--- a/src/regex/exptransformation.nim
+++ b/src/regex/exptransformation.nim
@@ -3,7 +3,8 @@ import std/sets
 import std/tables
 import std/algorithm
 
-import ./nodetype
+import ./exptype
+import ./types
 import ./common
 import ./scanner
 
@@ -23,10 +24,30 @@ func check(cond: bool, msg: string) =
   if not cond:
     raise newException(RegexError, msg)
 
-func greediness(expression: seq[Node]): seq[Node] =
+func fixEmptyOps(exp: Exp): Exp =
+  ## Handle "|", "(|)", "a|", "|b", "||", "a||b", ...
+  ## Handle "()"
+  result.s = newSeq[Node](exp.s.len)
+  result.s.setLen 0
+  for i in 0 .. exp.s.len-1:
+    if exp.s[i].kind == reOr:
+      if i-1 < 0 or exp.s[i-1].kind == reGroupStart:
+        result.s.add initSkipNode()
+      result.s.add exp.s[i]
+      if i+1 > exp.s.len-1 or exp.s[i+1].kind in {reGroupEnd, reOr}:
+        result.s.add initSkipNode()
+    elif exp.s[i].kind == reGroupStart:
+      result.s.add exp.s[i]
+      if i+1 > exp.s.len-1 or exp.s[i+1].kind == reGroupEnd:
+        result.s.add initSkipNode()
+    else:
+      result.s.add exp.s[i]
+
+func greediness(exp: Exp): Exp =
   ## apply greediness to an expression
-  result = newSeqOfCap[Node](expression.len)
-  var sc = expression.scan()
+  result.s = newSeq[Node](exp.s.len)
+  result.s.setLen 0
+  var sc = exp.s.scan()
   for n in sc.mitems():
     if n.kind in repetitionKind or
         n.kind == reZeroOrOne:
@@ -34,7 +55,7 @@ func greediness(expression: seq[Node]): seq[Node] =
       if sc.peek.kind == reZeroOrOne:
         n.isGreedy = false
         discard sc.next
-    result.add(n)
+    result.s.add n
 
 type
   GroupsCapture* = object
@@ -42,18 +63,18 @@ type
     names*: OrderedTable[string, int16]
 
 func fillGroups(
-  exp: seq[Node],
+  exp: Exp,
   groups: var GroupsCapture
-): seq[Node] =
+): Exp =
   ## populate group indices, names and capturing mark
   result = exp
   groups.names = initOrderedTable[string, int16](2)
   groups.count = 0'i16
   var gs = newSeq[int]()
-  for i, n in result.mpairs:
+  for i, n in mpairs result.s:
     case n.kind
-    of reGroupStart:
-      gs.add(i)
+    of groupStartKind:
+      gs.add i
       if n.isCapturing:
         n.idx = groups.count
         inc groups.count
@@ -66,8 +87,8 @@ func fillGroups(
         "Invalid capturing group. " &
         "Found too many closing symbols")
       let start = gs.pop()
-      n.isCapturing = result[start].isCapturing
-      n.idx = result[start].idx
+      n.isCapturing = result.s[start].isCapturing
+      n.idx = result.s[start].idx
     else:
       discard
     check(
@@ -192,36 +213,35 @@ func applyFlag(n: var Node, f: Flag) =
       flagVerbose,
       flagNotVerbose}
 
-func applyFlags(expression: seq[Node]): seq[Node] =
+func applyFlags(exp: Exp): Exp =
   ## apply flags to each group
-  result = newSeqOfCap[Node](expression.len)
+  result.s = newSeq[Node](exp.s.len)
+  result.s.setLen 0
   var flags = newSeq[seq[Flag]]()
-  var sc = expression.scan()
+  var sc = exp.s.scan()
   for n in sc.mitems():
     # (?flags)
     # Orphan flags are added to current group
     case n.kind
-    of reGroupStart:
-      if n.flags.len == 0:
-        flags.add(@[])
-        result.add(n)
-        continue
-      if sc.peek.kind == reGroupEnd:  # (?flags)
-        discard sc.next()
-        if flags.len > 0:
-          flags[flags.len - 1].add(n.flags)
-        else:
-          flags.add(n.flags)
-        continue  # skip (
-      flags.add(n.flags)
+    of groupStartKind:
+      flags.add n.flags
     of reGroupEnd:
       discard flags.pop()
+    of reFlags:
+      if flags.len > 0:
+        flags[flags.len-1].add n.flags
+      else:
+        flags.add n.flags
+      # XXX define skipKind = {reSkip, reFlags} in types,
+      #     instead of this hack
+      result.s.add Node(kind: reSkip)
+      continue  # skip node
     else:
       let ff = flags.squash()
       for f in Flag.low .. Flag.high:
         if ff[f]:
           applyFlag(n, f)
-    result.add(n)
+    result.s.add n
 
 func expandOneRepRange(subExpr: seq[Node], n: Node): seq[Node] =
   ## expand a repetition-range expression
@@ -255,24 +275,25 @@ func expandOneRepRange(subExpr: seq[Node], n: Node): seq[Node] =
       cp: "?".toRune,
       isGreedy: n.isGreedy))
 
-func expandRepRange(expression: seq[Node]): seq[Node] =
+func expandRepRange(exp: Exp): Exp =
   ## expand every repetition range
-  result = newSeqOfCap[Node](expression.len)
+  result.s = newSeq[Node](exp.s.len)
+  result.s.setLen 0
   var i: int
   var gi: int
-  for n in expression:
+  for n in exp.s:
     if n.kind != reRepRange:
-      result.add(n)
+      result.s.add n
       continue
     check(
-      result.len > 0,
+      result.s.len > 0,
       "Invalid repeition range, " &
       "nothing to repeat")
-    case result[^1].kind
+    case result.s[^1].kind
     of reGroupEnd:
       i = 0
       gi = 0
-      for ne in result.reversed:
+      for ne in result.s.reversed:
         inc i
         if ne.kind == reGroupEnd:
           inc gi
@@ -282,10 +303,10 @@ func expandRepRange(expression: seq[Node]): seq[Node] =
           break
         doAssert gi >= 0
       doAssert gi == 0
-      assert result[result.len-i].kind == reGroupStart
-      result.add(result[result.len-i .. result.len-1].expandOneRepRange(n))
+      assert result.s[result.s.len-i].kind == reGroupStart
+      result.s.add result.s[result.s.len-i .. result.s.len-1].expandOneRepRange(n)
     of matchableKind:
-      result.add(result[result.len-1 .. result.len-1].expandOneRepRange(n))
+      result.s.add result.s[result.s.len-1 .. result.s.len-1].expandOneRepRange(n)
     else:
       check(
         false, 
@@ -293,33 +314,34 @@ func expandRepRange(expression: seq[Node]): seq[Node] =
          "char, shorthand (i.e: \\w), group, or set " &
          "expected before repetition range"))
 
-func populateUid(exp: seq[Node]): seq[Node] =
+func populateUid(exp: Exp): Exp =
   check(
-    exp.high < NodeUid.high,
+    exp.s.high < NodeUid.high,
     ("The expression is too long, " &
      "limit is ~$#") %% $NodeUid.high)
   result = exp
   var uid = 1.NodeUid
-  for n in result.mitems:
+  for n in mitems result.s:
     n.uid = uid
     inc uid
 
-func joinAtoms(expression: seq[Node]): seq[Node] =
+func joinAtoms(exp: Exp): AtomsExp =
   ## Put a ``~`` joiner between atoms. An atom is
   ## a piece of expression that would loose
   ## meaning when breaking it up (i.e.: ``a~(b|c)*~d``)
-  result = newSeqOfCap[Node](expression.len * 2)
+  result.s = newSeq[Node](exp.s.len * 2)
+  result.s.setLen 0
   var atomsCount = 0
-  for n in expression:
+  for n in exp.s:
     case n.kind
-    of matchableKind, assertionKind:
+    of matchableKind, assertionKind - lookaroundKind, reSkip:
       inc atomsCount
       if atomsCount > 1:
         atomsCount = 1
-        result.add(initJoinerNode())
-    of reGroupStart:
+        result.s.add initJoinerNode()
+    of groupStartKind:
       if atomsCount > 0:
-        result.add(initJoinerNode())
+        result.s.add initJoinerNode()
       atomsCount = 0
     of reOr:
       atomsCount = 0
@@ -331,7 +353,7 @@ func joinAtoms(expression: seq[Node]): seq[Node] =
       inc atomsCount
     else:
       doAssert false
-    result.add(n)
+    result.s.add n
 
 type
   Associativity = enum
@@ -381,17 +403,17 @@ func popGreaterThan(ops: var seq[Node], op: Node): seq[Node] =
   while (ops.len > 0 and
       ops[ops.len - 1].kind in opKind and
       ops[ops.len - 1].kind.hasPrecedence(op.kind)):
-    result.add(ops.pop())
+    result.add ops.pop()
 
 func popUntilGroupStart(ops: var seq[Node]): seq[Node] =
   result = newSeqOfCap[Node](ops.len)
   while true:
     let op = ops.pop()
-    result.add(op)
+    result.add op
     if op.kind == reGroupStart:
       break
 
-func rpn(expression: seq[Node]): seq[Node] =
+func rpn(exp: AtomsExp): RpnExp =
   ## An adaptation of the Shunting-yard algorithm
   ## for producing `Reverse Polish Notation` out of
   ## an expression specified in infix notation.
@@ -400,31 +422,74 @@ func rpn(expression: seq[Node]): seq[Node] =
   ## the parsing of the regular expression into an NFA.
   ## Suffix notation removes nesting and so it can
   ## be parsed in a linear way instead of recursively
-  result = newSeqOfCap[Node](expression.len)
+  result.s = newSeq[Node](exp.s.len)
+  result.s.setLen 0
   var ops = newSeq[Node]()
-  for n in expression:
+  for n in exp.s:
     case n.kind
-    of matchableKind, assertionKind:
-      result.add(n)
+    of matchableKind, assertionKind, reSkip:
+      result.s.add n
     of reGroupStart:
-      ops.add(n)
+      ops.add n
     of reGroupEnd:
-      result.add(ops.popUntilGroupStart())
-      result.add(n)
+      result.s.add ops.popUntilGroupStart()
+      result.s.add n
     of opKind:
-      result.add(ops.popGreaterThan(n))
-      ops.add(n)
+      result.s.add ops.popGreaterThan(n)
+      ops.add n
     else:
       doAssert false
   # reverse ops
   for i in 1 .. ops.len:
-    result.add(ops[ops.len - i])
+    result.s.add ops[ops.len-i]
+
+func subExps(exp: AtomsExp, parentKind = reLookahead): AtomsExp =
+  ## Collect and convert lookaround sub-expressions to RPN
+  template n: untyped = result.s[^1]
+  result.s = newSeq[Node](exp.s.len)
+  result.s.setLen 0
+  var i = 0
+  var parens = newSeq[bool]()
+  while i < exp.s.len:
+    if exp.s[i].kind in lookaroundKind:
+      result.s.add exp.s[i]
+      inc i
+      parens.setLen 0
+      let i0 = i
+      while i < exp.s.len:
+        case exp.s[i].kind
+        of groupStartKind:
+          parens.add true
+        of reGroupEnd:
+          if parens.len > 0:
+            discard parens.pop()
+          else:
+            break
+        else:
+          discard
+        inc i
+      doAssert exp.s[i].kind == reGroupEnd
+      let atoms = AtomsExp(s: exp.s[i0 .. i-1])
+      n.subExp.rpn = atoms
+        .subExps(parentKind = n.kind)
+        .rpn
+      # Store whether the capts must be reversed
+      if parentKind in lookaheadKind:
+        n.subExp.reverseCapts = n.kind in lookbehindKind
+      else:
+        doAssert parentKind in lookbehindKind
+        n.subExp.reverseCapts = n.kind in lookaheadKind
+      inc i
+    else:
+      result.s.add exp.s[i]
+      inc i
 
 func toAtoms*(
-  exp: seq[Node],
+  exp: Exp,
   groups: var GroupsCapture
-): seq[Node] {.inline.} =
+): AtomsExp {.inline.} =
   result = exp
+    .fixEmptyOps
     .fillGroups(groups)
     .greediness
     .applyFlags
@@ -433,9 +498,10 @@ func toAtoms*(
     .joinAtoms
 
 func transformExp*(
-  exp: seq[Node],
+  exp: Exp,
   groups: var GroupsCapture
-): seq[Node] {.inline.} =
+): RpnExp {.inline.} =
   result = exp
     .toAtoms(groups)
+    .subExps
     .rpn
diff --git a/src/regex/exptype.nim b/src/regex/exptype.nim
new file mode 100644
index 0000000..7772b3f
--- /dev/null
+++ b/src/regex/exptype.nim
@@ -0,0 +1,7 @@
+import ./types
+
+type
+  Exp* = object
+    s*: seq[Node]
+  AtomsExp* = object
+    s*: seq[Node]
diff --git a/src/regex/litopt.nim b/src/regex/litopt.nim
index 45f636c..7285ca0 100644
--- a/src/regex/litopt.nim
+++ b/src/regex/litopt.nim
@@ -4,16 +4,11 @@
 ## within the input string. See issue #59.
 
 #[
-Only optimizations that are
-guaranteed to run in linear
-time are implemented.
-We must also ensure the matches
+Only optimizations that are guaranteed to run in linear
+time are implemented. We must also ensure the matches
 are not overlapped.
-I think the best way to understand
-how the current implementation works
-is by example. However, I'll describe
-some parts of the algorithm that
-may be useful to grasp first:
+
+Here’s a high-level description of the algorithm:
   * we pick a literal that is memchr'ed
     to skip parts of the text
   * the prefix is the regex part before
@@ -44,139 +39,9 @@ How is quadratic runtime avoided?
   * The `overlapTests` in tests.nim are good examples
     of this
 
-------------------------------------
-Examples
-
-Most of the optimizations for these examples
-are not implemented, may not get implemented,
-or are already part of the more general implemented
-optimization
-
-re"(?ms)^.+abc"
-can be optimized by running the match
-at every \n. We can do a quick memchr to find
-every \n.
-
-There is a gotcha, though. If we do this on
-an input such as `\n\n\n\n\n` the matcher will
-consume the whole string for every `\n`, hence
-runtime is quadratic.
-
-The solution is to run a special kind of find,
-instead of a regular match. We need a special find,
-because the regular find will always consume the
-whole string, so we'd only be able to skip the first
-part of the input until the first `\n`, instead
-of skipping multiple parts of the input.
-
-The special find works just like the regular one,
-except it stops when the current character of the
-input cannot be matched by the Regular Expression (RE).
-Once the special find stops, we can run a memchr from
-the index where the find stopped.
-
-Notice find returns non-overlapping matches,
-hence the next search must not process
-characters that are part of a previous matched string.
-The start of text cannot be within the boundaries
-of the previous match.
-
-^ There is a bunch of possible literal optimizations,
-  but this is the meat of it.
-
-re"\d\w+x"
-text: "xxxxxxxxxxx"
-This would take quadratic time,
-as the prefix matcher will match until
-the start of the string every time.
-The solution is disallow the optimization in
-cases where the literal can be matched by the
-prefix. Hence the literal becomes a delimiter
-for the prefix matcher.
-
-re"\wabc"
-This can be optimized the same way as
-the first example, except going back
-one character after running memchr.
-
-The runtime is again linear, every
-character can be consumed at most 2 times:
-one fordward (memchr), and one fordward
-(special find). The "go back" operation does
-not consumes characters, it just does
-index - len(prefix).
-
-The prefix may be longer than just one
-character, as long as the lenght of it
-is fixed we can just go back the lenght of
-the prefix and run a find. We can do
-this even when the prefix contain alternations
-as long as they have the same lenght, for
-example re"(x|y)abc".
-
-The alternative to going back X chars
-is to run the regex prefix in reverse, and
-then run the special find for the full regex.
-This would support prefixes with mixed
-alternation lengths (i.e: re"(\w\w|\d)foo").
-Since this is needed, the going back X
-chars may just be an optimal optimization
-(meaning I may not implement it).
-
-re"\d+abc"
-This can be optimized by doing a memchr
-for "a", running a special match for the
-regex prefix ("\d+") in reverse, and running
-the special find for the full regex.
-
-The special match is a regular match that returns
-the longest match.
-
-We cannot divide the regex in a prefix and suffix
-of "a", and just run that because that would
-run in cuadratic time (see the first "\n\n\n" input example).
-Also, we need to support captures.
-
-re"\w(a|b)"
-This can be optimized by running two scans,
-one for "a" and one for "b" at the start and
-after each match, and then trying to match the one
-with smaller char index first (i.e: the one
-that we found first).
-
-My experiments show PCRE do this with up to
-two literal alternations. When there are more
-than two alternations, it's likely better to
-generate a DFA. For example:
-re"foo|bar|baz|quz" would generate a DFA
-that matches re"f|b|q". We'd use the DFA
-instead of memchr.
-
-Single literals should be preferred over
-alternations. For example: re"\w(a|b)c" would
-memchr the "c" character. Meaning other listed
-optimizations are preferred.
-
----------------------------
-
-We should try to match the larger
-amount of literals and try to fail
-early. Let's say we have re"\d+abc",
-trying to match "a" and then an unbounded
-amount of chars at the left will likely take
-more time than trying to match "c", then
-"ab" at the left, since "a" will appear
-at least the same amount of times as "abc",
-but possibly more times.
-
-This translates to finding the largest amount
-of contiguous lits and picking the right most
-lit to memchr.
-
-However, this has the cost of potentially
-requiring a larger left part of the regex (to match the left side),
-so it may be better to find the first lit in the regex and then
-pick the last lit of its contiguous lits (if any).
+There used to be a list of regex examples here, but
+I wrote a blog post explaining things better, see
+https://nitely.github.io/2020/11/30/regex-literals-optimization.html
 
 ]#
 
@@ -184,7 +49,7 @@ import std/algorithm
 import std/sets
 import std/unicode
 
-import ./nodetype
+import ./types
 import ./nodematch
 import ./nfa
 
@@ -192,19 +57,18 @@ when (NimMajor, NimMinor, NimPatch) < (0, 20, 0):
   import common
 
 type
-  RpnExp = seq[Node]
-  LitNfa = Nfa
+  LitNfa = Enfa
   End = seq[int16]
 
 func combine(
-  nfa: var seq[Node],
+  eNfa: var Enfa,
   ends: var seq[End],
   org, target: int16
 ) =
   for e in ends[org]:
-    for i, ni in nfa[e].next.mpairs:
-      if nfa[ni].kind == reEoe:
-        ni = target
+    for i in mitems eNfa.s[e].next:
+      if eNfa.s[i].kind == reEoe:
+        i = target
   ends[org] = ends[target]
 
 const eoe = 0'i16
@@ -214,7 +78,7 @@ func update(
   ni: int16,
   next: openArray[int16]
 ) =
-  ends[ni].setLen(0)
+  ends[ni].setLen 0
   for n in next:
     if n == eoe:
         ends[ni].add ni
@@ -225,23 +89,24 @@ func update(
 # replace (...)+, (...)*, (...)?,
 # and (...|...) by skip nodes.
 # Based on Thompson's construction
-func toLitNfa(exp: RPNExp): LitNfa =
-  result = newSeqOfCap[Node](exp.len + 2)
-  result.add initEoeNode()
+func toLitNfa(exp: RpnExp): LitNfa =
+  result.s = newSeq[Node](exp.s.len + 2)
+  result.s.setLen 0
+  result.s.add initEoeNode()
   var
-    ends = newSeq[End](exp.len + 1)
+    ends = newSeq[End](exp.s.len + 1)
     states = newSeq[int16]()
-  if exp.len == 0:
+  if exp.s.len == 0:
     states.add eoe
-  for n in exp:
+  for n in exp.s:
     var n = n
     doAssert n.next.len == 0
-    let ni = result.len.int16
+    let ni = result.s.len.int16
     case n.kind
-    of matchableKind, assertionKind:
+    of matchableKind, assertionKind, reSkip:
       n.next.add eoe
       ends.update(ni, [eoe])
-      result.add n
+      result.s.add n
       states.add ni
     of reJoiner:
       let
@@ -253,12 +118,12 @@ func toLitNfa(exp: RPNExp): LitNfa =
       discard states.pop()
       discard states.pop()
       ends.update(ni, [eoe])
-      result.add initSkipNode([eoe])
+      result.s.add initSkipNode([eoe])
       states.add ni
     of reZeroOrMore, reZeroOrOne, reOneOrMore:
       discard states.pop()
       ends.update(ni, [eoe])
-      result.add initSkipNode([eoe])
+      result.s.add initSkipNode([eoe])
       states.add ni
     of reGroupStart:
       discard
@@ -267,18 +132,18 @@ func toLitNfa(exp: RPNExp): LitNfa =
     else:
       doAssert false
   doAssert states.len == 1
-  result.add initSkipNode(states)
+  result.s.add initSkipNode(states)
 
 type
   NodeIdx = int16
 
 func lonelyLit(exp: RpnExp): NodeIdx =
-  template state: untyped = litNfa[stateIdx]
+  template state: untyped = litNfa.s[stateIdx]
   result = -1
   let litNfa = exp.toLitNfa()
   var cpSeen = initHashSet[Rune](16)
   var lits = newSeq[int16]()
-  var stateIdx = litNfa.len.int16-1
+  var stateIdx = litNfa.s.len.int16-1
   while state.kind != reEoe:
     if state.kind == reChar and
         state.cp.int <= 127:  # XXX support unicode
@@ -292,35 +157,35 @@ func lonelyLit(exp: RpnExp): NodeIdx =
   # time, it seems sensible to limit the lits
   lits.setLen min(lits.len, 128)
   var litsTmp = newSeq[int16]()
-  for ni, n in exp.pairs:
+  for ni, n in pairs exp.s:
     # break after last lit of first lits sequence
-    if result > -1 and exp[result].uid+1 < n.uid:
+    if result > -1 and exp.s[result].uid+1 < n.uid:
       break
     if n.kind notin matchableKind:
       continue
     for nlit in lits:
-      doAssert n.uid <= litNfa[nlit].uid
-      if n.uid == litNfa[nlit].uid:
+      doAssert n.uid <= litNfa.s[nlit].uid
+      if n.uid == litNfa.s[nlit].uid:
         result = ni.NodeIdx
         #return
-      if not match(n, litNfa[nlit].cp):
+      if not match(n, litNfa.s[nlit].cp):
         litsTmp.add nlit
     swap lits, litsTmp
     litsTmp.setLen 0
 
 func prefix(eNfa: Enfa, uid: NodeUid): Enfa =
-  template state0: untyped = eNfa.len.int16-1
+  template state0: untyped = eNfa.s.len.int16-1
   result = eNfa
-  for n in result.mitems:
+  for n in result.s.mitems:
     n.next.setLen 0
   # reverse transitions; DFS
   var stack = @[(state0, -1'i16)]
   var visited: set[int16]
-  template state: untyped = eNfa[ni]
+  template state: untyped = eNfa.s[ni]
   while stack.len > 0:
     let (ni, pi) = stack.pop()
     if pi > -1:
-      result[ni].next.add pi
+      result.s[ni].next.add pi
     if ni in visited:
       continue
     visited.incl ni
@@ -329,52 +194,52 @@ func prefix(eNfa: Enfa, uid: NodeUid): Enfa =
       continue
     for mi in state.next:
       stack.add (mi, ni)
-  for n in result.mitems:
+  for n in mitems result.s:
     n.next.reverse
     n.isGreedy = true
   # Swap initial state by eoe
   var eoeIdx = -1'i16
-  for ni, n in result.pairs:
+  for ni, n in pairs result.s:
     if n.kind == reEoe:
       doAssert eoeIdx == -1
       eoeIdx = ni.int16
   doAssert eoeIdx != -1
-  for ni in eNfa[state0].next:
-    for i in 0 .. result[ni].next.len-1:
-      if result[ni].next[i] == state0:
-        result[ni].next[i] = eoeIdx
-  doAssert result[eoeIdx].kind == reEoe
-  doAssert result[state0].kind == reSkip
-  swap result[state0].kind, result[eoeIdx].kind
-  swap result[state0], result[eoeIdx]
+  for ni in eNfa.s[state0].next:
+    for i in 0 .. result.s[ni].next.len-1:
+      if result.s[ni].next[i] == state0:
+        result.s[ni].next[i] = eoeIdx
+  doAssert result.s[eoeIdx].kind == reEoe
+  doAssert result.s[state0].kind == reSkip
+  swap result.s[state0].kind, result.s[eoeIdx].kind
+  swap result.s[state0], result.s[eoeIdx]
   # cut
   var nIdx = -1
-  for ni, n in eNfa.pairs:
+  for ni, n in pairs eNfa.s:
     if n.uid == uid:
       doAssert nIdx == -1
       nIdx = ni
   doAssert nIdx != -1
-  result[state0].next = result[nIdx].next
+  result.s[state0].next = result.s[nIdx].next
 
 type
   LitOpt* = object
     lit*: Rune
     nfa*: Nfa
-    tns*: Transitions
 
 func canOpt*(litOpt: LitOpt): bool =
-  return litOpt.nfa.len > 0
+  return litOpt.nfa.s.len > 0
 
 func litopt2*(exp: RpnExp): LitOpt =
-  template litNode: untyped = exp[litIdx]
+  template litNode: untyped = exp.s[litIdx]
   let litIdx = exp.lonelyLit()
   if litIdx == -1:
     return
   result.lit = litNode.cp
   result.nfa = exp
+    .subExps
     .eNfa
     .prefix(litNode.uid)
-    .eRemoval(result.tns)
+    .eRemoval
 
 when isMainModule:
   from unicode import toUtf8, `$`
@@ -401,10 +266,9 @@ when isMainModule:
 
   func toNfa(s: string): Nfa =
     var groups: GroupsCapture
-    var transitions: Transitions
     result = s
       .rpn(groups)
-      .nfa2(transitions)
+      .nfa2
 
   proc toString(
     nfa: Nfa,
@@ -412,14 +276,14 @@ when isMainModule:
     visited: var set[int16]
   ): string =
     # XXX zero-match transitions are missing
-    if nfa[nIdx].kind == reEoe:
+    if nfa.s[nIdx].kind == reEoe:
       result = "eoe"
       return
     if nIdx in visited:
       result = "[...]"
       return
     visited.incl nIdx
-    let n = nfa[nIdx]
+    let n = nfa.s[nIdx]
     result = "["
     result.add $n.cp
     for nn in n.next:
@@ -489,9 +353,9 @@ when isMainModule:
   doAssert r"(a|ab)\w@&%".prefix.toString ==
     "[#, [&, [@, [w, [a, eoe], [b, [a, eoe]]]]]]"
   doAssert r"a?b".prefix.toString == r"a?".toNfa.toString
-  doAssert r"".prefix.len == 0
-  doAssert r"a?".prefix.len == 0
-  doAssert r"\w".prefix.len == 0
+  doAssert r"".prefix.s.len == 0
+  doAssert r"a?".prefix.s.len == 0
+  doAssert r"\w".prefix.s.len == 0
   doAssert r"(\w\d)*@".prefix.toString == r"(\d\w)*".toNfa.toString
   doAssert r"(\w\d)+@".prefix.toString == r"(\d\w)+".toNfa.toString
   # We search the start of the match, so greeddiness
diff --git a/src/regex/nfa.nim b/src/regex/nfa.nim
index ffc4a04..cf16427 100644
--- a/src/regex/nfa.nim
+++ b/src/regex/nfa.nim
@@ -1,6 +1,7 @@
 import std/deques
+import std/algorithm
 
-import ./nodetype
+import ./types
 import ./common
 
 func check(cond: bool, msg: string) =
@@ -8,26 +9,24 @@ func check(cond: bool, msg: string) =
     raise newException(RegexError, msg)
 
 type
-  Enfa* = seq[Node]
-  Nfa* = seq[Node]
   End = seq[int16]
-    ## store all the last
+    ## store the last
     ## states of a given state.
     ## Avoids having to recurse
-    ## a state to find its ends,
+    ## a state to find its end,
     ## but have to keep them up-to-date
 
 func combine(
-  nfa: var seq[Node],
+  eNfa: var Enfa,
   ends: var seq[End],
   org, target: int16
 ) =
   ## combine ends of ``org``
   ## with ``target``
   for e in ends[org]:
-    for i, ni in nfa[e].next.mpairs:
-      if nfa[ni].kind == reEOE:
-        ni = target
+    for i in mitems eNfa.s[e].next:
+      if eNfa.s[i].kind == reEOE:
+        i = target
   ends[org] = ends[target]
 
 const eoe = 0'i16
@@ -41,35 +40,36 @@ func update(
   ## to point to the end of the nodes in next.
   ## If a node in next is Eoe,
   ## the end of ``ni`` will point to itself
-  ends[ni].setLen(0)
+  ends[ni].setLen 0
   for n in next:
     if n == eoe:
-        ends[ni].add ni 
+      ends[ni].add ni 
     else:
-        ends[ni].add ends[n]
+      ends[ni].add ends[n]
 
-func eNfa*(expression: seq[Node]): Enfa {.raises: [RegexError].} =
+func eNfa*(exp: RpnExp): Enfa {.raises: [RegexError].} =
   ## Thompson's construction
-  result = newSeqOfCap[Node](expression.len + 2)
-  result.add initEOENode()
+  result.s = newSeq[Node](exp.s.len + 2)
+  result.s.setLen 0
+  result.s.add initEOENode()
   var
-    ends = newSeq[End](expression.len + 1)
+    ends = newSeq[End](exp.s.len + 1)
     states = newSeq[int16]()
-  if expression.len == 0:
+  if exp.s.len == 0:
     states.add eoe
-  for n in expression:
+  for n in exp.s:
     var n = n
     doAssert n.next.len == 0
     check(
-      result.high < int16.high,
+      result.s.high < int16.high,
       ("The expression is too long, " &
        "limit is ~$#") %% $int16.high)
-    let ni = result.len.int16
+    let ni = result.s.len.int16
     case n.kind
-    of matchableKind, assertionKind:
+    of matchableKind, assertionKind, reSkip:
       n.next.add eoe
       ends.update(ni, [eoe])
-      result.add n
+      result.s.add n
       states.add ni
     of reJoiner:
       let
@@ -87,7 +87,7 @@ func eNfa*(expression: seq[Node]): Enfa {.raises: [RegexError].} =
         stateA = states.pop()
       n.next.add [stateA, stateB]
       ends.update(ni, n.next)
-      result.add n
+      result.s.add n
       states.add ni
     of reZeroOrMore:
       check(
@@ -98,10 +98,10 @@ func eNfa*(expression: seq[Node]): Enfa {.raises: [RegexError].} =
       n.next.add [stateA, eoe]
       ends.update(ni, n.next)
       result.combine(ends, stateA, ni)
-      result.add n
+      result.s.add n
       states.add ni
       if not n.isGreedy:
-        swap(result[^1].next[0], result[^1].next[1])
+        swap result.s[^1].next[0], result.s[^1].next[1]
     of reOneOrMore:
       check(
         states.len >= 1,
@@ -111,10 +111,10 @@ func eNfa*(expression: seq[Node]): Enfa {.raises: [RegexError].} =
       n.next.add [stateA, eoe]
       ends.update(ni, n.next)
       result.combine(ends, stateA, ni)
-      result.add n
+      result.s.add n
       states.add stateA
       if not n.isGreedy:
-        swap(result[^1].next[0], result[^1].next[1])
+        swap result.s[^1].next[0], result.s[^1].next[1]
     of reZeroOrOne:
       check(
         states.len >= 1,
@@ -123,27 +123,27 @@ func eNfa*(expression: seq[Node]): Enfa {.raises: [RegexError].} =
       let stateA = states.pop()
       n.next.add [stateA, eoe]
       ends.update(ni, n.next)
-      result.add n
+      result.s.add n
       states.add ni
       if not n.isGreedy:
-        swap(result[^1].next[0], result[^1].next[1])
+        swap result.s[^1].next[0], result.s[^1].next[1]
     of reGroupStart:
       let stateA = states.pop()
       n.next.add stateA
       ends.update(ni, n.next)
-      result.add n
+      result.s.add n
       states.add ni
     of reGroupEnd:
       n.next.add eoe
       ends.update(ni, n.next)
       let stateA = states.pop()
       result.combine(ends, stateA, ni)
-      result.add n
+      result.s.add n
       states.add stateA
     else:
       doAssert(false, "Unhandled node: $#" %% $n.kind)
   doAssert states.len == 1
-  result.add initSkipNode(states)
+  result.s.add initSkipNode(states)
 
 type
   Zclosure = seq[int16]
@@ -160,59 +160,58 @@ func isTransitionZ(n: Node): bool {.inline.} =
 
 func teClosure(
   result: var TeClosure,
-  enfa: Enfa,
+  eNfa: Enfa,
   state: int16,
-  visited: var set[int16],
+  processing: var seq[int16],
   zTransitions: Zclosure
 ) =
   var zTransitionsCurr = zTransitions
-  if isTransitionZ(enfa[state]):
+  if isTransitionZ eNfa.s[state]:
     zTransitionsCurr.add state
-  if enfa[state].kind in matchableKind + {reEOE}:
-    result.add((state, zTransitionsCurr))
+  if eNfa.s[state].kind in matchableKind + {reEOE}:
+    result.add (state, zTransitionsCurr)
     return
-  for i, s in pairs enfa[state].next:
-    # Enter loops only once. Allows: "(a*)*" -> ["a", ""] 
-    if enfa[state].kind in repetitionKind:
-      if s in visited and i == int(not enfa[state].isGreedy):
-        continue
-      visited.incl s
-    teClosure(result, enfa, s, visited, zTransitionsCurr)
+  for i, s in pairs eNfa.s[state].next:
+    # Enter loops only once. "a", re"(a*)*" -> ["a", ""]
+    if eNfa.s[state].kind in repetitionKind:
+      if s notin processing or i == int(eNfa.s[state].isGreedy):
+        processing.add s
+        teClosure(result, eNfa, s, processing, zTransitionsCurr)
+        discard processing.pop()
+      # else skip loop
+    else:
+      teClosure(result, eNfa, s, processing, zTransitionsCurr)
 
 func teClosure(
   result: var TeClosure,
-  enfa: Enfa,
-  state: int16
+  eNfa: Enfa,
+  state: int16,
+  processing: var seq[int16]
 ) =
-  var visited: set[int16]
+  doAssert processing.len == 0
   var zclosure: Zclosure
-  for s in enfa[state].next:
-    teClosure(result, enfa, s, visited, zclosure)
+  for s in eNfa.s[state].next:
+    teClosure(result, eNfa, s, processing, zclosure)
 
-type
-  TransitionsAll* = seq[seq[int16]]
-  ZclosureStates* = seq[seq[Node]]
-  Transitions* = object
-    allZ*: TransitionsAll
-    z*: ZclosureStates
+when (NimMajor, NimMinor, NimPatch) < (1,4,0) and not declared(IndexDefect):
+  # avoids a warning
+  type IndexDefect = IndexError
 
-func eRemoval*(
-  eNfa: Enfa,
-  transitions: var Transitions
-): Nfa {.raises: [].} =
+func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} =
   ## Remove e-transitions and return
   ## remaining state transtions and
   ## submatches, and zero matches.
   ## Transitions are added in matching order (BFS),
   ## which may help matching performance
   #echo eNfa
-  result = newSeqOfCap[Node](eNfa.len)
-  transitions.allZ.setLen eNfa.len
-  var statesMap = newSeq[int16](eNfa.len)
+  result.s = newSeq[Node](eNfa.s.len)
+  result.s.setLen 0
+  result.t.allZ.setLen eNfa.s.len
+  var statesMap = newSeq[int16](eNfa.s.len)
   for i in 0 .. statesMap.len-1:
     statesMap[i] = -1
-  let start = int16(eNfa.len-1)
-  result.add eNfa[start]
+  let start = int16(eNfa.s.len-1)
+  result.s.add eNfa.s[start]
   statesMap[start] = 0'i16
   var closure: TeClosure
   var zc: seq[Node]
@@ -221,36 +220,101 @@ func eRemoval*(
   var qu: set[int16]
   qu.incl start
   var qa: int16
+  var processing = newSeqOfCap[int16](8)
   while qw.len > 0:
     try:
       qa = qw.popLast()
-    except IndexError:
+    except IndexDefect:
       doAssert false
     closure.setLen 0
-    teClosure(closure, eNfa, qa)
-    result[statesMap[qa]].next.setLen 0
+    teClosure(closure, eNfa, qa, processing)
+    result.s[statesMap[qa]].next.setLen 0
     for qb, zclosure in closure.items:
       if statesMap[qb] == -1:
-        result.add eNfa[qb]
-        statesMap[qb] = result.len.int16-1
+        result.s.add eNfa.s[qb]
+        statesMap[qb] = result.s.len.int16-1
       doAssert statesMap[qb] > -1
       doAssert statesMap[qa] > -1
-      result[statesMap[qa]].next.add statesMap[qb]
-      transitions.allZ[statesMap[qa]].add -1'i16
+      result.s[statesMap[qa]].next.add statesMap[qb]
+      result.t.allZ[statesMap[qa]].add -1'i16
       zc.setLen 0
       for z in zclosure:
-        zc.add eNfa[z]
+        zc.add eNfa.s[z]
       if zc.len > 0:
-        transitions.z.add zc
-        transitions.allZ[statesMap[qa]][^1] = int16(transitions.z.len-1)
+        result.t.z.add zc
+        result.t.allZ[statesMap[qa]][^1] = int16(result.t.z.len-1)
       if qb notin qu:
         qu.incl qb
         qw.addFirst qb
-  transitions.allZ.setLen result.len
+  result.t.allZ.setLen result.s.len
+
+func reverse(eNfa: Enfa): Enfa =
+  template state0: untyped = int16(eNfa.s.len-1)
+  result = eNfa
+  for n in mitems result.s:
+    n.next.setLen 0
+  var stack = @[(state0, -1'i16)]
+  var visited: set[int16]
+  template state: untyped = eNfa.s[ni]
+  while stack.len > 0:
+    let (ni, pi) = stack.pop()
+    if pi > -1:
+      result.s[ni].next.add pi
+    if ni in visited:
+      continue
+    visited.incl ni
+    for mi in state.next:
+      stack.add (mi, ni)
+  for n in mitems result.s:
+    n.next.reverse
+  # fix loops greediness
+  for i, n in pairs eNfa.s:
+    case n.kind:
+    of reZeroOrMore:
+      if not n.isGreedy:
+        result.s[i].next.reverse
+    of reOneOrMore:
+      if not n.isGreedy:
+        result.s[n.next[1]].next.reverse
+    else:
+      discard
+  # Swap initial state by eoe
+  var eoeIdx = -1'i16
+  for i, n in pairs result.s:
+    if n.kind == reEoe:
+      doAssert eoeIdx == -1
+      eoeIdx = i.int16
+  doAssert eoeIdx != -1
+  for i in eNfa.s[state0].next:
+    for j in 0 .. result.s[i].next.len-1:
+      if result.s[i].next[j] == state0:
+        result.s[i].next[j] = eoeIdx
+  swap result.s[state0].next, result.s[eoeIdx].next
+  doAssert result.s[state0].kind == reSkip
+  doAssert result.s[eoeIdx].kind == reEoe
+  doAssert result.s[state0].next.len > 0
+  doAssert result.s[eoeIdx].next.len == 0
+
+func subExps*(exp: RpnExp): RpnExp =
+  result = exp
+  for n in mitems result.s:
+    case n.kind
+    of reLookahead, reNotLookahead:
+      n.subExp.nfa = n.subExp.rpn
+        .subExps
+        .eNfa
+        .eRemoval
+      n.subExp.rpn.s = newSeq[Node]()  # nullify
+    of reLookbehind, reNotLookbehind:
+      n.subExp.nfa = n.subExp.rpn
+        .subExps
+        .eNfa
+        .reverse
+        .eRemoval
+      n.subExp.rpn.s = newSeq[Node]()  # nullify
+    else:
+      discard
 
 # XXX rename to nfa when Nim v0.19 is dropped
-func nfa2*(
-  exp: seq[Node],
-  transitions: var Transitions
-): Nfa {.raises: [RegexError].} =
-  result = exp.eNfa.eRemoval(transitions)
+func nfa2*(exp: RpnExp): Nfa {.raises: [RegexError].} =
+  exp.subExps.eNfa.eRemoval
diff --git a/src/regex/nfafindall.nim b/src/regex/nfafindall.nim
index 2814da0..18c129d 100644
--- a/src/regex/nfafindall.nim
+++ b/src/regex/nfafindall.nim
@@ -15,15 +15,15 @@ match taking priority according to PCRE rules
 (longest-left match wins).
 
 This algorithm works the same way as calling
-nfamatch find repeatedly would, except it keeps
+nfamatch find repeatedly, except it keeps
 all possible matches and returns them as soon
 as the current character cannot match the regex,
 i.e: it's a safe point to return. This is just
 to avoid consuming too much memory if possible.
 
-The downside is it takes linear time in the length
-of the text to match + the regex. In most cases it
-should take less space, since the matches are index ranges.
+The downside is it takes linear space in the length
+of the text to match + the regex. In practice it almost always
+takes little space, since the matches are index ranges.
 
 The tricky part is to replace all overlapped
 temporary matches every time an Eoe is found,
@@ -31,24 +31,21 @@ then prune the next states (as they're overlapped),
 then try to match the initial state to the
 current character (next possible match). Other than
 that is the same algorithm as nfamatch.
+
+The "literals optimization" is also implemented here,
+see https://nitely.github.io/2020/11/30/regex-literals-optimization.html
+for the algorithm description
 ]#
 
 import std/unicode
 import std/tables
 from std/strutils import find
 
+import ./common
 import ./nodematch
-import ./nodetype
+import ./types
 import ./nfatype
-
-func bwRuneAt(s: string, n: int): Rune =
-  ## Take rune ending at ``n``
-  doAssert n >= 0
-  doAssert n <= s.len-1
-  var n = n
-  while n > 0 and s[n].ord shr 6 == 0b10:
-    dec n
-  fastRuneAt(s, n, result, false)
+import ./nfamatch
 
 type
   MatchItemIdx = int
@@ -62,6 +59,7 @@ type
     a, b: Submatches
     m: Matches
     c: Capts
+    look: Lookaround
 
 func len(ms: Matches): int {.inline.} =
   ms.i
@@ -87,14 +85,21 @@ func clear(ms: var Matches) {.inline.} =
 
 template initMaybeImpl(
   ms: var RegexMatches,
-  regex: Regex
+  size: int
 ) =
   if ms.a == nil:
     assert ms.b == nil
-    ms.a = newSubmatches(regex.nfa.len)
-    ms.b = newSubmatches(regex.nfa.len)
-  doAssert ms.a.cap >= regex.nfa.len and
-    ms.b.cap >= regex.nfa.len
+    ms.a = newSubmatches size
+    ms.b = newSubmatches size
+    ms.look = initLook()
+  doAssert ms.a.cap >= size and
+    ms.b.cap >= size
+
+template initMaybeImpl(
+  ms: var RegexMatches,
+  regex: Regex
+) =
+  initMaybeImpl(ms, regex.nfa.s.len)
 
 func hasMatches(ms: RegexMatches): bool {.inline.} =
   return ms.m.len > 0
@@ -136,18 +141,20 @@ func dummyMatch*(ms: var RegexMatches, i: int) {.inline.} =
 
 func submatch(
   ms: var RegexMatches,
+  text: string,
   regex: Regex,
   i: int,
   cPrev, c: int32
 ) {.inline.} =
-  template tns: untyped = regex.transitions
-  template nfa: untyped = regex.nfa
+  template tns: untyped = regex.nfa.t
+  template nfa: untyped = regex.nfa.s
   template smA: untyped = ms.a
   template smB: untyped = ms.b
   template capts: untyped = ms.c
   template n: untyped = ms.a[smi].ni
   template capt: untyped = ms.a[smi].ci
   template bounds: untyped = ms.a[smi].bounds
+  template look: untyped = ms.look
   smB.clear()
   var captx: int32
   var matched = true
@@ -172,8 +179,10 @@ func submatch(
               bound: i,
               idx: z.idx)
             captx = (capts.len-1).int32
-          of assertionKind:
+          of assertionKind - lookaroundKind:
             matched = match(z, cPrev.Rune, c.Rune)
+          of lookaroundKind:
+            lookAroundTpl()
           else:
             assert false
             discard
@@ -208,14 +217,14 @@ func findSomeImpl*(
     iPrev = start.int
     optFlag = mfFindMatchOpt in flags
   smA.add (0'i16, -1'i32, i .. i-1)
-  if 0 <= start-1 and start-1 <= len(text)-1:
+  if start-1 in 0 .. text.len-1:
     cPrev = bwRuneAt(text, start-1).int32
-  while i < len(text):
+  while i < text.len:
     #debugEcho "it= ", i, " ", cPrev
     fastRuneAt(text, i, c, true)
     #c = text[i].Rune
     #i += 1
-    submatch(ms, regex, iPrev, cPrev, c.int32)
+    submatch(ms, text, regex, iPrev, cPrev, c.int32)
     if smA.len == 0:
       # avoid returning right before final zero-match
       if i < len(text):
@@ -229,7 +238,7 @@ func findSomeImpl*(
     smA.add (0'i16, -1'i32, i .. i-1)
     iPrev = i
     cPrev = c.int32
-  submatch(ms, regex, iPrev, cPrev, -1'i32)
+  submatch(ms, text, regex, iPrev, cPrev, -1'i32)
   doAssert smA.len == 0
   if ms.hasMatches():
     #debugEcho "m= ", ms.m.s
@@ -241,105 +250,6 @@ func findSomeImpl*(
 # there is an explanation of how this work
 # in litopt.nim, move this there?
 
-template initMaybeImpl(
-  ms: var RegexMatches,
-  size: int
-) =
-  if ms.a == nil:
-    assert ms.b == nil
-    ms.a = newSubmatches size
-    ms.b = newSubmatches size
-  doAssert ms.a.cap >= size and
-    ms.b.cap >= size
-
-template bwFastRuneAt(
-  s: string, n: var int, result: var Rune
-) =
-  ## Take rune ending at ``n``
-  doAssert n > 0
-  doAssert n <= s.len-1
-  dec n
-  while n > 0 and s[n].ord shr 6 == 0b10:
-    dec n
-  fastRuneAt(s, n, result, false)
-
-func submatch2(
-  smA, smB: var Submatches,
-  regex: Regex,
-  i: int,
-  cPrev, c: int32
-) =
-  template nfa: untyped = regex.litOpt.nfa
-  template tns: untyped = regex.litOpt.tns
-  smB.clear()
-  var matched = true
-  for n, capt, bounds in smA.items:
-    if nfa[n].kind == reEoe:
-      smB.add (n, capt, bounds)
-      break
-    for nti, nt in nfa[n].next.pairs:
-      if smB.hasState(nt):
-        continue
-      #debugEcho nfa[nt].kind
-      if nfa[nt].kind != reEoe and not match(nfa[nt], c.Rune):
-        continue
-      matched = true
-      if tns.allZ[n][nti] > -1:
-        for z in tns.z[tns.allZ[n][nti]]:
-          if z.kind in assertionKind:
-            matched = match(z, c.Rune, cPrev.Rune)
-          if not matched:
-            break
-      if matched:
-        smB.add (nt, capt, i .. bounds.b)
-        if nfa[nt].kind == reEoe:
-          swap smA, smB
-          return
-  swap smA, smB
-
-func matchPrefixImpl(
-  text: string,
-  regex: Regex,
-  smA, smB: var Submatches,
-  start, limit: Natural = 0
-): int {.inline.} =
-  template nfa: untyped = regex.litOpt.nfa
-  doAssert start < len(text)
-  doAssert start >= limit
-  smA.clear()
-  smB.clear()
-  var
-    c = Rune(-1)
-    cPrev = text.runeAt(start).int32
-    i = start.int
-    iPrev = start.int
-  #debugEcho cPrev.Rune
-  smA.add (0'i16, -1'i32, i .. i-1)
-  while i > limit:
-    bwFastRuneAt(text, i, c)
-    #i -= 1
-    #c = text[i].Rune
-    #debugEcho "txt.Rune=", c
-    #debugEcho "txt.i=", i
-    submatch2(smA, smB, regex, iPrev, cPrev, c.int32)
-    if smA.len == 0:
-      return -1
-    if nfa[smA[0].ni].kind == reEoe:
-      return smA[0].bounds.a
-    iPrev = i
-    cPrev = c.int32
-  if i > 0:
-    # limit can be part of the match, there is no overlap
-    bwFastRuneAt(text, i, c)
-    #debugEcho "c=", c, " limit=", limit
-  else:
-    c = Rune(-1)
-  submatch2(smA, smB, regex, iPrev, cPrev, c.int32)
-  for n, capt, bounds in smA.items:
-    if nfa[n].kind == reEoe:
-      return bounds.a
-  return -1
-
 func findSomeOptImpl*(
   text: string,
   regex: Regex,
@@ -347,11 +257,12 @@ func findSomeOptImpl*(
   start: Natural
 ): int =
   template regexSize: untyped =
-    max(regex.litOpt.nfa.len, regex.nfa.len)
+    max(regex.litOpt.nfa.s.len, regex.nfa.s.len)
   template opt: untyped = regex.litOpt
   template smA: untyped = ms.a
   template smB: untyped = ms.b
-  doAssert opt.nfa.len > 0
+  template look: untyped = ms.look
+  doAssert opt.nfa.s.len > 0
   initMaybeImpl(ms, regexSize)
   ms.clear()
   var limit = start.int
@@ -367,7 +278,7 @@ func findSomeOptImpl*(
     #debugEcho "litIdx=", litIdx
     doAssert litIdx >= i
     i = litIdx
-    i = matchPrefixImpl(text, regex, smA, smB, i, limit)
+    i = reversedMatchImpl(smA, smB, text, opt.nfa, look, i, limit)
     if i == -1:
       #debugEcho "not.Match=", i
       i = litIdx+1
diff --git a/src/regex/nfamacro.nim b/src/regex/nfamacro.nim
index f67d00d..9f032df 100644
--- a/src/regex/nfamacro.nim
+++ b/src/regex/nfamacro.nim
@@ -6,9 +6,10 @@ import std/tables
 import std/sets
 
 import pkg/unicodedb/properties
-import pkg/unicodedb/types
+import pkg/unicodedb/types as utypes
 
-import ./nodetype
+import ./common
+import ./types
 import ./nfatype
 import ./compiler
 
@@ -26,6 +27,17 @@ macro defForVars(idns: varargs[untyped]): untyped =
       idn, newEmptyNode(), newCall("genSym", newLit nskForVar, newLit $idn))
   return newStmtList lets
 
+type
+  Sig = proc (
+    smA, smB, capts, captIdx, matched, text, start: NimNode,
+    nfa: Nfa,
+    look: Lookaround,
+    flags: set[MatchFlag]
+  ): NimNode {.noSideEffect, raises: [].}
+  Lookaround = object
+    ahead, behind: Sig
+    smL: NimNode
+
 # todo: can not use unicodeplus due to
 # https://github.com/nim-lang/Nim/issues/7059
 func swapCase(r: Rune): Rune =
@@ -200,7 +212,6 @@ func genWordBoundaryAscii(cA, cB: NimNode): NimNode =
       (`cB` != -1'i32 and `wordMatchB`)
 
 func genMatch(n: Node, cA, cB: NimNode): NimNode =
-  let cpLit = newLit n.cp.int32
   case n.kind
   of reStart, reStartSym:
     quote do: `cA` == -1'i32
@@ -220,32 +231,64 @@ func genMatch(n: Node, cA, cB: NimNode): NimNode =
   of reNotWordBoundaryAscii:
     let wordBoundary = genWordBoundaryAscii(cA, cB)
     quote do: not `wordBoundary`
-  of reLookahead:
-    quote do: `cpLit` == `cB`
-  of reNotLookahead:
-    quote do: `cpLit` != `cB`
-  of reLookbehind:
-    quote do: `cpLit` == `cA`
-  of reNotLookbehind:
-    quote do: `cpLit` != `cA`
   else:
     doAssert false
     quote do: false
 
+func genLookaroundMatch(
+  capts, captx, matched, charIdx, text: NimNode,
+  n: Node,
+  look: Lookaround
+): NimNode =
+  template nfa: untyped = n.subExp.nfa
+  template smL: untyped = look.smL
+  let smlA = quote do: lastA(`smL`)
+  let smlB = quote do: lastB(`smL`)
+  var flags = {mfAnchored}
+  if n.subExp.reverseCapts:
+    flags.incl mfReverseCapts
+  var lookaroundStmt = case n.kind
+    of reLookahead, reNotLookahead:
+      look.ahead(
+        smlA, smlB, capts, captx, matched,
+        text, charIdx, nfa, look, flags)
+    else:
+      doAssert n.kind in {reLookbehind, reNotLookbehind}
+      look.behind(
+        smlA, smlB, capts, captx, matched,
+        text, charIdx, nfa, look, flags)
+  if n.kind in {reNotLookahead, reNotLookbehind}:
+    lookaroundStmt = quote do:
+      `lookaroundStmt`
+      `matched` = not `matched`
+  let nfaLenLit = newLit nfa.s.len
+  result = quote do:
+    grow `smL`
+    `smL`.last.setLen `nfaLenLit`
+    `lookaroundStmt`
+    removeLast `smL`
+
 func genMatchedBody(
   smB, ntLit, capt, bounds, matched, captx,
-  capts, charIdx, cPrev, c: NimNode,
+  capts, charIdx, cPrev, c, text: NimNode,
   i, nti: int,
-  regex: Regex
+  nfa: Nfa,
+  look: Lookaround,
+  flags: set[MatchFlag]
 ): NimNode =
-  if regex.transitions.allZ[i][nti] == -1'i16:
+  template t: untyped = nfa.t
+  let bounds2 = if mfBwMatch in flags:
+    quote do: `charIdx` .. `bounds`.b
+  else:
+    quote do: `bounds`.a .. `charIdx`-1
+  if t.allZ[i][nti] == -1'i16:
     return quote do:
-      add(`smB`, (`ntLit`, `capt`, `bounds`.a .. `charIdx`-1))
+      add(`smB`, (`ntLit`, `capt`, `bounds2`))
   var matchedBody: seq[NimNode]
   matchedBody.add quote do:
     `matched` = true
     `captx` = `capt`
-  for z in regex.transitions.z[regex.transitions.allZ[i][nti]]:
+  for z in t.z[t.allZ[i][nti]]:
     case z.kind
     of groupKind:
       let zIdx = newLit z.idx
@@ -255,23 +298,33 @@ func genMatchedBody(
           bound: `charIdx`,
           idx: `zIdx`))
         `captx` = (len(`capts`) - 1).int32
-    of assertionKind:
-      # https://github.com/nim-lang/Nim/issues/13266
-      #let zLit = newLit z
-      let matchCond = genMatch(`z`, `cPrev`, `c`)
+    of assertionKind - lookaroundKind:
+      let matchCond = if mfBwMatch in flags:
+        genMatch(z, c, cPrev)
+      else:
+        genMatch(z, cPrev, c)
       matchedBody.add quote do:
         `matched` = `matched` and `matchCond`
+    of lookaroundKind:
+      let matchStmt = genLookaroundMatch(
+        capts, captx, matched, charIdx, text, z, look)
+      matchedBody.add quote do:
+        if `matched`:
+          `matchStmt`
     else:
       doAssert false
   matchedBody.add quote do:
     if `matched`:
-      add(`smB`, (`ntLit`, `captx`, `bounds`.a .. `charIdx`-1))
+      add(`smB`, (`ntLit`, `captx`, `bounds2`))
   return newStmtList matchedBody
 
-func genSubmatch(
+func genNextState(
   n, capt, bounds, smB, c, matched, captx,
-  capts, charIdx, cPrev: NimNode,
-  regex: Regex
+  capts, charIdx, cPrev, text: NimNode,
+  nfa: Nfa,
+  look: Lookaround,
+  flags: set[MatchFlag],
+  eoeOnly: bool
 ): NimNode =
   #[
     case n
@@ -287,130 +340,208 @@ func genSubmatch(
     else:
       error
   ]#
+  template s: untyped = nfa.s
   result = newStmtList()
   var caseStmtN: seq[NimNode]
   caseStmtN.add n
-  for i in 0 .. regex.nfa.len-1:
-    if regex.nfa[i].kind == reEoe:
+  for i in 0 .. s.len-1:
+    if s[i].kind == reEoe:
       continue
     var branchBodyN: seq[NimNode]
-    for nti, nt in regex.nfa[i].next.pairs:
-      let matchCond = case regex.nfa[nt].kind
+    for nti, nt in s[i].next.pairs:
+      if eoeOnly and s[nt].kind != reEoe:
+        continue
+      let matchCond = case s[nt].kind
         of reEoe:
           quote do: `c` == -1'i32
         of reInSet:
-          let m = genSetMatch(c, regex.nfa[nt])
+          let m = genSetMatch(c, s[nt])
           quote do: `c` >= 0'i32 and `m`
         of reNotSet:
-          let m = genSetMatch(c, regex.nfa[nt])
+          let m = genSetMatch(c, s[nt])
           quote do: `c` >= 0'i32 and not `m`
         else:
-          let m = genMatch(c, regex.nfa[nt])
+          let m = genMatch(c, s[nt])
           quote do: `c` >= 0'i32 and `m`
       let ntLit = newLit nt
       let matchedBodyStmt = genMatchedBody(
         smB, ntLit, capt, bounds, matched, captx,
-        capts, charIdx, cPrev, c,
-        i, nti, regex)
-      branchBodyN.add quote do:
-        if not hasState(`smB`, `ntLit`) and `matchCond`:
-          `matchedBodyStmt`
-    doAssert branchBodyN.len > 0
-    caseStmtN.add newTree(nnkOfBranch,
-      newLit i.int16,
-      newStmtList(
-        branchBodyN))
-  doAssert caseStmtN.len > 1
-  caseStmtN.add newTree(nnkElse,
-    quote do:
-      doAssert false
-      discard)
-  result.add newTree(nnkCaseStmt, caseStmtN)
-  when defined(reDumpMacro):
-    echo "==== genSubmatch ===="
-    echo repr(result)
-
-func submatch(
-  smA, smB, c,
-  capts, charIdx, cPrev,
-  captx, matched: NimNode,
-  regex: Regex
-): NimNode =
-  defForVars n, capt, bounds
-  let genSubmatchCall = genSubmatch(
-    n, capt, bounds, smB, c, matched, captx,
-    capts, charIdx, cPrev, regex)
-  result = quote do:
-    `smB`.clear()
-    for `n`, `capt`, `bounds` in `smA`.items:
-      `genSubmatchCall`
-    swap `smA`, `smB`
-
-func genSubmatchEoe(
-  n, capt, bounds, smB, matched, captx,
-  capts, charIdx, cPrev: NimNode,
-  regex: Regex
-): NimNode =
-  # This is the same as genSubmatch
-  # but just for EOE states
-  #[
-    case n
-    of 0:
-      if not smB.hasState(1):
-        if c == -1:
-          smB.add((1, capt, bounds))
-    of 1:
-      ...
-    else:
-      discard
-  ]#
-  result = newStmtList()
-  var caseStmtN: seq[NimNode]
-  caseStmtN.add n
-  for i in 0 .. regex.nfa.len-1:
-    if regex.nfa[i].kind == reEoe:
-      continue
-    var branchBodyN: seq[NimNode]
-    for nti, nt in regex.nfa[i].next.pairs:
-      if regex.nfa[nt].kind == reEoe:
-        let ntLit = newLit nt
-        let cLit = newLit -1'i32
-        let matchedBodyStmt = genMatchedBody(
-          smB, ntLit, capt, bounds, matched, captx,
-          capts, charIdx, cPrev, cLit,
-          i, nti, regex)
+        capts, charIdx, cPrev, c, text,
+        i, nti, nfa, look, flags)
+      if mfAnchored in flags and s[nt].kind == reEoe:
         branchBodyN.add quote do:
           if not hasState(`smB`, `ntLit`):
             `matchedBodyStmt`
+      else:
+        branchBodyN.add quote do:
+          if not hasState(`smB`, `ntLit`) and `matchCond`:
+            `matchedBodyStmt`
+    doAssert eoeOnly or branchBodyN.len > 0
     if branchBodyN.len > 0:
       caseStmtN.add newTree(nnkOfBranch,
         newLit i.int16,
         newStmtList(
           branchBodyN))
   doAssert caseStmtN.len > 1
+  let elseAssertion = if eoeOnly:
+    newEmptyNode()
+  else:
+    quote do: doAssert false
   caseStmtN.add newTree(nnkElse,
-    quote do: discard)
+    quote do:
+      `elseAssertion`
+      discard)
   result.add newTree(nnkCaseStmt, caseStmtN)
   when defined(reDumpMacro):
-    echo "==== genSubmatchEoe ===="
+    echo "==== genNextState ===="
     echo repr(result)
 
-func submatchEoe(
-  smA, smB,
-  capts, charIdx, cPrev,
-  captx, matched: NimNode,
-  regex: Regex
+func nextState(
+  smA, smB, c, capts, charIdx, cPrev,
+  captx, matched, eoe, text: NimNode,
+  nfa: Nfa,
+  look: Lookaround,
+  flags: set[MatchFlag],
+  eoeOnly = false
 ): NimNode =
   defForVars n, capt, bounds
-  let genSubmatchEoeCall = genSubmatchEoe(
-    n, capt, bounds, smB, matched, captx,
-    capts, charIdx, cPrev, regex)
+  let eoeBailOut = if mfAnchored in flags:
+    quote do:
+      if `n` == `eoe`:
+        if not hasState(`smB`, `n`):
+          add(`smB`, (`n`, `capt`, `bounds`))
+        break
+  else:
+    newEmptyNode()
+  let nextStateStmt = genNextState(
+    n, capt, bounds, smB, c, matched, captx,
+    capts, charIdx, cPrev, text, nfa, look,
+    flags, eoeOnly)
   result = quote do:
     `smB`.clear()
     for `n`, `capt`, `bounds` in `smA`.items:
-      `genSubmatchEoeCall`
+      `eoeBailOut`
+      `nextStateStmt`
     swap `smA`, `smB`
 
+func eoeIdx(nfa: Nfa): int16 =
+  for i in 0 .. nfa.s.len-1:
+    if nfa.s[i].kind == reEoe:
+      return i.int16
+  doAssert false
+
+func matchImpl(
+  smA, smB, capts, captIdx, matched, text, start: NimNode,
+  nfa: Nfa,
+  look: Lookaround,
+  flags: set[MatchFlag]
+): NimNode =
+  defVars c, i, cPrev, captx
+  let eoe = newLit nfa.eoeIdx
+  let c2 = quote do: int32(`c`)
+  let nextStateStmt = nextState(
+    smA, smB, c2, capts, i, cPrev, captx,
+    matched, eoe, text, nfa, look, flags)
+  let cEoe = newLit -1'i32
+  let nextStateEoeStmt = nextState(
+    smA, smB, cEoe, capts, i, cPrev, captx,
+    matched, eoe, text, nfa, look, flags, eoeOnly = true)
+  let eoeBailOutStmt = if mfAnchored in flags:
+    quote do:
+      if `smA`[0].ni == `eoe`:
+        break
+  else:
+    newEmptyNode()
+  let captsStmt = if mfReverseCapts in flags:
+    quote do:
+      `captIdx` = reverse(`capts`, `smA`[0].ci, `captIdx`)
+  else:
+    quote do:
+      `captIdx` = `smA`[0].ci
+  result = quote do:
+    var
+      `c` = Rune(-1)
+      `cPrev` = -1'i32
+      `i` = `start`
+      iNext = `start`
+      `captx` {.used.} = -1'i32
+    if `start`-1 in 0 .. `text`.len-1:
+      `cPrev` = bwRuneAt(`text`, `start`-1).int32
+    clear(`smA`)
+    add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1))
+    while `i` < `text`.len:
+      fastRuneAt(`text`, iNext, `c`, true)
+      `nextStateStmt`
+      if `smA`.len == 0:
+        break
+      `eoeBailOutStmt`
+      `i` = iNext
+      `cPrev` = `c2`
+    `c` = Rune(-1)
+    `nextStateEoeStmt`
+    if `smA`.len > 0:
+      doAssert `smA`[0].ni == `eoe`
+      `captsStmt`
+    `matched` = `smA`.len > 0
+
+func reversedMatchImpl(
+  smA, smB, capts, captIdx, matched, text, start: NimNode,
+  nfa: Nfa,
+  look: Lookaround,
+  flags: set[MatchFlag]
+): NimNode =
+  defVars c, i, cPrev, captx
+  let flags = flags + {mfAnchored, mfBwMatch}
+  let eoe = newLit nfa.eoeIdx
+  let c2 = quote do: int32(`c`)
+  let nextStateStmt = nextState(
+    smA, smB, c2, capts, i, cPrev, captx,
+    matched, eoe, text, nfa, look, flags)
+  #let limit0 = limit.kind in nnkLiterals and limit.intVal == 0
+  let cEoe = newLit -1'i32
+  let nextStateEoeStmt = nextState(
+    smA, smB, cEoe, capts, i, cPrev, captx,
+    matched, eoe, text, nfa, look, flags, eoeOnly = true)
+  let captsStmt = if mfReverseCapts in flags:
+    quote do:
+      `captIdx` = reverse(`capts`, `smA`[0].ci, `captIdx`)
+  else:
+    quote do:
+      `captIdx` = `smA`[0].ci
+  result = quote do:
+    #doAssert `start` >= `limit`
+    var
+      `c` = Rune(-1)
+      `cPrev` = -1'i32
+      `i` = `start`
+      iNext = `start`
+      `captx` {.used.} = -1'i32
+    if `start` in 0 .. `text`.len-1:
+      `cPrev` = runeAt(`text`, `start`).int32
+    clear(`smA`)
+    add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1))
+    while iNext > 0:
+      bwFastRuneAt(`text`, iNext, `c`)
+      `nextStateStmt`
+      if `smA`.len == 0:
+        break
+      if `smA`[0].ni == `eoe`:
+        break
+      `i` = iNext
+      `cPrev` = `c2`
+    `c` = Rune(-1)
+    `nextStateEoeStmt`
+    if `smA`.len > 0:
+      doAssert `smA`[0].ni == `eoe`
+      `captsStmt`
+    `matched` = `smA`.len > 0
+
+template look(smL: NimNode): untyped =
+  Lookaround(
+    ahead: matchImpl,
+    behind: reversedMatchImpl,
+    smL: smL)
+
 template constructSubmatches2(
   captures, txt, capts, capt, size: untyped
 ): untyped =
@@ -432,40 +563,28 @@ proc matchImpl*(text, expLit, body: NimNode): NimNode =
   if not (expLit.kind == nnkCallStrLit and $expLit[0] == "rex"):
     error "not a regex literal; only rex\"regex\" is allowed", expLit
   let exp = expLit[1]
-  defVars smA, smB, c, capts, iPrev, cPrev, captx, matched
-  let c2 = quote do: int32(`c`)
+  defVars smA, smB, capts, capt, matched, smL
   let regex = reCt(exp.strVal)
-  let submatchCall = submatch(
-    smA, smB, c2, capts, iPrev, cPrev, captx, matched, regex)
-  let submatchEoeCall = submatchEoe(
-    smA, smB, capts, iPrev, cPrev, captx, matched, regex)
-  let nfaLenLit = newLit regex.nfa.len
-  let nfaGroupsLen = regex.groupsCount
+  let startLit = newLit 0
+  let flags: set[MatchFlag] = {}
+  let matchImplStmt = matchImpl(
+    smA, smB, capts, capt, matched,
+    text, startLit, regex.nfa, look(smL), flags)
+  let nfaLenLit = newLit regex.nfa.s.len
+  let nfaGroupsLen = int(regex.groupsCount)
   result = quote do:
     block:
       var
-        `smA`, `smB`: Submatches
-        `c` = Rune(-1)
-        `cPrev` = -1'i32
-        `iPrev` = 0
-        `capts` {.used.}: Capts
-        `captx` {.used.}: int32
-        `matched` {.used.}: bool
-        i = 0
-      `smA` = newSubmatches `nfaLenLit`
-      `smB` = newSubmatches `nfaLenLit`
-      add(`smA`, (0'i16, -1'i32, 0 .. -1))
-      while i < len(`text`):
-        fastRuneAt(`text`, i, `c`, true)
-        `submatchCall`
-        if `smA`.len == 0:
-          break
-        `iPrev` = i
-        `cPrev` = `c2`
-      `submatchEoeCall`
-      if `smA`.len > 0:
+        `smA` = newSubmatches `nfaLenLit`
+        `smB` = newSubmatches `nfaLenLit`
+        `capts`: Capts
+        `capt` = -1'i32
+        `matched` = false
+        `smL` {.used.}: SmLookaround
+      `matchImplStmt`
+      if `matched`:
         var matches {.used, inject.}: seq[string]
         when `nfaGroupsLen` > 0:
           constructSubmatches2(
-            matches, `text`, `capts`, `smA`[0].ci, `nfaGroupsLen`)
+            matches, `text`, `capts`, `capt`, `nfaGroupsLen`)
         `body`
diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim
index db779c0..54b6f2c 100644
--- a/src/regex/nfamatch.nim
+++ b/src/regex/nfamatch.nim
@@ -8,51 +8,91 @@
 import std/unicode
 import std/tables
 
+import ./common
 import ./nodematch
-import ./nodetype
+import ./types
 import ./nfatype
 
-template findMatchBailOut: untyped {.dirty.} =
-  if nfa[n].kind == reEoe:
-    if not smB.hasState n:
-      smB.add (n, capt, bounds)
-    # first Eoe in SmA wins, it's pointless to
-    # keep matching further than the last Eoe
-    break
+type
+  AheadSig = proc (
+    smA, smB: var Submatches,
+    capts: var Capts,
+    captIdx: var int32,
+    text: string,
+    nfa: Nfa,
+    look: var Lookaround,
+    start: int,
+    flags: set[MatchFlag]
+  ): bool {.noSideEffect, raises: [].}
+  BehindSig = proc (
+    smA, smB: var Submatches,
+    capts: var Capts,
+    captIdx: var int32,
+    text: string,
+    nfa: Nfa,
+    look: var Lookaround,
+    start, limit: int,
+    flags: set[MatchFlag]
+  ): int {.noSideEffect, raises: [].}
+  Lookaround* = object
+    ahead*: AheadSig
+    behind*: BehindSig
+    smL*: SmLookaround
 
-template notEoe: untyped {.dirty.} =
-  when mfFindMatch in flags:
-    nfa[nt].kind != reEoe
+template lookAroundTpl*: untyped {.dirty.} =
+  template smL: untyped = look.smL
+  template smLa: untyped = smL.lastA
+  template smLb: untyped = smL.lastB
+  template zNfa: untyped = z.subExp.nfa
+  let flags2 = if z.subExp.reverseCapts:
+    {mfAnchored, mfReverseCapts}
   else:
-    true
+    {mfAnchored}
+  smL.grow()
+  smL.last.setLen zNfa.s.len
+  matched = case z.kind
+  of reLookahead:
+    look.ahead(
+      smLa, smLb, capts, captx,
+      text, zNfa, look, i, flags2)
+  of reNotLookahead:
+    not look.ahead(
+      smLa, smLb, capts, captx,
+      text, zNfa, look, i, flags2)
+  of reLookbehind:
+    look.behind(
+      smLa, smLb, capts, captx,
+      text, zNfa, look, i, 0, flags2) != -1
+  of reNotLookbehind:
+    look.behind(
+      smLa, smLb, capts, captx,
+      text, zNfa, look, i, 0, flags2) == -1
+  else:
+    doAssert false
+    false
+  smL.removeLast()
 
-func submatch(
-  smA, smB: var Submatches,
-  capts: var Capts,
-  regex: Regex,
-  i: int,
-  cPrev, c, c2: int32,
-  flags: static MatchFlags,
-) {.inline.} =
-  template t: untyped = regex.transitions
-  template nfa: untyped = regex.nfa
+template nextStateTpl(bwMatch = false): untyped {.dirty.} =
+  template bounds2: untyped =
+    when bwMatch: i .. bounds.b else: bounds.a .. i-1
   smB.clear()
-  var captx: int32
-  var matched = true
-  for n, capt, bounds in smA.items:
-    when mfFindMatch in flags:
-      findMatchBailOut()
-    for nti, nt in nfa[n].next.pairs:
+  for n, capt, bounds in items smA:
+    if anchored and nfa.s[n].kind == reEoe:
+      if not smB.hasState n:
+        smB.add (n, capt, bounds)
+      break
+    for nti, nt in pairs nfa.s[n].next:
       if smB.hasState nt:
         continue
-      if notEoe() and not match(nfa[nt], c2.Rune):
-        continue
-      if t.allZ[n][nti] == -1'i16:
-        smB.add (nt, capt, bounds.a .. i-1)
+      if not match(nfa.s[nt], c):
+        if not (anchored and nfa.s[nt].kind == reEoe):
+          continue
+      if nfa.t.allZ[n][nti] == -1'i16:
+        smB.add (nt, capt, bounds2)
         continue
       matched = true
       captx = capt
-      for z in t.z[t.allZ[n][nti]]:
+      for z in nfa.t.z[nfa.t.allZ[n][nti]]:
         if not matched:
           break
         case z.kind
@@ -62,83 +102,154 @@ func submatch(
             bound: i,
             idx: z.idx)
           captx = (capts.len-1).int32
-        of assertionKind:
-          matched = match(z, cPrev.Rune, c.Rune)
+        of assertionKind - lookaroundKind:
+          when bwMatch:
+            matched = match(z, c, cPrev.Rune)
+          else:
+            matched = match(z, cPrev.Rune, c)
+        of lookaroundKind:
+          lookAroundTpl()
         else:
-          assert false
+          doAssert false
           discard
       if matched:
-        smB.add (nt, captx, bounds.a .. i-1)
+        smB.add (nt, captx, bounds2)
   swap smA, smB
 
-template shortestMatch: untyped {.dirty.} =
-  submatch(smA, smB, capts, regex, iPrev, cPrev, c.int32, -1'i32, flags)
+func matchImpl(
+  smA, smB: var Submatches,
+  capts: var Capts,
+  captIdx: var int32,
+  text: string,
+  nfa: Nfa,
+  look: var Lookaround,
+  start = 0,
+  flags: set[MatchFlag] = {}
+): bool =
+  var
+    c = Rune(-1)
+    cPrev = -1'i32
+    i = start
+    iNext = start
+    captx = -1'i32
+    matched = false
+    anchored = mfAnchored in flags
+  if start-1 in 0 .. text.len-1:
+    cPrev = bwRuneAt(text, start-1).int32
+  smA.clear()
+  smA.add (0'i16, captIdx, i .. i-1)
+  while i < text.len:
+    fastRuneAt(text, iNext, c, true)
+    nextStateTpl()
+    if smA.len == 0:
+      return false
+    if anchored and nfa.s[smA[0].ni].kind == reEoe:
+      break
+    i = iNext
+    cPrev = c.int32
+  c = Rune(-1)
+  nextStateTpl()
   if smA.len > 0:
-    return true
-  swap smA, smB
+    if mfReverseCapts in flags:
+      captIdx = reverse(capts, smA[0].ci, captIdx)
+    else:
+      captIdx = smA[0].ci
+  return smA.len > 0
 
-template findMatch: untyped {.dirty.} =
-  when mfFindMatchOpt in flags:
+func reversedMatchImpl(
+  smA, smB: var Submatches,
+  capts: var Capts,
+  captIdx: var int32,
+  text: string,
+  nfa: Nfa,
+  look: var Lookaround,
+  start: int,
+  limit = 0,
+  flags: set[MatchFlag] = {}
+): int =
+  #doAssert start < len(text)
+  doAssert start >= limit
+  var
+    c = Rune(-1)
+    cPrev = -1'i32
+    i = start
+    iNext = start
+    captx: int32
+    matched = false
+    anchored = true
+  if start in 0 .. text.len-1:
+    cPrev = text.runeAt(start).int32
+  smA.clear()
+  smA.add (0'i16, captIdx, i .. i-1)
+  while iNext > limit:
+    bwFastRuneAt(text, iNext, c)
+    nextStateTpl(bwMatch = true)
     if smA.len == 0:
-      # XXX needed on exit too
-      m.boundaries = i .. i-1
-      return false
-  smA.add((0'i16, -1'i32, i .. i-1))
-  if regex.nfa[smA[0][0]].kind == reEoe:
-    constructSubmatches(m.captures, capts, smA[0][1], regex.groupsCount)
-    if regex.namedGroups.len > 0:
-      m.namedGroups = regex.namedGroups
-    m.boundaries = smA[0].bounds
-    return true
+      return -1
+    if nfa.s[smA[0].ni].kind == reEoe:
+      break
+    i = iNext
+    cPrev = c.int32
+  c = Rune(-1)
+  if iNext > 0:
+    bwFastRuneAt(text, iNext, c)
+  nextStateTpl(bwMatch = true)
+  for n, capt, bounds in items smA:
+    if nfa.s[n].kind == reEoe:
+      if mfReverseCapts in flags:
+        captIdx = reverse(capts, capt, captIdx)
+      else:
+        captIdx = capt
+      return bounds.a
+  return -1
 
-func bwRuneAt(s: string, n: int): Rune =
-  ## Take rune ending at ``n``
-  doAssert n >= 0
-  doAssert n <= s.len-1
-  var n = n
-  while n > 0 and s[n].ord shr 6 == 0b10:
-    dec n
-  fastRuneAt(s, n, result, false)
+func reversedMatchImpl*(
+  smA, smB: var Submatches,
+  text: string,
+  nfa: Nfa,
+  look: var Lookaround,
+  start, limit: int
+): int =
+  var capts: Capts
+  var captIdx = -1'i32
+  reversedMatchImpl(
+    smA, smB, capts, captIdx, text, nfa, look, start, limit)
+
+template initLook*: Lookaround =
+  Lookaround(
+    ahead: matchImpl,
+    behind: reversedMatchImpl)
 
 func matchImpl*(
   text: string,
   regex: Regex,
   m: var RegexMatch,
-  flags: static MatchFlags,
   start = 0
-): bool {.inline.} =
+): bool =
   m.clear()
   var
-    smA, smB: Submatches
+    smA = newSubmatches(regex.nfa.s.len)
+    smB = newSubmatches(regex.nfa.s.len)
     capts: Capts
-    c = Rune(-1)
-    cPrev = -1'i32
-    i = start
-    iPrev = start
-  smA = newSubmatches(regex.nfa.len)
-  smB = newSubmatches(regex.nfa.len)
-  smA.add (0'i16, -1'i32, start .. start-1)
-  when mfFindMatch in flags:
-    if 0 <= start-1 and start-1 <= len(text)-1:
-      cPrev = bwRuneAt(text, start-1).int32
-  while i < len(text):
-    fastRuneAt(text, i, c, true)
-    when mfShortestMatch in flags:
-      shortestMatch()
-    submatch(smA, smB, capts, regex, iPrev, cPrev, c.int32, c.int32, flags)
-    when mfFindMatch in flags:
-      findMatch()
-    if smA.len == 0:
-      return false
-    iPrev = i
-    cPrev = c.int32
-  submatch(smA, smB, capts, regex, iPrev, cPrev, -1'i32, -1'i32, flags)
-  if smA.len == 0:
-    when mfFindMatchOpt in flags:
-      m.boundaries = i .. i-1
-    return false
-  constructSubmatches(m.captures, capts, smA[0].ci, regex.groupsCount)
-  if regex.namedGroups.len > 0:
-    m.namedGroups = regex.namedGroups
-  m.boundaries = smA[0].bounds
-  return true
+    capt = -1'i32
+    look = initLook()
+  result = matchImpl(
+    smA, smB, capts, capt, text, regex.nfa, look, start)
+  if result:
+    constructSubmatches(
+      m.captures, capts, capt, regex.groupsCount)
+    if regex.namedGroups.len > 0:
+      m.namedGroups = regex.namedGroups
+    m.boundaries = smA[0].bounds
+
+func startsWithImpl*(text: string, regex: Regex, start: int): bool =
+  # XXX optimize mfShortestMatch, mfNoCaptures
+  template flags: untyped = {mfAnchored, mfShortestMatch, mfNoCaptures}
+  var
+    smA = newSubmatches(regex.nfa.s.len)
+    smB = newSubmatches(regex.nfa.s.len)
+    capts: Capts
+    capt = -1'i32
+    look = initLook()
+  result = matchImpl(
+    smA, smB, capts, capt, text, regex.nfa, look, start, flags)
diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim
index f9f52d5..2d9cb26 100644
--- a/src/regex/nfatype.nim
+++ b/src/regex/nfatype.nim
@@ -4,7 +4,7 @@ import std/tables
 import std/sets
 import std/algorithm
 
-import ./nfa
+import ./types
 import ./litopt
 
 type
@@ -41,13 +41,24 @@ func constructSubmatches*(
   for c in captures.mitems:
     c.reverse()
 
+func reverse*(capts: var Capts, a, b: int32): int32 =
+  ## reverse capture indices from a to b; return head
+  doAssert a >= b
+  var capt = a
+  var parent = b
+  while capt != b:
+    let p = capts[capt].parent
+    capts[capt].parent = parent
+    parent = capt
+    capt = p
+  return parent
+
 type
   RegexLit* = distinct string
     ## raw regex literal string
   Regex* = object
     ## a compiled regular expression
     nfa*: Nfa
-    transitions*: Transitions
     groupsCount*: int16
     namedGroups*: OrderedTable[string, int16]
     #flags*: set[RegexFlag]
@@ -57,6 +68,9 @@ type
     mfNoCaptures
     mfFindMatch
     mfFindMatchOpt
+    mfAnchored
+    mfBwMatch
+    mfReverseCapts
   MatchFlags* = set[MatchFlag]
   RegexMatch* = object
     ## result from matching operations
@@ -128,5 +142,46 @@ iterator items*(sm: Submatches): PState {.inline.} =
 func cap*(sm: Submatches): int {.inline.} =
   sm.ss.len
 
+func setLen*(sm: var Submatches, size: int) {.inline.} =
+  sm.ss.setLen size
+
 when defined(release):
   {.pop.}
+
+# XXX maybe store the lookaround number + count, and use a fixed
+#     size seq to reduce allocations
+type
+  SmLookaroundItem* = object
+    a, b: Submatches
+  SmLookaround* = object
+    s: seq[SmLookaroundItem]
+    i: int
+
+func setLen*(item: var SmLookaroundItem, size: int) {.inline.} =
+  if item.a == nil:
+    doAssert item.b == nil
+    item.a = newSubmatches size
+    item.b = newSubmatches size
+  else:
+    doAssert item.b != nil
+    item.a.setLen size
+    item.b.setLen size
+
+template last*(sm: var SmLookaround): untyped =
+  sm.s[sm.i-1]
+
+template lastA*(sm: var SmLookaround): untyped =
+  last(sm).a
+
+template lastB*(sm: var SmLookaround): untyped =
+  last(sm).b
+
+func grow*(sm: var SmLookaround) {.inline.} =
+  doAssert sm.i <= sm.s.len
+  if sm.i == sm.s.len:
+    sm.s.setLen(max(1, sm.s.len) * 2)
+  sm.i += 1
+
+func removeLast*(sm: var SmLookaround) {.inline.} =
+  doAssert sm.i > 0
+  sm.i -= 1
diff --git a/src/regex/nodematch.nim b/src/regex/nodematch.nim
index 94baac0..2bda7d8 100644
--- a/src/regex/nodematch.nim
+++ b/src/regex/nodematch.nim
@@ -2,15 +2,17 @@ import std/unicode
 import std/sets
 
 import pkg/unicodedb/properties
-import pkg/unicodedb/types
-import pkg/unicodeplus
+import pkg/unicodedb/types as utypes
 
-import ./nodetype
+import ./types
 import ./common
 
-func isWord*(r: Rune): bool {.inline.} =
+func isWord(r: Rune): bool {.inline.} =
   utmWord in unicodeTypes(r)
 
+func isDecimal(r: Rune): bool {.inline.} =
+  utmDecimal in unicodeTypes(r)
+
 func isWordAscii(r: Rune): bool {.inline.} =
   ## return ``true`` if the given
   ## rune is in ``[A-Za-z0-9]`` range
@@ -60,14 +62,6 @@ func match*(n: Node, r: Rune, nxt: Rune): bool {.inline.} =
     isWordBoundaryAscii(r, nxt)
   of reNotWordBoundaryAscii:
     not isWordBoundaryAscii(r, nxt)
-  of reLookahead:
-    n.cp == nxt
-  of reNotLookahead:
-    n.cp != nxt
-  of reLookbehind:
-    n.cp == r
-  of reNotLookbehind:
-    n.cp != r
   else:
     assert false
     false
diff --git a/src/regex/parser.nim b/src/regex/parser.nim
index b1d0d95..3cee43d 100644
--- a/src/regex/parser.nim
+++ b/src/regex/parser.nim
@@ -5,7 +5,8 @@ import std/parseutils
 
 import pkg/unicodedb/properties
 
-import ./nodetype
+import ./exptype
+import ./types
 import ./common
 import ./scanner
 
@@ -122,10 +123,9 @@ func parseUnicodeLit(sc: Scanner[Rune], size: int): Node =
       ("Invalid unicode literal. " &
        "Expected $# hex digits, but found $#") %% [$size, $i])
     prettyCheck(
-      sc.curr.int in {
-        '0'.ord .. '9'.ord,
-        'a'.ord .. 'z'.ord,
-        'A'.ord .. 'Z'.ord},
+      sc.curr.int in '0'.ord .. '9'.ord or
+      sc.curr.int in 'a'.ord .. 'z'.ord or
+      sc.curr.int in 'A'.ord .. 'Z'.ord,
       ("Invalid unicode literal. " &
        "Expected hex digit, but found $#") %% $sc.curr)
     rawCP[i] = sc.next().int.char
@@ -427,31 +427,52 @@ func parseSet(sc: Scanner[Rune]): Node =
     hasEnd,
     "Invalid set. Missing `]`")
 
+func noRepeatCheck(sc: Scanner[Rune]) =
+  ## Check next symbol is not a repetition
+  let startPos = sc.pos
+  let hasDoubleQ = sc.peek == '?'.Rune and sc.peek(1) == '?'.Rune
+  prettyCheck(
+    sc.peek notin ['*'.Rune, '+'.Rune] and not hasDoubleQ,
+    "Invalid repetition. There's nothing to repeat")
+
 func parseRepRange(sc: Scanner[Rune]): Node =
   ## parse a repetition range ``{n,m}``
+  # This is not PCRE compatible. PCRE allows
+  # {,} and {,1} to be parsed as chars instead of a
+  # repetition range, we raise an error instead.
+  # Range limit can be defined with the `reRepRangeLimit` param.
+  if sc.peek.int != ','.ord and
+      sc.peek.int notin '0'.ord .. '9'.ord:
+    return Node(kind: reChar, cp: '{'.Rune)
   let startPos = sc.pos
   var
     first, last: string
     hasFirst = false
     curr = ""
   for cp in sc:
-    if cp == "}".toRune:
+    if cp == '}'.Rune:
       last = curr
       break
-    if cp == ",".toRune:
+    if cp == ','.Rune:
       first = curr
       curr = ""
+      prettyCheck(
+        not hasFirst, "Invalid repetition range. Expected {n,m}")
       hasFirst = true
       continue
     prettyCheck(
       cp.int in '0'.ord .. '9'.ord,
       "Invalid repetition range. Range can only contain digits")
-    curr.add(char(cp.int))
+    curr.add char(cp.int)
+  prettyCheck(
+    sc.prev == '}'.Rune,
+    "Invalid repetition range. Missing closing symbol `}`")
   if not hasFirst:  # {n}
     first = curr
-  if first.len == 0:  # {,m} or {,}
-    first.add('0')
-  if last.len == 0:  # {n,} or {,}
+  prettyCheck(
+    first.len > 0,
+    "Invalid repetition range. Expected {n}, {n,m}, or {n,}")
+  if last.len == 0:  # {n,}
     last = "-1"
   var
     firstNum: int
@@ -473,15 +494,17 @@ func parseRepRange(sc: Scanner[Rune]): Node =
     "Invalid repetition range. Max value is $#" %% $int16.high)
   # for perf reasons. This becomes a?a?a?...
   # too many parallel states
+  const reRepRangeLimit {.intdefine.} = 100
   prettyCheck(
-    not (lastNum - firstNum > 100),
+    not (lastNum - firstNum > reRepRangeLimit),
     ("Invalid repetition range. " &
-     "Expected 100 repetitions or less, " &
-     "but found: $#") %% $(lastNum - firstNum))
+     "Expected $# repetitions or less, " &
+     "but found: $#") %% [$reRepRangeLimit, $(lastNum - firstNum)])
   result = Node(
     kind: reRepRange,
     min: firstNum.int16,
     max: lastNum.int16)
+  noRepeatCheck sc
 
 func toFlag(r: Rune): Flag =
   result = case r
@@ -523,11 +546,6 @@ func toNegFlag(r: Rune): Flag =
       ("Invalid group flag, found -$# " &
        "but expected one of: -i, -m, -s, -U or -u") %% $r)
 
-template checkEmptyGroup() {.dirty.} =
-  prettyCheck(
-    peek(sc) != toRune(")"),
-    "Invalid group. Empty group is not allowed")
-
 func parseGroupTag(sc: Scanner[Rune]): Node =
   ## parse a special group (name, flags, non-captures).
   ## Return a regular ``reGroupStart``
@@ -535,14 +553,12 @@ func parseGroupTag(sc: Scanner[Rune]): Node =
   # A regular group
   let startPos = sc.pos
   if sc.peek != "?".toRune:
-    checkEmptyGroup()
     result = initGroupStart()
     return
   discard sc.next()  # Consume "?"
   case sc.peek
   of ":".toRune:
     discard sc.next()
-    checkEmptyGroup()
     result = initGroupStart(isCapturing = false)
   of "P".toRune:
     discard sc.next()
@@ -570,7 +586,6 @@ func parseGroupTag(sc: Scanner[Rune]): Node =
     prettyCheck(
       sc.prev == ">".toRune,
       "Invalid group name. Missing `>`")
-    checkEmptyGroup()
     result = initGroupStart(name)
   of "i".toRune,
       "m".toRune,
@@ -583,36 +598,36 @@ func parseGroupTag(sc: Scanner[Rune]): Node =
       flags: seq[Flag] = @[]
       isNegated = false
     for cp in sc:
-      if cp == ":".toRune:
-        checkEmptyGroup()
+      if cp == ":".toRune or cp == ")".toRune:
         break
       if cp == "-".toRune:
         isNegated = true
         continue
       if isNegated:
-        flags.add(toNegFlag(cp))
+        flags.add toNegFlag(cp)
       else:
-        flags.add(toFlag(cp))
-      if sc.peek == ")".toRune:
-        break
-    result = initGroupStart(
-      flags = flags,
-      isCapturing = false)
+        flags.add toFlag(cp)
+    result = if sc.prev == ")".toRune:
+      Node(kind: reFlags, flags: flags)
+    else:
+      initGroupStart(
+        flags = flags,
+        isCapturing = false)
   #reLookahead,
   #reLookbehind,
-  of "=".toRune, "<".toRune, "!".toRune:
+  of '='.Rune, '<'.Rune, '!'.Rune:
     var lookAroundKind: NodeKind
     case sc.peek
-    of "=".toRune:
+    of '='.Rune:
       lookAroundKind = reLookahead
-    of "!".toRune:
+    of '!'.Rune:
       lookAroundKind = reNotLookahead
-    of "<".toRune:
+    of '<'.Rune:
       discard sc.next()
       case sc.peek:
-      of "=".toRune:
+      of '='.Rune:
         lookAroundKind = reLookbehind
-      of "!".toRune:
+      of '!'.Rune:
         lookAroundKind = reNotLookbehind
       else:
         prettyCheck(
@@ -620,30 +635,12 @@ func parseGroupTag(sc: Scanner[Rune]): Node =
           "Invalid lookabehind, expected `<=` or `<!` symbol")
     else:
       doAssert false
-    doAssert sc.peek in ["=".toRune, "!".toRune]
+    doAssert sc.peek in ['='.Rune, '!'.Rune]
     discard sc.next
-    # todo: support sets and more
-    case sc.peek
-    of "\\".toRune:
-      let n = parseEscapedSeq(sc)
-      prettyCheck(
-        n.kind == reChar,
-        "Invalid lookaround. A " &
-        "character was expected, but " &
-        "found a special symbol")
-      result = Node(kind: lookAroundKind, cp: n.cp)
-    else:
-      prettyCheck(
-        not sc.finished,
-        "Invalid lookaround. A character " &
-        "was expected, but found nothing (end of string)")
-      result = Node(kind: lookAroundKind, cp: sc.next())
     prettyCheck(
-      sc.peek == ")".toRune,
-      "Invalid lookaround, expected closing symbol. " &
-      "Beware lookaround is currently limited to " &
-      "match one single character")
-    discard sc.next()
+      sc.peek != ')'.Rune,
+      "Empty lookaround is not allowed")
+    result = Node(kind: lookAroundKind)
   else:
     prettyCheck(
       false,
@@ -663,10 +660,13 @@ func subParse(sc: Scanner[Rune]): Node =
   of "|".toRune:
     Node(kind: reOr, cp: r)
   of "*".toRune:
+    noRepeatCheck sc
     Node(kind: reZeroOrMore, cp: r)
   of "+".toRune:
+    noRepeatCheck sc
     Node(kind: reOneOrMore, cp: r)
   of "?".toRune:
+    noRepeatCheck sc
     Node(kind: reZeroOrOne, cp: r)
   of ")".toRune:
     Node(kind: reGroupEnd, cp: r)
@@ -709,37 +709,45 @@ func verbosity(
   case n.kind:
   of reGroupStart:
     if vb.len > 0:
-      vb.add(vb[vb.high])
+      vb.add vb[vb.len-1]
     else:
-      vb.add(false)
+      vb.add false
     for f in n.flags:
       case f:
       of flagVerbose:
-        vb[vb.high] = true
+        vb[vb.len-1] = true
       of flagNotVerbose:
-        vb[vb.high] = false
+        vb[vb.len-1] = false
       else:
         discard
-    if sc.peek == ")".toRune:  # (?flags)
-      if vb.len > 1:  # set outter group
-        vb[vb.high - 1] = vb[vb.high]
-      else:
-        vb.add(vb[vb.high])
   of reGroupEnd:
     if vb.len > 0:
       discard vb.pop()
     # else: unbalanced parentheses,
     # it'll raise later
+  of reFlags:
+    if vb.len == 0:
+      vb.add false
+    # else set outter group
+    for f in n.flags:
+      case f:
+      of flagVerbose:
+        vb[vb.len-1] = true
+      of flagNotVerbose:
+        vb[vb.len-1] = false
+      else:
+        discard
   else:
     discard
 
-func parse*(expression: string): seq[Node] =
+func parse*(expression: string): Exp =
   ## convert a ``string`` regex expression
   ## into a ``Node`` expression
-  result = newSeqOfCap[Node](expression.len)
+  result.s = newSeq[Node](expression.len)
+  result.s.setLen 0
   var vb = newSeq[bool]()
   let sc = expression.scan()
   for _ in sc:
     if sc.skipWhiteSpace(vb): continue
-    result.add(sc.subParse())
-    vb.verbosity(sc, result[^1])
+    result.s.add sc.subParse()
+    vb.verbosity(sc, result.s[^1])
diff --git a/src/regex/scanner.nim b/src/regex/scanner.nim
index 1bd7d82..c33183b 100644
--- a/src/regex/scanner.nim
+++ b/src/regex/scanner.nim
@@ -1,6 +1,6 @@
 import std/unicode
 
-import ./nodetype
+import ./types
 import ./common
 
 type
@@ -68,6 +68,12 @@ iterator peek*[T](sc: Scanner[T]): (T, T) =
   for s in sc:
     yield (s, sc.peek)
 
+func peek*(sc: Scanner[Rune], n: int): Rune =
+  if sc.pos+n > sc.s.len-1:
+    invalidRune
+  else:
+    sc.s[sc.pos+n]
+
 func find*(sc: Scanner[Rune], r: Rune): int =
   ## return number of consumed chars.
   ## The scanner's position is not moved.
diff --git a/src/regex/nodetype.nim b/src/regex/types.nim
similarity index 69%
rename from src/regex/nodetype.nim
rename to src/regex/types.nim
index 0c0448d..569a1dd 100644
--- a/src/regex/nodetype.nim
+++ b/src/regex/types.nim
@@ -5,12 +5,39 @@ when NimMajor >= 1:
 import std/unicode
 import std/sets
 from std/algorithm import sorted
+from std/sequtils import toSeq
 
 import pkg/unicodedb/properties
 
 import ./common
 
+# XXX split nfatype.nim and nodetype.nim
+#     once acyclic imports are supported
+# XXX refactor transitions, add tIdx: int16
+#     to Node, make TransitionsAll dense;
+#     remove z and store transition Nodes in
+#     the NFA; flatten TransitionsAll to seq[int16]
+#     + delimiter (-1'i16) or set first bit of
+#     every last tn idx
+
 type
+  # exptype.nim
+  RpnExp* = object
+    s*: seq[Node]
+
+  # nfatype.nim
+  Enfa* = object
+    s*: seq[Node]
+  TransitionsAll* = seq[seq[int16]]
+  ZclosureStates* = seq[seq[Node]]
+  Transitions* = object
+    allZ*: TransitionsAll
+    z*: ZclosureStates
+  Nfa* = object
+    s*: seq[Node]
+    t*: Transitions
+
+  # nodetype.nim
   Flag* = enum
     flagCaseInsensitive,  # i
     flagNotCaseInsensitive,  # -i
@@ -30,6 +57,7 @@ type
     reJoiner,  # ~
     reGroupStart,  # (
     reGroupEnd,  # )
+    reFlags,  # (?flags)
     reOr,  # |
     reZeroOrMore,  # *
     reOneOrMore,  # +
@@ -91,6 +119,13 @@ type
     shorthands*: seq[Node]
     # reUCC, reNotUCC
     cc*: UnicodeCategorySet
+    # reLookahead, reLookbehind,
+    # reNotLookahead, reNotLookbehind
+    subExp*: SubExp
+  SubExp* = object
+    nfa*: Nfa
+    rpn*: RpnExp
+    reverseCapts*: bool
 
 func toCharNode*(r: Rune): Node =
   ## return a ``Node`` that is meant to be matched
@@ -142,13 +177,17 @@ func initGroupStart*(
     flags: flags,
     isCapturing: isCapturing)
 
+func initSkipNode*(): Node =
+  result = Node(
+    kind: reSkip,
+    cp: "#".toRune)
+
 func initSkipNode*(next: openArray[int16]): Node =
   ## Return a dummy node that should be skipped
   ## while traversing the NFA
   result = Node(
     kind: reSkip,
-    cp: "#".toRune)
-  result.next.add next
+    cp: "#".toRune, next: toSeq(next))
 
 func isEmpty*(n: Node): bool =
   ## check if a set ``Node`` is empty
@@ -181,6 +220,17 @@ const
     reLookbehind,
     reNotLookahead,
     reNotLookbehind}
+  lookaroundKind* = {
+    reLookahead,
+    reLookbehind,
+    reNotLookahead,
+    reNotLookbehind}
+  lookaheadKind* = {
+    reLookahead,
+    reNotLookahead}
+  lookbehindKind* = {
+    reLookbehind,
+    reNotLookbehind}
   shorthandKind* = {
     reWord,
     reDigit,
@@ -226,39 +276,44 @@ const
   groupKind* = {
     reGroupStart,
     reGroupEnd}
+  groupStartKind* = {reGroupStart} + lookaroundKind
 
-func toString*(n: Node): string =
+func `$`*(n: Node): string =
   ## return the string representation
   ## of a `Node`. The string is always
   ## equivalent to the original
   ## expression but not necessarily equal
-  # Note this is not complete. Just
-  # what's needed for debugging so far
   case n.kind
-  of reZeroOrMore,
-      reOneOrMore,
-      reZeroOrOne:
-    if not n.isGreedy:
-      n.cp.toUTF8 & "?"
-    else:
-      n.cp.toUTF8
-  of reRepRange:
-    "#"  # Invalid
-  of reStart:
-    "\\A"
-  of reEnd:
-    "\\z"
-  of reWordBoundary:
-    "\\b"
-  of reNotWordBoundary:
-    "\\B"
-  of shorthandKind:
-    '\\' & n.cp.toUTF8
+  of reChar, reCharCi: $n.cp
+  of reJoiner: "~"
+  of reGroupStart: "("
+  of reGroupEnd: ")"
+  of reFlags: "(?flags)"
+  of reOr: "|"
+  of reZeroOrMore: "*" & (if n.isGreedy: "" else: "?")
+  of reOneOrMore: "+" & (if n.isGreedy: "" else: "?")
+  of reZeroOrOne: "?" & (if n.isGreedy: "" else: "?")
+  of reRepRange: "{" & $n.min & "," & $n.max & "}"
+  of reStartSym, reStartSymML: "^"
+  of reEndSym, reEndSymML: "$"
+  of reStart: r"\A"
+  of reEnd: r"\z"
+  of reWordBoundary, reWordBoundaryAscii: r"\b"
+  of reNotWordBoundary, reNotWordBoundaryAscii: r"\B"
+  of reWord, reWordAscii: r"\w"
+  of reDigit, reDigitAscii: r"\d"
+  of reWhiteSpace, reWhiteSpaceAscii: r"\s"
+  of reUCC: r"\pN"
+  of reNotAlphaNum, reNotAlphaNumAscii: r"\W"
+  of reNotDigit, reNotDigitAscii: r"\D"
+  of reNotWhiteSpace, reNotWhiteSpaceAscii: r"\S"
+  of reNotUCC: r"\PN"
+  of reAny, reAnyNl, reAnyAscii, reAnyNlAscii: "."
   of reInSet, reNotSet:
     var str = ""
-    str.add('[')
+    str.add '['
     if n.kind == reNotSet:
-      str.add('^')
+      str.add '^'
     var
       cps = newSeq[Rune](n.cps.len)
       i = 0
@@ -266,21 +321,21 @@ func toString*(n: Node): string =
       cps[i] = cp
       inc i
     for cp in cps.sorted(cmp):
-      str.add(cp.toUTF8)
+      str.add $cp
     for sl in n.ranges:
-      str.add(sl.a.toUTF8 & '-' & sl.b.toUTF8)
+      str.add($sl.a & '-' & $sl.b)
     for nn in n.shorthands:
-      str.add(nn.toString)
-    str.add(']')
+      str.add $nn
+    str.add ']'
     str
-  of reSkip:
-    "SKIP"
-  of reEOE:
-    "EOE"
-  else:
-    n.cp.toUTF8
+  of reLookahead: "(?=...)"
+  of reLookbehind: "(?<=...)"
+  of reNotLookahead: "(?!...)"
+  of reNotLookbehind: "(?<!...)"
+  of reSkip: r"{skip}"
+  of reEoe: r"{eoe}"
 
 func toString*(n: seq[Node]): string =
   result = newStringOfCap(n.len)
   for nn in n:
-    result.add(nn.toString)
+    result.add $nn
diff --git a/tests/tests.nim b/tests/tests.nim
index 63720ca..15ee3f1 100644
--- a/tests/tests.nim
+++ b/tests/tests.nim
@@ -16,8 +16,8 @@ template test(desc: string, body: untyped): untyped =
         echo "[CT/RT] " & desc
       body)()
 
-template check(conditions: bool) =
-  doAssert(conditions)
+template check(condition: bool) =
+  doAssert(condition)
 
 template expect(exception: typedesc, body: untyped): untyped =
   doAssertRaises(exception):
@@ -74,21 +74,20 @@ func findAllCapt(s: string, reg: Regex): seq[seq[seq[Slice[int]]]] =
 
 when (NimMajor, NimMinor) >= (1, 1):
   template matchMacro(s, r: untyped): untyped =
-    block:
-      var m = false
-      match s, r:
-        m = true
-      m
+    (func (): bool =
+      result = false
+      let exp = s
+      match exp, r:
+        result = true)()
 
   template matchMacroCapt(s, r: untyped): untyped =
-    block:
+    (func (): seq[string] =
       var m = false
-      var c: seq[string]
-      match s, r:
+      let exp = s
+      match exp, r:
         m = true
-        c = matches
-      check m
-      c
+        result = matches
+      check m)()
 
   test "tmatch_macro":
     block hasOwnScope:
@@ -141,6 +140,19 @@ when (NimMajor, NimMinor) >= (1, 1):
       match txt, rex"(\w)+":
         m = true
       check m
+    block:
+      (func () = 
+        var m = false
+        var txt = "abc"
+        match txt, rex"(\w)+":
+          m = true
+        check m)()
+    block:
+      var m = false
+      match "abcdefg", rex"\w+(?<=(ab)(?=(cd)))\w+":
+        check matches == @["ab", "cd"]
+        m = true
+      check m
   
   test "tmatch_macro_captures":
     check matchMacroCapt("ab", rex"(a)b") == @["a"]
@@ -180,6 +192,24 @@ when (NimMajor, NimMinor) >= (1, 1):
     check matchMacroCapt("aaaa", rex"(a*)(a*)") == @["aaaa", ""]
     check matchMacroCapt("aaaa", rex"(a*?)(a*?)") == @["", "aaaa"]
     check matchMacroCapt("aaaa", rex"(a)*(a)") == @["a", "a"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab))\w+") == @["ab"]
+    check matchMacroCapt("abcdefg", rex"\w+(?=(cd))\w+") == @["cd"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab))\w+(?<=(fg))") ==
+      @["ab", "fg"]
+    check matchMacroCapt("abcdefg", rex"\w+(?=(cd))\w+(?=(ef))\w+") ==
+      @["cd", "ef"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab))\w+(?=(ef))\w+") ==
+      @["ab", "ef"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)cd(?<=(cd)))\w+") ==
+      @["ab", "cd"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)(?<=(ab)))\w+") ==
+      @["ab", "ab"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)(?=(cd)))\w+") ==
+      @["ab", "cd"]
+    check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)(?=(cd)(?<=(cd))))\w+") ==
+      @["ab", "cd", "cd"]
+    check matchMacroCapt("abcdefg", rex"\w+(?=(cd)(?<=(cd)))\w+") ==
+      @["cd", "cd"]
 
   test "tmatch_macro_misc":
     check matchMacro("abc", rex"\w+")
@@ -198,16 +228,12 @@ when (NimMajor, NimMinor) >= (1, 1):
     check(not matchMacro("a", rex"b"))
     check(not matchMacro("a", rex""))
     check matchMacro(" \"word\" ", rex"\s"".*""\s")
-    check matchMacro("aaa", rex"a**")
     check matchMacro("aaa", rex"(a*)*")
     check matchMacro("aaabbbaaa", rex"((a*|b*))*")
-    check matchMacro("aaa", rex"a*****")
     check matchMacro("aaa", rex"(a?)*")
     check matchMacro("aaaa", rex"((a)*(a)*)*")
-    check matchMacro("aaa", rex"a**")
     check matchMacro("aaa", rex"(a*)*")
     check matchMacro("aaabbbaaa", rex"((a*|b*))*")
-    check matchMacro("aaa", rex"a*****")
     check matchMacro("aaa", rex"(a?)*")
     check matchMacro("aaaa", rex"((a)*(a)*)*")
     check matchMacro("<TAG>two</TAG>", rex"<TAG>.*?</TAG>")
@@ -245,13 +271,13 @@ when (NimMajor, NimMinor) >= (1, 1):
     check matchMacro("650-253-0001", rex"[0-9]+..*")
     check(not matchMacro("abc-253-0001", rex"[0-9]+..*"))
     check(not matchMacro("6", rex"[0-9]+..*"))
-    check matchMacro("", rex"(11)*+(111)*")
-    check matchMacro("11", rex"(11)*+(111)*")
-    check matchMacro("111", rex"(11)*+(111)*")
-    check matchMacro("11111", rex"(11)*+(111)*")
-    check matchMacro("1111111", rex"(11)*+(111)*")
-    check matchMacro("1111111111", rex"(11)*+(111)*")
-    check(not matchMacro("1", rex"(11)*+(111)*"))
+    check matchMacro("", rex"((11)*)+(111)*")
+    check matchMacro("11", rex"((11)*)+(111)*")
+    check matchMacro("111", rex"((11)*)+(111)*")
+    check matchMacro("11111", rex"((11)*)+(111)*")
+    check matchMacro("1111111", rex"((11)*)+(111)*")
+    check matchMacro("1111111111", rex"((11)*)+(111)*")
+    check(not matchMacro("1", rex"((11)*)+(111)*"))
     check(not matchMacro("", rex"(11)+(111)*"))
     check matchMacro("11", rex"(11)+(111)*")
     check(not matchMacro("111", rex"(11)+(111)*"))
@@ -275,6 +301,198 @@ when (NimMajor, NimMinor) >= (1, 1):
     check matchMacro("111111", rex"(11)*|(111)*")
     check(not matchMacro("1", rex"(11)*|(111)*"))
 
+  test "tmacro_full_lookarounds":
+    check matchMacro("ab", rex"a(?=b)\w")
+    check(not matchMacro("ab", rex"a(?=x)\w"))
+    check(not matchMacro("ab", rex"ab(?=x)"))
+    check matchMacro("ab", rex"ab(?=$)")
+    check matchMacro("abc", rex"a(?=b)\w+")
+    check matchMacroCapt("abcdefg", rex"a(?=(bc))\w+") ==
+      @["bc"]
+    check matchMacro("ab", rex"\w(?<=a)b")
+    check(not matchMacro("ab", rex"\w(?<=x)b"))
+    check(not matchMacro("ab", rex"(?<=x)ab"))
+    check matchMacro("ab", rex"(?<=^)ab")
+    check matchMacro("abc", rex"\w\w(?<=(b))c")
+    check matchMacroCapt("abc", rex"\w\w(?<=(b))c") ==
+      @["b"]
+    check matchMacro("abc", rex"\w\w(?<=(ab))c")
+    check matchMacroCapt("abc", rex"\w\w(?<=(ab))c") ==
+      @["ab"]
+    check matchMacro("a", rex"\w(?<=a)")
+    check(not matchMacro("a", rex"\w(?=a)"))
+    check matchMacro("ab", rex"\w(?<=a(?<=a))b")
+    check matchMacro("ab", rex"\w(?<=a(?<=a(?<=a)))b")
+    check matchMacro("ab", rex"\w(?<=a(?=b))b")
+    check(not matchMacro("ab", rex"\w(?=b(?=b))b"))
+    check(not matchMacro("ab", rex"\w(?<=a(?=b(?=b)))b"))
+    check matchMacro("ab", rex"\w(?<=a(?<=a)(?=b))b")
+    check matchMacro("ab", rex"\w(?<=a(?<=a(?=b)))b")
+    check(not matchMacro("ab", rex"\w(?<=(?<=a)a)b"))
+    check matchMacro("aab", rex"\w\w(?<=aa(?=b))b")
+    block:
+      check matchMacroCapt("aaab", rex"(\w*)(\w*?)") ==
+        @["aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*)(\w*?)$)") ==
+        @["aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*)(\w*)$)") ==
+        @["", "aaab"]
+      check matchMacroCapt("aaab", rex"(\w*?)(\w*)(\w*?)") ==
+        @["", "aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w*)(\w*?)$)") ==
+        @["", "aaab", ""]
+      check matchMacroCapt("aaab", rex"(\w*)(\w??)") ==
+        @["aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*)(\w??)$)") ==
+        @["aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*)(\w?)$)") ==
+        @["aaa", "b"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w?)(\w*?)$)") ==
+        @["aaa", "b", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w??)(\w*?)$)") ==
+        @["aaab", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w??)(\w*?)$)") ==
+        @["a", "aab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w?)(\w*?)$)") ==
+        @["a", "aab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w??)$)") ==
+        @["aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*)(\w??)$)") ==
+        @["aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w?)$)") ==
+        @["aaa", "b"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*)(\w?)$)") ==
+        @["aaa", "b"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w*?)(\w??)$)") ==
+        @["aaab", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*?)(\w*)(\w??)$)") ==
+        @["", "aaab", ""]
+    block:
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+|(\w)\w+$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\w)\w+$)") ==
+        @["", "a"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+|(\w)\w+|(\w)\w+$)") ==
+        @["a", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\w)\w+|(\w)\w+$)") ==
+        @["", "a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\d)\w+|(\w)\w+$)") ==
+        @["", "", "a"]
+    block:
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\w)\w+|(\w)\w+)+?$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\w)\w+)+?$)") ==
+        @["", "a"]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\w)\w+|(\w)\w+|(\w)\w+)+?$)") ==
+        @["a", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\w)\w+|(\w)\w+)+?$)") ==
+        @["", "a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\d)\w+|(\w)\w+)+?$)") ==
+        @["", "", "a"]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\w)\w+|(\w)\w+)*?$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\w)\w+)*?$)") ==
+        @["", "a"]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\w)\w+|(\w)\w+|(\w)\w+)*?$)") ==
+        @["a", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\w)\w+|(\w)\w+)*?$)") ==
+        @["", "a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\d)\w+|(\w)\w+)*?$)") ==
+        @["", "", "a"]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\w)\w+|(\w)\w+)??$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\w)\w+)??$)") ==
+        @["", "a"]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\w)\w+|(\w)\w+|(\w)\w+)??$)") ==
+        @["a", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\w)\w+|(\w)\w+)??$)") ==
+        @["", "a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(?:(\d)\w+|(\d)\w+|(\w)\w+)??$)") ==
+        @["", "", "a"]
+    block:
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+?|(\w)\w+?$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+|(\w)\w+$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+?|(\w)\w+$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+|(\w)\w+?$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w+|\w{4}|\w{4}|\w+)*$)") ==
+        @["aaab"]
+      check matchMacroCapt("aaab", rex"(\w*|\w{4}|\w{4}|\w*)*") ==
+        @[""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*|\w{4}|\w{4}|\w*)*$)") ==
+        @["aaab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w+|\w{4}|\w{4}|\w+)+$)") ==
+        @["aaab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w)\w+|\w{4}|\w{4}|(\w)\w+$)") ==
+        @["a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\w{4})|(\w{4})|(\w)\w+$)") ==
+        @["", "aaab", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\w)\w{3}|(\w)\w{3}|(\w)\w+$)") ==
+        @["", "a", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\d)\w{3}|(\w)\w{3}|(\w)\w+$)") ==
+        @["", "", "a", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\d)\w+|(\d)\w{3}|(\d)\w{3}|(\w)\w+$)") ==
+        @["", "", "", "a"]
+      check matchMacroCapt("aaab", rex"(\w*|\w{4}|\w{4}|\w*)+") ==
+        @[""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*|\w{4}|\w{4}|\w*)+$)") ==
+        @["aaab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w+)|(\w{4})|(\w{10})$)") ==
+        @["aaab", "", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w{10})|(\w{4})|(\w+)$)") ==
+        @["", "aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^((\w{10})|(\w{4})|(\w+))+$)") ==
+        @["aaab", "", "aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^((\w{10})|(\w{4})|(\w+))*$)") ==
+        @["aaab", "", "aaab", ""]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w+|\w{4}|\w{4}|\w+)*?$)") ==
+        @["aaab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w*|\w{4}|\w{4}|\w*)*?$)") ==
+        @["aaab"]
+      check matchMacroCapt("aaab", rex".*(?<=^(\w+|\w{4}|\w{4}|\w+)+?$)") ==
+        @["aaab"]
+    check(not matchMacro("ab", rex"\w(?<=a(?=b(?<=a)))b"))
+    check(not matchMacro("ab", rex"\w(?<=a(?<=a(?=b(?<=a))))b"))
+    check matchMacro("ab", rex"\w(?<=a(?=b(?<=b)))b")
+    check matchMacro("ab", rex"\w(?<=a(?<=a(?=b(?<=b))))b")
+    block:
+      template asterisk: untyped = rex".*(?<=(?<!\\)(?:\\\\)*)\*"
+      check matchMacro(r"*", asterisk)
+      check(not matchMacro(r"\*", asterisk))
+      check matchMacro(r"\\*", asterisk)
+      check(not matchMacro(r"\\\*", asterisk))
+      check matchMacro(r"\\\\*", asterisk)
+      check(not matchMacro(r"\\\\\*", asterisk))
+    block:
+      check matchMacro(r"弢b", rex"弢(?=b)\w")
+      check matchMacro(r"a弢", rex"a(?=弢)\w")
+      check matchMacro(r"Ⓐb", rex"Ⓐ(?=b)\w")
+      check matchMacro(r"aⒶ", rex"a(?=Ⓐ)\w")
+      check matchMacro(r"Ⓐb", rex"Ⓐ(?=b)\w")
+      check matchMacro(r"aΪ", rex"a(?=Ϊ)\w")
+      check matchMacro(r"Ϊb", rex"Ϊ(?=b)\w")
+      check matchMacro(r"弢Ⓐ", rex"弢(?=Ⓐ)\w")
+    block:
+      check matchMacro(r"a弢", rex"\w(?<=a)弢")
+      check matchMacro(r"弢b", rex"\w(?<=弢)b")
+      check matchMacro(r"aⒶ", rex"\w(?<=a)Ⓐ")
+      check matchMacro(r"Ⓐb", rex"\w(?<=Ⓐ)b")
+      check matchMacro(r"aΪ", rex"\w(?<=a)Ϊ")
+      check matchMacro(r"Ϊb", rex"\w(?<=Ϊ)b")
+      check matchMacro(r"弢Ⓐ", rex"\w(?<=弢)Ⓐ")
+    block:
+      check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)cd(?<=(cd)))\w+") ==
+        @["ab", "cd"]
+      check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)(?=(cd)))\w+") ==
+        @["ab", "cd"]
+      check matchMacroCapt("abcdefg", rex"\w+(?<=(ab)(?=(cd)(?<=(cd))))\w+") ==
+        @["ab", "cd", "cd"]
+      check matchMacroCapt("abcdefg", rex"\w+(?=(cd)(?<=(cd)))\w+") ==
+        @["cd", "cd"]
+
 test "tfull_match":
   check "".isMatch(re"")
   check "a".isMatch(re"a")
@@ -294,10 +512,38 @@ test "tfull_match":
   check " \"word\" ".isMatch(re"\s"".*""\s")
 
 test "trepetition_cycle":
-  check "aaa".isMatch(re"a**")
+  check "**".isMatch(re"\**")
+  check "++".isMatch(re"\++")
+  check "??".isMatch(re"\?+")
+  check "??".isMatch(re"\?*")
+  check "?".isMatch(re"\??")
+  check "?".isMatch(re"\???")
+  check "**".isMatch(re"\**?")
+  check "++".isMatch(re"\++?")
+  check "??".isMatch(re"\?+?")
+  check "??".isMatch(re"\?*?")
+  check raises(r"a**")
+  check raises(r"a++")
+  check raises(r"a*+")
+  check raises(r"a+*")
+  check raises(r"a?+")
+  check raises(r"a?*")
+  check raises(r"a??+")
+  check raises(r"a??*")
+  check raises(r"a*??")
+  check raises(r"a+??")
+  check raises(r"a???")
+  check raises(r"a{1,}*")
+  check raises(r"a{1}*")
+  check raises(r"a{1,}+")
+  check raises(r"a{1}+")
+  check raises(r"a{1,}??")
+  check raises(r"a{1}??")
+  check(not raises(r"a{1,}?"))
+  check(not raises(r"a{1}?"))
   check "aaa".isMatch(re"(a*)*")
   check "aaabbbaaa".isMatch(re"((a*|b*))*")
-  check "aaa".isMatch(re"a*****")
+  check raises(r"a*****")
   check raises(r"a*{,}")
   check "aaa".isMatch(re"(a?)*")
   check "aaaa".isMatch(re"((a)*(a)*)*")
@@ -374,6 +620,15 @@ test "trepetition_cycle":
   check findAllBounds("@", re"(a*?)+@") == @[0 .. 0]
   check findAllBounds("@", re"(a*?)+?@") == @[0 .. 0]
 
+test "talternations":
+  check raises(r"a|?")
+  check raises(r"a|?b")
+  check raises(r"?|?")
+  check raises(r"a|*")
+  check raises(r"a|*b")
+  check raises(r"a|+")
+  check raises(r"a|+b")
+
 test "tcaptures":
   check "ab".matchWithCapt(re"(a)b") == @[@["a"]]
   check "aa".matchWithCapt(re"(a)*") == @[@["a", "a"]]
@@ -647,11 +902,9 @@ test "tnot_set":
 test "trepetition_range":
   check(not "".isMatch(re"a{0}"))
   check(not "".isMatch(re"a{0,0}"))
-  check(not "".isMatch(re"a{,0}"))
-  check "".isMatch(re"a{,2}")
+  check "".isMatch(re"a{0,2}")
   check "a".isMatch(re"a{0}")
   check "a".isMatch(re"a{0,0}")
-  check "a".isMatch(re"a{,0}")
   check "a".isMatch(re"a{1}")
   check "aa".isMatch(re"a{2}")
   check "aaa".isMatch(re"a{3}")
@@ -667,47 +920,49 @@ test "trepetition_range":
   check "aaa".isMatch(re"a{1,}")
   check "aaaaaaaaaa".isMatch(re"a{1,}")
   check "aa".isMatch(re"a{2,}")
-  check "a".isMatch(re"a{,}")
-  check "aa".isMatch(re"a{,}")
-  check "aaaaaaaaaa".isMatch(re"a{,}")
-  check "".isMatch(re"a{,}")
   check "aaaaaaaaaa".isMatch(re"a{0,}")
   check "".isMatch(re"a{0,}")
   check(not "a".isMatch(re"a{2,}"))
+  check "a{a,1}".isMatch(re"a{a,1}")
+  check(not "a".isMatch(re"a{a,1}"))
+  check(not "a1".isMatch(re"a{a,1}"))
+  check "a{".isMatch(re"a{")
+  check "a{{".isMatch(re"a{{")
+  check "a{}".isMatch(re"a{}")
   check raises(r"a*{,}")
   check raises(r"a*{0}")
   check raises(r"a*{1}")
-  check "aaa".matchWithCapt(re"(a){,}") ==
+  check "aaa".matchWithCapt(re"(a){0,}") ==
     @[@["a", "a", "a"]]
-  check "aaa".matchWithCapt(re"(a{,}){,}") == @[@["aaa", ""]]
+  check "aaa".matchWithCapt(re"(a{0,}){0,}") == @[@["aaa", ""]]
   check "aaaaa".matchWithCapt(re"(a){5}") ==
     @[@["a", "a", "a", "a", "a"]]
   check "a".matchWithCapt(re"(a){1,5}") == @[@["a"]]
   check "aaa".matchWithCapt(re"(a){1,5}") ==
     @[@["a", "a", "a"]]
-  check "".matchWithCapt(re"(a){,}") ==
+  check "".matchWithCapt(re"(a){0,}") ==
     @[newSeq[string]()]
-  check "aaa".matchWithCapt(re"(a{,}){,}") == @[@["aaa", ""]]
-  check "aaa".matchWithCapt(re"(a{1}){,}") ==
+  check "aaa".matchWithCapt(re"(a{0,}){0,}") == @[@["aaa", ""]]
+  check "aaa".matchWithCapt(re"(a{1}){0,}") ==
     @[@["a", "a", "a"]]
-  check "aaaa".matchWithCapt(re"(a{2}){,}") ==
+  check "aaaa".matchWithCapt(re"(a{2}){0,}") ==
     @[@["aa", "aa"]]
-  check "aaaa".matchWithCapt(re"(a{,3}){,}") ==
+  check "aaaa".matchWithCapt(re"(a{0,3}){0,}") ==
     @[@["aaa", "a", ""]]
-  check "".matchWithCapt(re"(a{,3}){,}") ==
+  check "".matchWithCapt(re"(a{0,3}){0,}") ==
     @[@[""]]
-  check "aaa".matchWithCapt(re"(a{1,}){,}") ==
+  check "aaa".matchWithCapt(re"(a{1,}){0,}") ==
     @[@["aaa"]]
-  check "".matchWithCapt(re"(a{1,}){,}") ==
+  check "".matchWithCapt(re"(a{1,}){0,}") ==
     @[newSeq[string]()]
   check(not "".isMatch(re"(a{1,})"))
   check "a".matchWithCapt(re"(a{1,})") == @[@["a"]]
   check "aaa".matchWithCapt(re"(a{1,})") == @[@["aaa"]]
   check "abab".matchWithCapt(re"(a(b)){2}") ==
     @[@["ab", "ab"], @["b", "b"]]
-  check raisesMsg(r"a{bad}") ==
+  check raisesMsg(r"a{0,bad}") ==
     "Invalid repetition range. Range can only contain digits\n" &
-    "a{bad}\n" &
+    "a{0,bad}\n" &
     " ^"
   check raisesMsg(r"a{1111111111}") ==
     "Invalid repetition range. Max value is 32767\n" &
@@ -720,8 +975,15 @@ test "trepetition_range":
     " ^"
   check(not raises(r"a{1,101}"))
   check raises(r"a{0,a}")
-  check raises(r"a{a,1}")
-  check raises(r"a{-1}")
+  check raises(r"a{,}")
+  check raises(r"a{,1}")
+  check raises(r"a{1x}")
+  check raises(r"a{1,x}")
+  check raises(r"a{1")
+  check raises(r"a{1,")
+  check raises(r"a{1,,}")
+  check raises(r"a{1,,2}")
+  check raises(r"a{1,,,2}")
   check raisesMsg(r"{10}") ==
     "Invalid repeition range, " &
     "nothing to repeat"
@@ -764,13 +1026,13 @@ test "tgreediness":
     @[@["a"], @["a", "a"], @[]]
   check "aaa".matchWithCapt(re"(a)+?(a)+?(a)?") ==
     @[@["a"], @["a"], @["a"]]
-  check "aaa".matchWithCapt(re"(a){,}(a){,}(a){,}") ==
+  check "aaa".matchWithCapt(re"(a){0,}(a){0,}(a){0,}") ==
     @[@["a", "a", "a"], @[], @[]]
-  check "aaa".matchWithCapt(re"(a){,}?(a){,}(a){,}?") ==
+  check "aaa".matchWithCapt(re"(a){0,}?(a){0,}(a){0,}?") ==
     @[@[], @["a", "a", "a"], @[]]
-  check "aaa".matchWithCapt(re"(a){,}?(a){,}?(a){,}") ==
+  check "aaa".matchWithCapt(re"(a){0,}?(a){0,}?(a){0,}") ==
     @[@[], @[], @["a", "a", "a"]]
-  check "aaa".matchWithCapt(re"(a){,}?(a){,}?(a){,}?") ==
+  check "aaa".matchWithCapt(re"(a){0,}?(a){0,}?(a){0,}?") ==
     @[@[], @[], @["a", "a", "a"]]
   check "aaa".matchWithCapt(re"(a){1,}(a){1,}(a)?") ==
     @[@["a", "a"], @["a"], @[]]
@@ -893,10 +1155,6 @@ test "tnamed_groups":
   check raisesMsg(r"(b(a)") ==
     "Invalid capturing group. " &
     "Found too many opening symbols"
-  check raisesMsg(r"a()") ==
-    "Invalid group. Empty group is not allowed\n" &
-    "a()\n" &
-    " ^"
   check raisesMsg(r"a(?P<asd)") ==
     "Invalid group name. Expected char in " &
     "{'a'..'z', 'A'..'Z', '0'..'9', '-', '_'}, " &
@@ -1070,13 +1328,6 @@ test "tflags":
     "abc(?q)\n" &
     "   ^"
 
-test "tor_op":
-  check raisesMsg(r"|") ==
-    "Invalid OR conditional, nothing " &
-    "to match at right/left side of the condition"
-  check raises(r"abc|")
-  check raises(r"|abc")
-
 test "tescaped_sequences":
   check "\x07".isMatch(re"\a")
   check "\x0C".isMatch(re"\f")
@@ -1300,6 +1551,10 @@ test "tstarts_with":
   check "abc".startsWith(re"(a|b)")
   check "bc".startsWith(re"(a|b)")
   check(not "c".startsWith(re"(a|b)"))
+  check startsWith("abc", re"b", start = 1)
+  check startsWith("abc", re"(?<=a)b", start = 1)
+  check(not startsWith("abc", re"(?<=x)b", start = 1))
+  check(not startsWith("abc", re"^b", start = 1))
 
 test "tends_with":
   check "abc".endsWith(re"bc")
@@ -1667,6 +1922,244 @@ test "tnegative_look_around":
   check(not "ab".isMatch(re"\w(?<!a)b"))
   check "ab".matchWithCapt(re"(\w(?<!c))b") == @[@["a"]]
 
+test "tfull_lookarounds":
+  var m: RegexMatch
+  check match("ab", re"a(?=b)\w")
+  check(not match("ab", re"a(?=x)\w"))
+  check(not match("ab", re"ab(?=x)"))
+  check match("ab", re"ab(?=$)")
+  check match("abc", re"a(?=b)\w+")
+  check match("abc", re"a(?=(bc))\w+", m) and
+    m.captures == @[@[1 .. 2]]
+  check match("ab", re"\w(?<=a)b")
+  check(not match("ab", re"\w(?<=x)b"))
+  check(not match("ab", re"(?<=x)ab"))
+  check match("ab", re"(?<=^)ab")
+  check match("abc", re"\w\w(?<=(b))c")
+  check match("abc", re"\w\w(?<=(b))c", m) and
+    m.captures == @[@[1 .. 1]]
+  check match("abc", re"\w\w(?<=(ab))c")
+  check match("abc", re"\w\w(?<=(ab))c", m) and
+    m.captures == @[@[0 .. 1]]
+  check match("a", re"\w(?<=a)")
+  check(not match("a", re"\w(?=a)"))
+  check match("ab", re"\w(?<=a(?<=a))b")
+  check match("ab", re"\w(?<=a(?<=a(?<=a)))b")
+  check match("ab", re"\w(?<=a(?=b))b")
+  check(not match("ab", re"\w(?=b(?=b))b"))  # JS, D
+  check(not match("ab", re"\w(?<=a(?=b(?=b)))b"))  # JS, D
+  check match("ab", re"\w(?<=a(?<=a)(?=b))b")
+  check match("ab", re"\w(?<=a(?<=a(?=b)))b")
+  check(not match("ab", re"\w(?<=(?<=a)a)b"))  # JS, D
+  check match("aab", re"\w\w(?<=aa(?=b))b")
+  block:
+    check match("aaab", re"(\w*)(\w*?)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*)(\w*?)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*)(\w*)$)", m) and
+      m.captures == @[@[0 .. -1], @[0 .. 3]]
+    check match("aaab", re"(\w*?)(\w*)(\w*?)", m) and
+      m.captures == @[@[0 .. -1], @[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w*)(\w*?)$)", m) and
+      m.captures == @[@[0 .. -1], @[0 .. 3], @[4 .. 3]]
+    check match("aaab", re"(\w*)(\w??)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*)(\w??)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*)(\w?)$)", m) and
+      m.captures == @[@[0 .. 2], @[3 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w?)(\w*?)$)", m) and
+      m.captures == @[@[0 .. 2], @[3 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w??)(\w*?)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w??)(\w*?)$)", m) and
+      m.captures == @[@[0 .. 0], @[1 .. 3]]
+    check match("aaab", re".*(?<=^(\w?)(\w*?)$)", m) and
+      m.captures == @[@[0 .. 0], @[1 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w??)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*)(\w??)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w?)$)", m) and
+      m.captures == @[@[0 .. 2], @[3 .. 3]]
+    check match("aaab", re".*(?<=^(\w*)(\w?)$)", m) and
+      m.captures == @[@[0 .. 2], @[3 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w*?)(\w??)$)", m) and
+      m.captures == @[@[0 .. 3], @[4 .. 3], @[4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*?)(\w*)(\w??)$)", m) and
+      m.captures == @[@[0 .. -1], @[0 .. 3], @[4 .. 3]]
+  block:
+    check match("aaab", re".*(?<=^(\w)\w+|(\w)\w+$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\w)\w+$)", m) and
+      m.captures == @[@[], @[0 .. 0]]
+    check match("aaab", re".*(?<=^(\w)\w+|(\w)\w+|(\w)\w+$)", m) and
+      m.captures == @[@[0 .. 0], @[], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\w)\w+|(\w)\w+$)", m) and
+      m.captures == @[@[], @[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\d)\w+|(\w)\w+$)", m) and
+      m.captures == @[@[], @[], @[0 .. 0]]
+  block:
+    check match("aaab", re".*(?<=^(?:(\w)\w+|(\w)\w+)+?$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\w)\w+)+?$)", m) and
+      m.captures == @[@[], @[0 .. 0]]
+    check match("aaab", re".*(?<=^(?:(\w)\w+|(\w)\w+|(\w)\w+)+?$)", m) and
+      m.captures == @[@[0 .. 0], @[], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\w)\w+|(\w)\w+)+?$)", m) and
+      m.captures == @[@[], @[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\d)\w+|(\w)\w+)+?$)", m) and
+      m.captures == @[@[], @[], @[0 .. 0]]
+    check match("aaab", re".*(?<=^(?:(\w)\w+|(\w)\w+)*?$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\w)\w+)*?$)", m) and
+      m.captures == @[@[], @[0 .. 0]]
+    check match("aaab", re".*(?<=^(?:(\w)\w+|(\w)\w+|(\w)\w+)*?$)", m) and
+      m.captures == @[@[0 .. 0], @[], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\w)\w+|(\w)\w+)*?$)", m) and
+      m.captures == @[@[], @[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\d)\w+|(\w)\w+)*?$)", m) and
+      m.captures == @[@[], @[], @[0 .. 0]]
+    check match("aaab", re".*(?<=^(?:(\w)\w+|(\w)\w+)??$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\w)\w+)??$)", m) and
+      m.captures == @[@[], @[0 .. 0]]
+    check match("aaab", re".*(?<=^(?:(\w)\w+|(\w)\w+|(\w)\w+)??$)", m) and
+      m.captures == @[@[0 .. 0], @[], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\w)\w+|(\w)\w+)??$)", m) and
+      m.captures == @[@[], @[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(?:(\d)\w+|(\d)\w+|(\w)\w+)??$)", m) and
+      m.captures == @[@[], @[], @[0 .. 0]]
+  block:
+    check match("aaab", re".*(?<=^(\w)\w+?|(\w)\w+?$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\w)\w+|(\w)\w+$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\w)\w+?|(\w)\w+$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\w)\w+|(\w)\w+?$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\w+|\w{4}|\w{4}|\w+)*$)", m) and
+      m.captures == @[@[0 .. 3]]
+    check match("aaab", re"(\w*|\w{4}|\w{4}|\w*)*", m) and
+      m.captures == @[@[0 .. 3, 4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*|\w{4}|\w{4}|\w*)*$)", m) and
+      m.captures == @[@[0 .. -1, 0 .. 3]]
+    check match("aaab", re".*(?<=^(\w+|\w{4}|\w{4}|\w+)+$)", m) and
+      m.captures == @[@[0 .. 3]]
+    check match("aaab", re".*(?<=^(\w)\w+|\w{4}|\w{4}|(\w)\w+$)", m) and
+      m.captures == @[@[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\w{4})|(\w{4})|(\w)\w+$)", m) and
+      m.captures == @[@[], @[0 .. 3], @[], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\w)\w{3}|(\w)\w{3}|(\w)\w+$)", m) and
+      m.captures == @[@[], @[0 .. 0], @[], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\d)\w{3}|(\w)\w{3}|(\w)\w+$)", m) and
+      m.captures == @[@[], @[], @[0 .. 0], @[]]
+    check match("aaab", re".*(?<=^(\d)\w+|(\d)\w{3}|(\d)\w{3}|(\w)\w+$)", m) and
+      m.captures == @[@[], @[], @[], @[0 .. 0]]
+    check match("aaab", re"(\w*|\w{4}|\w{4}|\w*)+", m) and
+      m.captures == @[@[0 .. 3, 4 .. 3]]
+    check match("aaab", re".*(?<=^(\w*|\w{4}|\w{4}|\w*)+$)", m) and
+      m.captures == @[@[0 .. -1, 0 .. 3]]
+    check match("aaab", re".*(?<=^(\w+)|(\w{4})|(\w{10})$)", m) and
+      m.captures == @[@[0 .. 3], @[], @[]]
+    check match("aaab", re".*(?<=^(\w{10})|(\w{4})|(\w+)$)", m) and
+      m.captures == @[@[], @[0 .. 3], @[]]
+    check match("aaab", re".*(?<=^((\w{10})|(\w{4})|(\w+))+$)", m) and
+      m.captures == @[@[0 .. 3], @[], @[0 .. 3], @[]]
+    check match("aaab", re".*(?<=^((\w{10})|(\w{4})|(\w+))*$)", m) and
+      m.captures == @[@[0 .. 3], @[], @[0 .. 3], @[]]
+    check match("aaab", re".*(?<=^(\w+|\w{4}|\w{4}|\w+)*?$)", m) and
+      m.captures == @[@[0 .. 3]]
+    check match("aaab", re".*(?<=^(\w*|\w{4}|\w{4}|\w*)*?$)", m) and
+      m.captures == @[@[0 .. 3]]
+    check match("aaab", re".*(?<=^(\w+|\w{4}|\w{4}|\w+)+?$)", m) and
+      m.captures == @[@[0 .. 3]]
+  check findAllBounds(r"1abab", re"(?<=\d\w*)ab") ==
+    @[1 .. 2, 3 .. 4]
+  check findAllBounds(r"abab", re"(?<=\d\w*)ab").len == 0
+  check findAllBounds(r"abab1", re"ab(?=\w*\d)") ==
+    @[0 .. 1, 2 .. 3]
+  check findAllBounds(r"abab", re"ab(?=\w*\d)").len == 0
+  check findAllBounds("foo\nbar\nbar", re"bar(?=$)") ==
+    @[8 .. 10]
+  check findAllBounds("foo\nbar\nbar", re"(?m)bar(?=$)") ==
+    @[4 .. 6, 8 .. 10]
+  check findAllBounds("bar\nfoo\nbar", re"(?<=^)bar") ==
+    @[0 .. 2]
+  check findAllBounds("bar\nfoo\nbar", re"(?m)(?<=^)bar") ==
+    @[0 .. 2, 8 .. 10]
+  block:
+    # There is a difference in how nesting is
+    # handled by JS vs D; in D all of them start
+    # from the outermost one, while in JS they
+    # start from the outer one/parent. I think the JS way
+    # is less mind blending, and it makes more sense to me
+    #
+    # These 2 match in D, but not in JS
+    check(not match("ab", re"\w(?<=a(?=b(?<=a)))b"))  # JS, !D
+    check(not match("ab", re"\w(?<=a(?<=a(?=b(?<=a))))b"))  # JS, !D
+    # These 2 match in JS, but not in D
+    check match("ab", re"\w(?<=a(?=b(?<=b)))b")  # JS, !D
+    check match("ab", re"\w(?<=a(?<=a(?=b(?<=b))))b")  # JS, !D
+  block:
+    let asterisk = re"(?<=(?<!\\)(?:\\\\)*)\*"
+    check findAllBounds(r"*foo*", asterisk) ==
+      @[0 .. 0, 4 .. 4]
+    check findAllBounds(r"\*foo\*", asterisk).len == 0
+    check findAllBounds(r"\\*foo\\*", asterisk) ==
+      @[2 .. 2, 8 .. 8]
+    check findAllBounds(r"\\\*foo\\\*", asterisk).len == 0
+    check findAllBounds(r"\\\\*foo\\\\*", asterisk) ==
+      @[4 .. 4, 12 .. 12]
+    check findAllBounds(r"\\\\\*foo\\\\\*", asterisk).len == 0
+  block:
+    let asterisk = re".*(?<=(?<!\\)(?:\\\\)*)\*"
+    check match(r"*", asterisk)
+    check(not match(r"\*", asterisk))
+    check match(r"\\*", asterisk)
+    check(not match(r"\\\*", asterisk))
+    check match(r"\\\\*", asterisk)
+    check(not match(r"\\\\\*", asterisk))
+  block:
+    check findAllBounds("foobarbaz", re"(?<=o)b") == @[3 .. 3]
+    check findAllBounds("foobar", re"o(?=b)") == @[2 .. 2]
+    check findAllBounds("100 in USD100", re"(?<=USD)\d{3}") ==
+      @[10 .. 12]
+    check findAllBounds("100 in USD100", re"\d{3}(?<=USD\d{3})") ==
+      @[10 .. 12]
+  block:
+    check match("弢b", re"弢(?=b)\w")
+    check match("a弢", re"a(?=弢)\w")
+    check match("Ⓐb", re"Ⓐ(?=b)\w")
+    check match("aⒶ", re"a(?=Ⓐ)\w")
+    check match("Ⓐb", re"Ⓐ(?=b)\w")
+    check match("aΪ", re"a(?=Ϊ)\w")
+    check match("Ϊb", re"Ϊ(?=b)\w")
+    check match("弢Ⓐ", re"弢(?=Ⓐ)\w")
+  block:
+    check match("a弢", re"\w(?<=a)弢")
+    check match("弢b", re"\w(?<=弢)b")
+    check match("aⒶ", re"\w(?<=a)Ⓐ")
+    check match("Ⓐb", re"\w(?<=Ⓐ)b")
+    check match("aΪ", re"\w(?<=a)Ϊ")
+    check match("Ϊb", re"\w(?<=Ϊ)b")
+    check match("弢Ⓐ", re"\w(?<=弢)Ⓐ")
+  block:  # Follows Nim re's behaviour
+    check(not match("abc", re"^bc", m, start = 1))
+    check match("abc", re"(?<=a)bc", m, start = 1)
+    check(not match("abc", re"(?<=x)bc", m, start = 1))
+  block:
+    check match("abcdefg", re"\w+(?<=(ab)cd(?<=(cd)))\w+", m) and
+      m.captures == @[@[0 .. 1], @[2 .. 3]]
+    check match("abcdefg", re"\w+(?<=(ab)(?=(cd)))\w+", m) and
+      m.captures == @[@[0 .. 1], @[2 .. 3]]
+    check match("abcdefg", re"\w+(?<=(ab)(?=(cd)(?<=(cd))))\w+", m) and
+      m.captures == @[@[0 .. 1], @[2 .. 3], @[2 .. 3]]
+    check match("abcdefg", re"\w+(?=(cd)(?<=(cd)))\w+", m) and
+      m.captures == @[@[2 .. 3], @[2 .. 3]]
+
 test "tpretty_errors":
   block:
     var exp = ""
@@ -2176,21 +2669,25 @@ test "tmisc2_5":
   check match("650-253-0001", re"[0-9]+..*", m)
   check(not match("abc-253-0001", re"[0-9]+..*", m))
   check(not match("6", re"[0-9]+..*", m))
-  block:
-    const re1 = re"(11)*+(111)*"
-    check match("", re1)
-    check match("11", re1)
-    check match("111", re1)
-    check match("11111", re1)
-    check match("1111111", re1)
-    check match("1111111111", re1)
-    check(not match("1", re1))
-  block:
-    const re1 = re"(11)+(111)*"
-    check(not match("", re1))
-    check match("11", re1)
-    check(not match("111", re1))
-    check match("11111", re1)
+  # VM registry error on Nim < 1.1 (devel)
+  when not defined(runTestAtCT) or (NimMajor, NimMinor) > (1, 0):
+    block:
+      const re1 = re"((11)*)+(111)*"
+      check match("", re1)
+      check match("11", re1)
+      check match("111", re1)
+      check match("11111", re1)
+      check match("1111111", re1)
+      check match("1111111111", re1)
+      check(not match("1", re1))
+    block:
+      const re1 = re"(11)+(111)*"
+      check(not match("", re1))
+      check match("11", re1)
+      check(not match("111", re1))
+      check match("11111", re1)
+
+test "tmisc2_6":
   block:
     const re1 = re"(aabb)(ab)*"
     check match("aabb", re1)
@@ -2293,8 +2790,8 @@ test "tmisc3":
       m.boundaries == 11 .. 10
     # start is out of bounds, but this is what Nim's re
     # does, nre throws an error
-    check find("foo\nbar\nbar", re"(?m)$", m, start=12) and
-      m.boundaries == 12 .. 11
+    #check find("foo\nbar\nbar", re"(?m)$", m, start=12) and
+    #  m.boundaries == 12 .. 11
   # XXX make this return false?
   check match("abc", re"(?m)$", m, start=50)
   check match("abc", re"(?m)^", m, start=50)
@@ -2350,3 +2847,121 @@ test "tmisc3":
   check findAndCaptureAll(
     "He was carefully disguised but captured quickly by police.",
     re"\w+ly") == @["carefully", "quickly"]
+
+test "fix#83":
+  block:
+    let pattern = "^src/(?:[^\\/]*(?:\\/|$))*[^/]*\\.nim$"
+    check "src/dir/foo.nim".match(pattern.re)
+  check match("foo", re"(?:$)*\w*")
+  check match("foo", re"($)*\w*")
+  check match("foo", re"^*\w*")
+  check match("foo", re"([^/]*$)*\w*")
+  check match("src/dir/foo.nim", re"^src/(?:[^/]*(?:/|$))*[^/]*\.nim$")
+  check(not match("foo", re"$($*)*(\w*)"))
+  check(not match("foo", re"$($$*$*)*(\w*)"))
+  check(not match("foo", re"($|$)($*)*(\w*)"))
+  check(not match("foo", re"($|$$)($*)*(\w*)"))
+  check(not match("foo", re"($$|$)($*)*(\w*)"))
+  check(not match("foo", re"($|$)(\w*)"))
+  check(not match("foo", re"($|$$)(\w*)"))
+  check(not match("foo", re"($$|$)(\w*)"))
+  check(not match("foo", re"(^$|$)(\w*)"))
+  check(not match("foo", re"($|^$)(\w*)"))
+  check match("", re"$*\w*")
+  check match("foo", re"($*?)(\w*)")
+  check match("foo", re"($*)(\w*)")
+  check match("foox", re"($*)(\w*)x")
+  check match("foo", re"(a*?$*?)*?(\w*)")
+  check match("foox", re"(a*$*)*(\w*)x")
+  check match("aaa", re"($|^)(a*)")
+  check match("aaa", re"(^|$)(a*)")
+  block:
+    check findAllBounds("aaaxaaa", re"^(a*)") == @[0 .. 2]
+    check findAllBounds("aaaxaaa", re"$(a*)") == @[7 .. 6]
+    check findAllBounds("aaaxaaa", re"($|^)(a*)") == @[0 .. 2, 7 .. 6]
+    check findAllBounds("aaaxaaa", re"(^|$)(a*)") == @[0 .. 2, 7 .. 6]
+  when (NimMajor, NimMinor) >= (1, 1):
+    check matchMacroCapt("foo", rex"($*?)(\w*)") == @["", "foo"]
+    check matchMacroCapt("foo", rex"($*)(\w*)") == @["", "foo"]
+    check matchMacroCapt("foox", rex"($*)(\w*)x") == @["", "foo"]
+    check matchMacroCapt("foo", rex"(a*?$*?)*?(\w*)") == @["", "foo"]
+    check matchMacroCapt("foox", rex"(a*$*)*(\w*)x") == @["", "foo"]
+    check matchMacroCapt("aaa", rex"($|^)(a*)") == @["", "aaa"]
+    check matchMacroCapt("aaa", rex"(^|$)(a*)") == @["", "aaa"]
+
+test "escapeRe":
+  check escapeRe("abc") == "abc"
+  check escapeRe("123") == "123"
+  check escapeRe("!") == "!"
+  check escapeRe("*") == r"\*"
+  check escapeRe" #$&()*+-.?[\]^{|}~" ==
+    r"\ \#\$\&\(\)\*\+\-\.\?\[\\\]\^\{\|\}\~"
+  check escapeRe("\L") == "\\\L"
+  check escapeRe"aΪⒶ弢" == "aΪⒶ弢"
+  check escapeRe"$Ϊ$Ⓐ$弢$" == r"\$Ϊ\$Ⓐ\$弢\$"
+  check match("$", re(escapeRe"$"))
+  block:
+    var s = ""
+    for c in 0 .. 255:
+      s.add c.char
+    discard re(escapeRe(s))
+
+test "issue_98":
+  check match("", re"|")
+  check match("a", re"a|")
+  check match("", re"a|")
+  check(not match("b", re"a|"))
+  check match("b", re"|b")
+  check match("", re"|b")
+  check(not match("a", re"|b"))
+  check match("", re"(|)")
+  check match("a", re"(a|)")
+  check match("", re"(a|)")
+  check(not match("b", re"(a|)"))
+  check match("b", re"(|b)")
+  check match("", re"(|b)")
+  check(not match("a", re"(|b)"))
+  check match("", re"||")
+  check match("a", re"a||")
+  check match("", re"a||")
+  check match("b", re"||b")
+  check match("", re"||b")
+  check match("", re"a||b")
+  check match("a", re"a||b")
+  check match("b", re"a||b")
+  check match("", re"|||")
+  check match("a", re"a|||")
+  check match("", re"a|||")
+  check match("b", re"|||b")
+  check match("", re"|||b")
+  check match("a", re"a|||b")
+  check match("b", re"a|||b")
+  check match("", re"(||)")
+  check match("a", re"(a||)")
+  check match("", re"(a||)")
+  check match("b", re"(||b)")
+  check match("", re"(||b)")
+  check match("1.1.1.1", re"(\d+)\.(\d+)(\.(\d+)|)(\.(\d+)|)")
+  check match("1.1.1", re"(\d+)\.(\d+)(\.(\d+)|)(\.(\d+)|)")
+  check match("1.1", re"(\d+)\.(\d+)(\.(\d+)|)(\.(\d+)|)")
+
+test "issue_101":
+  var m: RegexMatch
+  check match("TXT1/TXT2.1", re"(TXT1)/TXT2()\.(\d+)")
+  check match("TXT1/TXT2.1", re"(TXT1)/TXT2(?:)\.(\d+)")
+  check match("TXT1/TXT2.1", re"(TXT1)/TXT2(?i:)\.(\d+)")
+  check match("TXT1/TXT2.1", re"(TXT1)/TXT2(?P<foo>)\.(\d+)")
+  check match("TXT1/TXT2.1", re"(TXT1)/TXT2()\.(\d+)", m) and
+    m.captures == @[@[0 .. 3], @[9 .. 8], @[10 .. 10]]
+  check match(" ", re"(?x)     (?-x) ")
+  check match("aa", re"((?x)a    )a")
+  check match("aa", re"((?x)   a    )a")
+  check match(" ", re"((?x)) ")
+  check match(" ", re"((?x)     ) ")
+  check match(" ", re"(?x:(?x)     ) ")
+  check match(" ", re"(?x:) ")
+  check match(" ", re"(?x:   ) ")
+  check match(" ", re"((?x:)) ")
+  check match("A", re"(?xi)     a")
+  check(not match("A", re"((?xi))     a"))
+  check(not match("A", re"(?xi:(?xi)     )a"))

Debdiff

[The following lists of changes regard files as different if they have different names, permissions or owners.]

Files in second set of .debs but not in first

-rw-r--r--  root/root   /usr/share/nimble/regex/regex/dotgraph.nim
-rw-r--r--  root/root   /usr/share/nimble/regex/regex/exptype.nim
-rw-r--r--  root/root   /usr/share/nimble/regex/regex/types.nim

Files in first set of .debs but not in second

-rw-r--r--  root/root   /usr/share/nimble/regex/regex/nodetype.nim

No differences were encountered in the control files

More details

Full run details