New Upstream Release - golang-github-victoriametrics-metricsql

Ready changes

Summary

Merged new upstream version: 0.56.1+ds1 (was: 0.49.0+ds1).

Diff

diff --git a/aggr.go b/aggr.go
index 5dfe5ae..b8c77c3 100644
--- a/aggr.go
+++ b/aggr.go
@@ -29,6 +29,7 @@ var aggrFuncs = map[string]bool{
 	"outliersk":      true,
 	"quantile":       true,
 	"quantiles":      true,
+	"share":          true,
 	"stddev":         true,
 	"stdvar":         true,
 	"sum":            true,
diff --git a/debian/changelog b/debian/changelog
index 36e6fbc..129a4db 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+golang-github-victoriametrics-metricsql (0.56.1+ds1-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Fri, 07 Apr 2023 23:05:01 -0000
+
 golang-github-victoriametrics-metricsql (0.49.0+ds1-1) unstable; urgency=medium
 
   * New upstream release.
diff --git a/lexer.go b/lexer.go
index c1a0ec6..f0dec0a 100644
--- a/lexer.go
+++ b/lexer.go
@@ -289,6 +289,9 @@ func scanPositiveNumber(s string) (string, error) {
 }
 
 func scanNumMultiplier(s string) int {
+	if len(s) > 3 {
+		s = s[:3]
+	}
 	s = strings.ToLower(s)
 	switch true {
 	case strings.HasPrefix(s, "kib"):
@@ -331,19 +334,22 @@ func scanNumMultiplier(s string) int {
 func scanIdent(s string) string {
 	i := 0
 	for i < len(s) {
-		if isIdentChar(s[i]) {
-			i++
+		r, size := utf8.DecodeRuneInString(s[i:])
+		if i == 0 && isFirstIdentChar(r) || i > 0 && isIdentChar(r) {
+			i += size
 			continue
 		}
-		if s[i] != '\\' {
+		if r != '\\' {
 			break
 		}
-		i++
-
-		// Do not verify the next char, since it is escaped.
-		// The next char may be encoded as multi-byte UTF8 sequence. See https://en.wikipedia.org/wiki/UTF-8#Encoding
-		_, size := utf8.DecodeRuneInString(s[i:])
 		i += size
+		r, n := decodeEscapeSequence(s[i:])
+		if r == utf8.RuneError {
+			// Invalid escape sequence
+			i -= size
+			break
+		}
+		i += n
 	}
 	if i == 0 {
 		panic("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent")
@@ -360,23 +366,12 @@ func unescapeIdent(s string) string {
 	for {
 		dst = append(dst, s[:n]...)
 		s = s[n+1:]
-		if len(s) == 0 {
-			return string(dst)
-		}
-		if s[0] == 'x' && len(s) >= 3 {
-			h1 := fromHex(s[1])
-			h2 := fromHex(s[2])
-			if h1 >= 0 && h2 >= 0 {
-				dst = append(dst, byte((h1<<4)|h2))
-				s = s[3:]
-			} else {
-				dst = append(dst, s[0])
-				s = s[1:]
-			}
+		r, size := decodeEscapeSequence(s)
+		if r == utf8.RuneError {
+			// Cannot decode escape sequence. Put it in the output as is
+			dst = append(dst, '\\')
 		} else {
-			// UTF8 char. See https://en.wikipedia.org/wiki/UTF-8#Encoding
-			_, size := utf8.DecodeRuneInString(s)
-			dst = append(dst, s[:size]...)
+			dst = utf8.AppendRune(dst, r)
 			s = s[size:]
 		}
 		n = strings.IndexByte(s, '\\')
@@ -387,49 +382,16 @@ func unescapeIdent(s string) string {
 	}
 }
 
-func fromHex(ch byte) int {
-	if ch >= '0' && ch <= '9' {
-		return int(ch - '0')
-	}
-	if ch >= 'a' && ch <= 'f' {
-		return int((ch - 'a') + 10)
-	}
-	if ch >= 'A' && ch <= 'F' {
-		return int((ch - 'A') + 10)
-	}
-	return -1
-}
-
-func toHex(n byte) byte {
-	if n < 10 {
-		return '0' + n
-	}
-	return 'a' + (n - 10)
-}
-
 func appendEscapedIdent(dst []byte, s string) []byte {
-	for i := 0; i < len(s); i++ {
-		ch := s[i]
-		if isIdentChar(ch) {
-			if i == 0 && !isFirstIdentChar(ch) {
-				// hex-encode the first char
-				dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf))
-			} else {
-				dst = append(dst, ch)
-			}
-			continue
-		}
-
-		// escape ch
-		dst = append(dst, '\\')
+	i := 0
+	for i < len(s) {
 		r, size := utf8.DecodeRuneInString(s[i:])
-		if r != utf8.RuneError && unicode.IsPrint(r) {
-			dst = append(dst, s[i:i+size]...)
-			i += size - 1
+		if i == 0 && isFirstIdentChar(r) || i > 0 && isIdentChar(r) {
+			dst = utf8.AppendRune(dst, r)
 		} else {
-			// hex-encode non-printable chars
-			dst = append(dst, 'x', toHex(ch>>4), toHex(ch&0xf))
+			dst = appendEscapeSequence(dst, r)
 		}
+		i += size
 	}
 	return dst
 }
@@ -560,13 +522,17 @@ func DurationValue(s string, step int64) (int64, error) {
 	if len(s) == 0 {
 		return 0, fmt.Errorf("duration cannot be empty")
 	}
-	// Try parsing floating-point duration
-	d, err := strconv.ParseFloat(s, 64)
-	if err == nil {
-		// Convert the duration to milliseconds.
-		return int64(d * 1000), nil
+	lastChar := s[len(s)-1]
+	if lastChar >= '0' && lastChar <= '9' || lastChar == '.' {
+		// Try parsing floating-point duration
+		d, err := strconv.ParseFloat(s, 64)
+		if err == nil {
+			// Convert the duration to milliseconds.
+			return int64(d * 1000), nil
+		}
 	}
 	isMinus := false
+	d := float64(0)
 	for len(s) > 0 {
 		n := scanSingleDuration(s, true)
 		if n <= 0 {
@@ -593,6 +559,7 @@ func DurationValue(s string, step int64) (int64, error) {
 }
 
 func parseSingleDuration(s string, step int64) (float64, error) {
+	s = strings.ToLower(s)
 	numPart := s[:len(s)-1]
 	if strings.HasSuffix(numPart, "m") {
 		// Duration in ms
@@ -672,14 +639,26 @@ func scanSingleDuration(s string, canBeNegative bool) int {
 			return -1
 		}
 	}
-	switch s[i] {
+	switch unicode.ToLower(rune(s[i])) {
 	case 'm':
-		if i+1 < len(s) && s[i+1] == 's' {
-			// duration in ms
-			return i + 2
+		if i+1 < len(s) {
+			switch unicode.ToLower(rune(s[i+1])) {
+			case 's':
+				// duration in ms
+				return i + 2
+			case 'i', 'b':
+				// This is not a duration, but Mi or MB suffix.
+				// See parsePositiveNumber() and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664
+				return -1
+			}
 		}
-		// duration in minutes
-		return i + 1
+		// Allow small m for duration in minutes.
+		// Big M means 1e6.
+		// See parsePositiveNumber() and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664
+		if s[i] == 'm' {
+			return i + 1
+		}
+		return -1
 	case 's', 'h', 'd', 'w', 'y', 'i':
 		return i + 1
 	default:
@@ -699,25 +678,26 @@ func isIdentPrefix(s string) bool {
 	if len(s) == 0 {
 		return false
 	}
-	if s[0] == '\\' {
-		// Assume this is an escape char for the next char.
-		return true
+	r, size := utf8.DecodeRuneInString(s)
+	if r == '\\' {
+		r, _ = decodeEscapeSequence(s[size:])
+		return r != utf8.RuneError
 	}
-	return isFirstIdentChar(s[0])
+	return isFirstIdentChar(r)
 }
 
-func isFirstIdentChar(ch byte) bool {
-	if ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' {
+func isFirstIdentChar(r rune) bool {
+	if unicode.IsLetter(r) {
 		return true
 	}
-	return ch == '_' || ch == ':'
+	return r == '_' || r == ':'
 }
 
-func isIdentChar(ch byte) bool {
-	if isFirstIdentChar(ch) {
+func isIdentChar(r rune) bool {
+	if isFirstIdentChar(r) {
 		return true
 	}
-	return isDecimalChar(ch) || ch == '.'
+	return r < 256 && isDecimalChar(byte(r)) || r == '.'
 }
 
 func isSpaceChar(ch byte) bool {
@@ -728,3 +708,67 @@ func isSpaceChar(ch byte) bool {
 		return false
 	}
 }
+
+func appendEscapeSequence(dst []byte, r rune) []byte {
+	dst = append(dst, '\\')
+	if unicode.IsPrint(r) {
+		return utf8.AppendRune(dst, r)
+	}
+	// hex-encode non-printable chars
+	if r < 256 {
+		return append(dst, 'x', toHex(byte(r>>4)), toHex(byte(r&0xf)))
+	}
+	return append(dst, 'u', toHex(byte(r>>12)), toHex(byte((r>>8)&0xf)), toHex(byte(r>>4)), toHex(byte(r&0xf)))
+}
+
+func decodeEscapeSequence(s string) (rune, int) {
+	if strings.HasPrefix(s, "x") || strings.HasPrefix(s, "X") {
+		if len(s) >= 3 {
+			h1 := fromHex(s[1])
+			h2 := fromHex(s[2])
+			if h1 >= 0 && h2 >= 0 {
+				r := rune((h1 << 4) | h2)
+				return r, 3
+			}
+		}
+		return utf8.RuneError, 0
+	}
+	if strings.HasPrefix(s, "u") || strings.HasPrefix(s, "U") {
+		if len(s) >= 5 {
+			h1 := fromHex(s[1])
+			h2 := fromHex(s[2])
+			h3 := fromHex(s[3])
+			h4 := fromHex(s[4])
+			if h1 >= 0 && h2 >= 0 && h3 >= 0 && h4 >= 0 {
+				return rune((h1 << 12) | (h2 << 8) | (h3 << 4) | h4), 5
+			}
+		}
+		return utf8.RuneError, 0
+	}
+	r, size := utf8.DecodeRuneInString(s)
+	if unicode.IsPrint(r) {
+		return r, size
+	}
+	// Improperly escaped non-printable char
+	return utf8.RuneError, 0
+}
+
+func fromHex(ch byte) int {
+	if ch >= '0' && ch <= '9' {
+		return int(ch - '0')
+	}
+	if ch >= 'a' && ch <= 'f' {
+		return int((ch - 'a') + 10)
+	}
+	if ch >= 'A' && ch <= 'F' {
+		return int((ch - 'A') + 10)
+	}
+	return -1
+}
+
+func toHex(n byte) byte {
+	if n < 10 {
+		return '0' + n
+	}
+	return 'a' + (n - 10)
+}
diff --git a/lexer_test.go b/lexer_test.go
index 47cea27..e58e352 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -192,15 +192,23 @@ func TestUnescapeIdent(t *testing.T) {
 	}
 	f("", "")
 	f("a", "a")
-	f("\\", "")
+	f("\\", `\`)
 	f(`\\`, `\`)
 	f(`\foo\-bar`, `foo-bar`)
 	f(`a\\\\b\"c\d`, `a\\b"cd`)
 	f(`foo.bar:baz_123`, `foo.bar:baz_123`)
 	f(`foo\ bar`, `foo bar`)
 	f(`\x21`, `!`)
-	f(`\xeDfoo\x2Fbar\-\xqw\x`, "\xedfoo\x2fbar-xqwx")
+	f(`\X21`, `!`)
+	f(`\x7Dfoo\x2Fbar\-\xqw\x`, "}foo/bar-\\xqw\\x")
 	f(`\п\р\и\в\е\т123`, "привет123")
+	f(`123`, `123`)
+	f(`\123`, `123`)
+	f(`привет\-\foo`, "привет-foo")
+	f(`\u0965`, "\u0965")
+	f(`\U0965`, "\u0965")
+	f(`\u202c`, "\u202c")
+	f(`\U202ca`, "\u202ca")
 }
 
 func TestAppendEscapedIdent(t *testing.T) {
@@ -214,9 +222,13 @@ func TestAppendEscapedIdent(t *testing.T) {
 	f(`a`, `a`)
 	f(`a.b:c_23`, `a.b:c_23`)
 	f(`a b-cd+dd\`, `a\ b\-cd\+dd\\`)
-	f("a\x1E\x20\xee", `a\x1e\ \xee`)
-	f("\x2e\x2e", `\x2e.`)
-	f("привет123", `\п\р\и\в\е\т123`)
+	f("a\x1E\x20\x7e", `a\x1e\ \~`)
+	f("\x2e\x2e", `\..`)
+	f("123", `\123`)
+	f("+43.6", `\+43.6`)
+	f("привет123(a-b)", `привет123\(a\-b\)`)
+	f("\u0965", `\॥`)
+	f("\u202c", `\u202c`)
 }
 
 func TestScanIdent(t *testing.T) {
@@ -232,8 +244,23 @@ func TestScanIdent(t *testing.T) {
 	f("a+b", "a")
 	f("foo()", "foo")
 	f(`a\-b+c`, `a\-b`)
-	f(`a\ b\\\ c\`, `a\ b\\\ c\`)
+	f(`a\ b\\\ c\`, `a\ b\\\ c`)
 	f(`\п\р\и\в\е\т123`, `\п\р\и\в\е\т123`)
+	f(`привет123!foo`, `привет123`)
+	f(`\1fooЫ+bar`, `\1fooЫ`)
+	f(`\u7834*аа`, `\u7834`)
+	f(`\U7834*аа`, `\U7834`)
+	f(`\x7834*аа`, `\x7834`)
+	f(`\X7834*аа`, `\X7834`)
+	f(`a\x+b`, `a`)
+	f(`a\x1+b`, `a`)
+	f(`a\x12+b`, `a\x12`)
+	f(`a\u+b`, `a`)
+	f(`a\u1+b`, `a`)
+	f(`a\u12+b`, `a`)
+	f(`a\u123+b`, `a`)
+	f(`a\u1234+b`, `a\u1234`)
+	f("a\\\u202c", `a`)
 }
 
 func TestLexerNextPrev(t *testing.T) {
@@ -442,9 +469,6 @@ func TestLexerError(t *testing.T) {
 	testLexerError(t, `'`)
 	testLexerError(t, "`")
 
-	// Unrecognized char
-	testLexerError(t, "тест")
-
 	// Invalid numbers
 	testLexerError(t, `.`)
 	testLexerError(t, `12e`)
@@ -513,6 +537,13 @@ func TestPositiveDurationSuccess(t *testing.T) {
 	f("1.23", 45, 1230)
 	f("0.56", 12, 560)
 	f(".523e2", 21, 52300)
+
+	// Duration suffixes in mixed case.
+	f("1Ms", 45, 1)
+	f("1mS", 45, 1)
+	f("1H", 45, 1*60*60*1000)
+	f("1D", 45, 1*24*60*60*1000)
+	f("1Y", 45, 1*365*24*60*60*1000)
 }
 
 func TestPositiveDurationError(t *testing.T) {
@@ -532,9 +563,16 @@ func TestPositiveDurationError(t *testing.T) {
 	f("1.23mm")
 	f("123q")
 	f("-123s")
+	f("1.23.4434s")
+	f("1mi")
+	f("1mb")
 
 	// Too big duration
 	f("10000000000y")
+
+	// Uppercase M isn't a duration, but a 1e6 multiplier.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664
+	f("1M")
 }
 
 func TestDurationSuccess(t *testing.T) {
@@ -588,6 +626,15 @@ func TestDurationSuccess(t *testing.T) {
 	f("1.23", 45, 1230)
 	f("-0.56", 12, -560)
 	f("-.523e2", 21, -52300)
+
+	// Duration suffix in mixed case.
+	f("-1Ms", 10, -1)
+	f("-2.5mS", 10, -2)
+	f("-1mS", 10, -1)
+	f("-1H", 10, -1*60*60*1000)
+	f("-3.H", 10, -3*60*60*1000)
+	f("1D", 10, 1*24*60*60*1000)
+	f("-.1Y", 10, -0.1*365*24*60*60*1000)
 }
 
 func TestDurationError(t *testing.T) {
@@ -607,4 +654,10 @@ func TestDurationError(t *testing.T) {
 	f("1.23mm")
 	f("123q")
 	f("-123q")
+	f("-5.3mb")
+	f("-5.3mi")
+
+	// M isn't a duration, but a 1e6 multiplier.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664
+	f("-5.3M")
 }
diff --git a/optimizer.go b/optimizer.go
index 3a432e6..ff5ad35 100644
--- a/optimizer.go
+++ b/optimizer.go
@@ -392,7 +392,8 @@ func getTransformArgIdxForOptimization(funcName string, args []Expr) int {
 		return -1
 	case "limit_offset":
 		return 2
-	case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile":
+	case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile",
+		"range_trim_outliers", "range_trim_spikes", "range_trim_zscore":
 		return 1
 	case "histogram_quantiles":
 		return len(args) - 1
diff --git a/parser_test.go b/parser_test.go
index 4245469..482cc48 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -25,6 +25,7 @@ func TestParseSuccess(t *testing.T) {
 	same(`{}`)
 	same(`{}[5m]`)
 	same(`{}[5m:]`)
+	same(`{}[5M:]`)
 	same(`{}[:]`)
 	another(`{}[: ]`, `{}[:]`)
 	same(`{}[:3s]`)
@@ -33,6 +34,7 @@ func TestParseSuccess(t *testing.T) {
 	another(`{}[ 5m : 3s ]`, `{}[5m:3s]`)
 	same(`{} offset 5m`)
 	same(`{} offset -5m`)
+	same(`{} offset 5M`)
 	same(`{}[5m] offset 10y`)
 	same(`{}[5.3m:3.4s] offset 10y`)
 	same(`{}[:3.4s] offset 10y`)
@@ -106,7 +108,9 @@ func TestParseSuccess(t *testing.T) {
 	// identifiers with with escape chars
 	same(`foo\ bar`)
 	same(`foo\-bar\{{baz\+bar="aa"}`)
-	another(`\x2E\x2ef\oo{b\xEF\ar="aa"}`, `\x2e.foo{b\xefar="aa"}`)
+	another(`\x2E\x2ef\oo{b\xEF\ar="aa"}`, `\..foo{bïar="aa"}`)
+	same(`温度{房间="水电费"}[5m] offset 10m`)
+	another(`\温\度{\房\间="水电费"}[5m] offset 10m`, `温度{房间="水电费"}[5m] offset 10m`)
 	same(`sum(fo\|o) by (b\|a, x)`)
 	another(`sum(x) by (b\x7Ca)`, `sum(x) by (b\|a)`)
 	// Duplicate filters
@@ -144,6 +148,14 @@ func TestParseSuccess(t *testing.T) {
 	same(`foo - 123.`)
 	same(`12.e+4`)
 	same(`12Ki`)
+	same(`12Kib`)
+	same(`12Mi`)
+	same(`12Mb`)
+	same(`12MB`)
+	same(`(rate(foo)[5m] * 8) > 45Mi`)
+	same(`(rate(foo)[5m] * 8) > 45mi`)
+	same(`(rate(foo)[5m] * 8) > 45mI`)
+	same(`(rate(foo)[5m] * 8) > 45Mib`)
 	same(`1.23Gb`)
 	same(`foo - 23M`)
 	another(`-1.23Gb`, `-1.23e+09`)
@@ -182,6 +194,7 @@ func TestParseSuccess(t *testing.T) {
 	same(`1h`)
 	another(`-1h`, `0 - 1h`)
 	same(`0.34h4m5s`)
+	same(`0.34H4m5S`)
 	another(`-0.34h4m5s`, `0 - 0.34h4m5s`)
 	same(`sum_over_tme(m[1h]) / 1h`)
 	same(`sum_over_time(m[3600]) / 3600`)
diff --git a/rollup.go b/rollup.go
index eb73d2f..b049508 100644
--- a/rollup.go
+++ b/rollup.go
@@ -42,6 +42,7 @@ var rollupFuncs = map[string]bool{
 	"lag":                     true,
 	"last_over_time":          true,
 	"lifetime":                true,
+	"mad_over_time":           true,
 	"max_over_time":           true,
 	"min_over_time":           true,
 	"mode_over_time":          true,
diff --git a/transform.go b/transform.go
index 5876c82..2276960 100644
--- a/transform.go
+++ b/transform.go
@@ -74,6 +74,7 @@ var transformFuncs = map[string]bool{
 	"range_first":                true,
 	"range_last":                 true,
 	"range_linear_regression":    true,
+	"range_mad":                  true,
 	"range_max":                  true,
 	"range_min":                  true,
 	"range_normalize":            true,
@@ -81,6 +82,10 @@ var transformFuncs = map[string]bool{
 	"range_stddev":               true,
 	"range_stdvar":               true,
 	"range_sum":                  true,
+	"range_trim_outliers":        true,
+	"range_trim_spikes":          true,
+	"range_trim_zscore":          true,
+	"range_zscore":               true,
 	"remove_resets":              true,
 	"round":                      true,
 	"running_avg":                true,

More details

Full run details

Historical runs