Skip to content
Snippets Groups Projects
Verified Commit 7f7e2394 authored by Volker Schukai's avatar Volker Schukai :alien:
Browse files

fix: update libs

parent ca96f3e3
Branches
Tags 0.4.17
No related merge requests found
Showing
with 3159 additions and 100 deletions
......@@ -73,6 +73,8 @@ const (
tabWidthKey
underlineSpacesKey
strikethroughSpacesKey
transformKey
)
// A set of properties.
......@@ -225,6 +227,8 @@ func (s Style) Render(strs ...string) string {
// Do we need to style spaces separately?
useSpaceStyler = underlineSpaces || strikethroughSpaces
transform = s.getAsTransform(transformKey)
)
if len(s.rules) == 0 {
......@@ -401,6 +405,10 @@ func (s Style) Render(strs ...string) string {
str = strings.Join(lines[:min(maxHeight, len(lines))], "\n")
}
if transform != nil {
return transform(str)
}
return str
}
......@@ -456,36 +464,23 @@ func (s Style) applyMargins(str string, inline bool) string {
// Apply left padding.
func padLeft(str string, n int, style *termenv.Style) string {
if n == 0 {
return str
}
sp := strings.Repeat(" ", n)
if style != nil {
sp = style.Styled(sp)
}
b := strings.Builder{}
l := strings.Split(str, "\n")
for i := range l {
b.WriteString(sp)
b.WriteString(l[i])
if i != len(l)-1 {
b.WriteRune('\n')
}
}
return b.String()
return pad(str, -n, style)
}
// Apply right padding.
func padRight(str string, n int, style *termenv.Style) string {
if n == 0 || str == "" {
return pad(str, n, style)
}
// pad adds padding to either the left or right side of a string.
// Positive values add to the right side while negative values
// add to the left side.
func pad(str string, n int, style *termenv.Style) string {
if n == 0 {
return str
}
sp := strings.Repeat(" ", n)
sp := strings.Repeat(" ", abs(n))
if style != nil {
sp = style.Styled(sp)
}
......@@ -494,8 +489,17 @@ func padRight(str string, n int, style *termenv.Style) string {
l := strings.Split(str, "\n")
for i := range l {
switch {
// pad right
case n > 0:
b.WriteString(l[i])
b.WriteString(sp)
// pad left
default:
b.WriteString(sp)
b.WriteString(l[i])
}
if i != len(l)-1 {
b.WriteRune('\n')
}
......@@ -517,3 +521,11 @@ func min(a, b int) int {
}
return b
}
// abs returns the absolute value of a: non-negative inputs are returned
// unchanged and negative inputs are negated.
func abs(a int) int {
	if a >= 0 {
		return a
	}
	return -a
}
......@@ -305,6 +305,12 @@ func (s Style) UnsetStrikethroughSpaces() Style {
return s
}
// UnsetTransform removes the value set by Transform. It deletes the
// transformKey entry from the style's rules and returns the style.
func (s Style) UnsetTransform() Style {
delete(s.rules, transformKey)
return s
}
// UnsetString sets the underlying string value to the empty string.
func (s Style) UnsetString() Style {
s.value = ""
......
......@@ -210,18 +210,18 @@ defined at a global level in [styles.go](./styles.go).
```go
// Override the default error level style.
log.ErrorLevelStyle = lipgloss.NewStyle().
styles := log.DefaultStyles()
styles.Levels[log.ErrorLevel] = lipgloss.NewStyle().
SetString("ERROR!!").
Padding(0, 1, 0, 1).
Background(lipgloss.AdaptiveColor{
Light: "203",
Dark: "204",
}).
Background(lipgloss.Color("204")).
Foreground(lipgloss.Color("0"))
// Add a custom style for key `err`
log.KeyStyles["err"] = lipgloss.NewStyle().Foreground(lipgloss.Color("204"))
log.ValueStyles["err"] = lipgloss.NewStyle().Bold(true)
log.Error("Whoops!", "err", "kitchen on fire")
styles.Keys["err"] = lipgloss.NewStyle().Foreground(lipgloss.Color("204"))
styles.Values["err"] = lipgloss.NewStyle().Bold(true)
logger := log.New(os.Stderr)
logger.SetStyles(styles)
logger.Error("Whoops!", "err", "kitchen on fire")
```
<picture>
......@@ -288,7 +288,7 @@ Skip caller frames in helper functions. Similar to what you can do with
`testing.TB().Helper()`.
```go
function startOven(degree int) {
func startOven(degree int) {
log.Helper()
log.Info("Starting oven", "degree", degree)
}
......
......@@ -14,7 +14,7 @@ func FromContext(ctx context.Context) *Logger {
if logger, ok := ctx.Value(ContextKey).(*Logger); ok {
return logger
}
return defaultLogger
return Default()
}
type contextKey struct{ string }
......
......@@ -15,10 +15,8 @@ import (
"github.com/muesli/termenv"
)
var (
// ErrMissingValue is returned when a key is missing a value.
ErrMissingValue = fmt.Errorf("missing value")
)
var ErrMissingValue = fmt.Errorf("missing value")
// LoggerOption is an option for a logger.
type LoggerOption = func(*Logger)
......@@ -49,7 +47,13 @@ type Logger struct {
styles *Styles
}
func (l *Logger) log(level Level, msg interface{}, keyvals ...interface{}) {
// Logf builds the message from format and args with fmt.Sprintf and logs it
// at the given level.
func (l *Logger) Logf(level Level, format string, args ...interface{}) {
	msg := fmt.Sprintf(format, args...)
	l.Log(level, msg)
}
// Log logs the given message with the given keyvals for the given level.
func (l *Logger) Log(level Level, msg interface{}, keyvals ...interface{}) {
if atomic.LoadUint32(&l.isDiscard) != 0 {
return
}
......@@ -74,7 +78,7 @@ func (l *Logger) log(level Level, msg interface{}, keyvals ...interface{}) {
}
}
}
l.handle(level, l.timeFunc(), []runtime.Frame{frame}, msg, keyvals...)
l.handle(level, l.timeFunc(time.Now()), []runtime.Frame{frame}, msg, keyvals...)
}
func (l *Logger) handle(level Level, ts time.Time, frames []runtime.Frame, msg interface{}, keyvals ...interface{}) {
......@@ -83,7 +87,8 @@ func (l *Logger) handle(level Level, ts time.Time, frames []runtime.Frame, msg i
kvs = append(kvs, TimestampKey, ts)
}
if level != noLevel {
_, ok := l.styles.Levels[level]
if ok {
kvs = append(kvs, LevelKey, level)
}
......@@ -343,62 +348,62 @@ func (l *Logger) WithPrefix(prefix string) *Logger {
// Debug prints a debug message.
func (l *Logger) Debug(msg interface{}, keyvals ...interface{}) {
l.log(DebugLevel, msg, keyvals...)
l.Log(DebugLevel, msg, keyvals...)
}
// Info prints an info message.
func (l *Logger) Info(msg interface{}, keyvals ...interface{}) {
l.log(InfoLevel, msg, keyvals...)
l.Log(InfoLevel, msg, keyvals...)
}
// Warn prints a warning message.
func (l *Logger) Warn(msg interface{}, keyvals ...interface{}) {
l.log(WarnLevel, msg, keyvals...)
l.Log(WarnLevel, msg, keyvals...)
}
// Error prints an error message.
func (l *Logger) Error(msg interface{}, keyvals ...interface{}) {
l.log(ErrorLevel, msg, keyvals...)
l.Log(ErrorLevel, msg, keyvals...)
}
// Fatal prints a fatal message and exits.
func (l *Logger) Fatal(msg interface{}, keyvals ...interface{}) {
l.log(FatalLevel, msg, keyvals...)
l.Log(FatalLevel, msg, keyvals...)
os.Exit(1)
}
// Print prints a message with no level.
func (l *Logger) Print(msg interface{}, keyvals ...interface{}) {
l.log(noLevel, msg, keyvals...)
l.Log(noLevel, msg, keyvals...)
}
// Debugf prints a debug message with formatting.
func (l *Logger) Debugf(format string, args ...interface{}) {
l.log(DebugLevel, fmt.Sprintf(format, args...))
l.Log(DebugLevel, fmt.Sprintf(format, args...))
}
// Infof prints an info message with formatting.
func (l *Logger) Infof(format string, args ...interface{}) {
l.log(InfoLevel, fmt.Sprintf(format, args...))
l.Log(InfoLevel, fmt.Sprintf(format, args...))
}
// Warnf prints a warning message with formatting.
func (l *Logger) Warnf(format string, args ...interface{}) {
l.log(WarnLevel, fmt.Sprintf(format, args...))
l.Log(WarnLevel, fmt.Sprintf(format, args...))
}
// Errorf prints an error message with formatting.
func (l *Logger) Errorf(format string, args ...interface{}) {
l.log(ErrorLevel, fmt.Sprintf(format, args...))
l.Log(ErrorLevel, fmt.Sprintf(format, args...))
}
// Fatalf prints a fatal message with formatting and exits.
func (l *Logger) Fatalf(format string, args ...interface{}) {
l.log(FatalLevel, fmt.Sprintf(format, args...))
l.Log(FatalLevel, fmt.Sprintf(format, args...))
os.Exit(1)
}
// Printf prints a message with no level and formatting.
func (l *Logger) Printf(format string, args ...interface{}) {
l.log(noLevel, fmt.Sprintf(format, args...))
l.Log(noLevel, fmt.Sprintf(format, args...))
}
......@@ -5,10 +5,9 @@ package log
import (
"context"
"log/slog"
"runtime"
"sync/atomic"
"log/slog"
)
// Enabled reports whether the logger is enabled for the given level.
......@@ -21,7 +20,11 @@ func (l *Logger) Enabled(_ context.Context, level slog.Level) bool {
// Handle handles the Record. It will only be called if Enabled returns true.
//
// Implements slog.Handler.
func (l *Logger) Handle(_ context.Context, record slog.Record) error {
func (l *Logger) Handle(ctx context.Context, record slog.Record) error {
if !l.Enabled(ctx, record.Level) {
return nil
}
fields := make([]interface{}, 0, record.NumAttrs()*2)
record.Attrs(func(a slog.Attr) bool {
fields = append(fields, a.Key, a.Value.String())
......@@ -30,7 +33,7 @@ func (l *Logger) Handle(_ context.Context, record slog.Record) error {
// Get the caller frame using the record's PC.
frames := runtime.CallersFrames([]uintptr{record.PC})
frame, _ := frames.Next()
l.handle(fromSlogLevel[record.Level], record.Time, []runtime.Frame{frame}, record.Message, fields...)
l.handle(fromSlogLevel[record.Level], l.timeFunc(record.Time), []runtime.Frame{frame}, record.Message, fields...)
return nil
}
......
......@@ -30,7 +30,7 @@ func (l *Logger) Handle(_ context.Context, record slog.Record) error {
// Get the caller frame using the record's PC.
frames := runtime.CallersFrames([]uintptr{record.PC})
frame, _ := frames.Next()
l.handle(fromSlogLevel[record.Level], record.Time, []runtime.Frame{frame}, record.Message, fields...)
l.handle(fromSlogLevel[record.Level], l.timeFunc(record.Time), []runtime.Frame{frame}, record.Message, fields...)
return nil
}
......
......@@ -9,7 +9,7 @@ import (
const DefaultTimeFormat = "2006/01/02 15:04:05"
// TimeFunction is a function that returns a time.Time.
type TimeFunction = func() time.Time
type TimeFunction = func(time.Time) time.Time
// NowUTC is a convenient function that returns the
// current time in UTC timezone.
......@@ -18,8 +18,8 @@ type TimeFunction = func() time.Time
// For example:
//
// log.SetTimeFunction(log.NowUTC)
func NowUTC() time.Time {
return time.Now().UTC()
func NowUTC(t time.Time) time.Time {
return t.UTC()
}
// CallerFormatter is the caller formatter.
......@@ -50,7 +50,7 @@ type Options struct {
ReportTimestamp bool
// ReportCaller is whether the logger should report the caller location. The default is false.
ReportCaller bool
// CallerFormatter is the caller format for the logger. The default is CallerShort.
// CallerFormatter is the caller format for the logger. The default is ShortCallerFormatter.
CallerFormatter CallerFormatter
// CallerOffset is the caller offset for the logger. The default is 0.
CallerOffset int
......
......@@ -17,11 +17,19 @@ var (
registry = sync.Map{}
// defaultLogger is the default global logger instance.
defaultLogger = NewWithOptions(os.Stderr, Options{ReportTimestamp: true})
defaultLoggerOnce sync.Once
defaultLogger *Logger
)
// Default returns the default logger. When no logger has been installed yet,
// one is created on first use that writes to os.Stderr with timestamps enabled.
func Default() *Logger {
	defaultLoggerOnce.Do(func() {
		// Only build the fallback logger when none was already set
		// (for example via SetDefault).
		if defaultLogger == nil {
			defaultLogger = NewWithOptions(os.Stderr, Options{ReportTimestamp: true})
		}
	})
	return defaultLogger
}
......@@ -62,7 +70,7 @@ func NewWithOptions(w io.Writer, o Options) *Logger {
}
if l.timeFunc == nil {
l.timeFunc = time.Now
l.timeFunc = func(t time.Time) time.Time { return t }
}
if l.timeFormat == "" {
......@@ -74,155 +82,165 @@ func NewWithOptions(w io.Writer, o Options) *Logger {
// SetReportTimestamp sets whether to report timestamp for the default logger.
func SetReportTimestamp(report bool) {
defaultLogger.SetReportTimestamp(report)
Default().SetReportTimestamp(report)
}
// SetReportCaller sets whether to report caller location for the default logger.
func SetReportCaller(report bool) {
defaultLogger.SetReportCaller(report)
Default().SetReportCaller(report)
}
// SetLevel sets the level for the default logger.
func SetLevel(level Level) {
defaultLogger.SetLevel(level)
Default().SetLevel(level)
}
// GetLevel returns the level for the default logger.
func GetLevel() Level {
return defaultLogger.GetLevel()
return Default().GetLevel()
}
// SetTimeFormat sets the time format for the default logger.
func SetTimeFormat(format string) {
defaultLogger.SetTimeFormat(format)
Default().SetTimeFormat(format)
}
// SetTimeFunction sets the time function for the default logger.
func SetTimeFunction(f TimeFunction) {
defaultLogger.SetTimeFunction(f)
Default().SetTimeFunction(f)
}
// SetOutput sets the output for the default logger.
func SetOutput(w io.Writer) {
defaultLogger.SetOutput(w)
Default().SetOutput(w)
}
// SetFormatter sets the formatter for the default logger.
func SetFormatter(f Formatter) {
defaultLogger.SetFormatter(f)
Default().SetFormatter(f)
}
// SetCallerFormatter sets the caller formatter for the default logger.
func SetCallerFormatter(f CallerFormatter) {
defaultLogger.SetCallerFormatter(f)
Default().SetCallerFormatter(f)
}
// SetCallerOffset sets the caller offset for the default logger.
func SetCallerOffset(offset int) {
defaultLogger.SetCallerOffset(offset)
Default().SetCallerOffset(offset)
}
// SetPrefix sets the prefix for the default logger.
func SetPrefix(prefix string) {
defaultLogger.SetPrefix(prefix)
Default().SetPrefix(prefix)
}
// SetColorProfile force sets the underlying Lip Gloss renderer color profile
// for the TextFormatter.
func SetColorProfile(profile termenv.Profile) {
defaultLogger.SetColorProfile(profile)
Default().SetColorProfile(profile)
}
// SetStyles sets the logger styles for the TextFormatter.
func SetStyles(s *Styles) {
defaultLogger.SetStyles(s)
Default().SetStyles(s)
}
// GetPrefix returns the prefix for the default logger.
func GetPrefix() string {
return defaultLogger.GetPrefix()
return Default().GetPrefix()
}
// With returns a new logger with the given keyvals.
func With(keyvals ...interface{}) *Logger {
return defaultLogger.With(keyvals...)
return Default().With(keyvals...)
}
// WithPrefix returns a new logger with the given prefix.
func WithPrefix(prefix string) *Logger {
return defaultLogger.WithPrefix(prefix)
return Default().WithPrefix(prefix)
}
// Helper marks the calling function as a helper
// and skips it for source location information.
// It's the equivalent of testing.TB.Helper().
func Helper() {
defaultLogger.helper(1)
Default().helper(1)
}
// Log logs msg and keyvals at the given level using the default logger.
func Log(level Level, msg interface{}, keyvals ...interface{}) {
	logger := Default()
	logger.Log(level, msg, keyvals...)
}
// Debug logs a debug message.
func Debug(msg interface{}, keyvals ...interface{}) {
defaultLogger.log(DebugLevel, msg, keyvals...)
Default().Log(DebugLevel, msg, keyvals...)
}
// Info logs an info message.
func Info(msg interface{}, keyvals ...interface{}) {
defaultLogger.log(InfoLevel, msg, keyvals...)
Default().Log(InfoLevel, msg, keyvals...)
}
// Warn logs a warning message.
func Warn(msg interface{}, keyvals ...interface{}) {
defaultLogger.log(WarnLevel, msg, keyvals...)
Default().Log(WarnLevel, msg, keyvals...)
}
// Error logs an error message.
func Error(msg interface{}, keyvals ...interface{}) {
defaultLogger.log(ErrorLevel, msg, keyvals...)
Default().Log(ErrorLevel, msg, keyvals...)
}
// Fatal logs a fatal message and exit.
func Fatal(msg interface{}, keyvals ...interface{}) {
defaultLogger.log(FatalLevel, msg, keyvals...)
Default().Log(FatalLevel, msg, keyvals...)
os.Exit(1)
}
// Print logs a message with no level.
func Print(msg interface{}, keyvals ...interface{}) {
defaultLogger.log(noLevel, msg, keyvals...)
Default().Log(noLevel, msg, keyvals...)
}
// Logf logs a formatted message at the given level using the default logger.
func Logf(level Level, format string, args ...interface{}) {
	logger := Default()
	logger.Logf(level, format, args...)
}
// Debugf logs a debug message with formatting.
func Debugf(format string, args ...interface{}) {
defaultLogger.log(DebugLevel, fmt.Sprintf(format, args...))
Default().Log(DebugLevel, fmt.Sprintf(format, args...))
}
// Infof logs an info message with formatting.
func Infof(format string, args ...interface{}) {
defaultLogger.log(InfoLevel, fmt.Sprintf(format, args...))
Default().Log(InfoLevel, fmt.Sprintf(format, args...))
}
// Warnf logs a warning message with formatting.
func Warnf(format string, args ...interface{}) {
defaultLogger.log(WarnLevel, fmt.Sprintf(format, args...))
Default().Log(WarnLevel, fmt.Sprintf(format, args...))
}
// Errorf logs an error message with formatting.
func Errorf(format string, args ...interface{}) {
defaultLogger.log(ErrorLevel, fmt.Sprintf(format, args...))
Default().Log(ErrorLevel, fmt.Sprintf(format, args...))
}
// Fatalf logs a fatal message with formatting and exit.
func Fatalf(format string, args ...interface{}) {
defaultLogger.log(FatalLevel, fmt.Sprintf(format, args...))
Default().Log(FatalLevel, fmt.Sprintf(format, args...))
os.Exit(1)
}
// Printf logs a message with formatting and no level.
func Printf(format string, args ...interface{}) {
defaultLogger.log(noLevel, fmt.Sprintf(format, args...))
Default().Log(noLevel, fmt.Sprintf(format, args...))
}
// StandardLog returns a standard logger from the default logger.
func StandardLog(opts ...StandardLogOptions) *log.Logger {
return defaultLogger.StandardLog(opts...)
return Default().StandardLog(opts...)
}
......@@ -183,9 +183,12 @@ func (l *Logger) textFormatter(keyvals ...interface{}) {
case LevelKey:
if level, ok := keyvals[i+1].(Level); ok {
var lvl string
if lvlStyle, ok := st.Levels[level]; ok {
lvl = lvlStyle.Renderer(l.re).String()
lvlStyle, ok := st.Levels[level]
if !ok {
continue
}
lvl = lvlStyle.Renderer(l.re).String()
if lvl != "" {
writeSpace(&l.b, firstKey)
l.b.WriteString(lvl)
......
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
*.out
.DS_Store
language: go
arch:
- AMD64
- ppc64le
go:
- 1.9
- tip
============
These pieces of code were ported from dotnet/corefx:
syntax/charclass.go (from RegexCharClass.cs): ported to use the built-in Go unicode classes. Canonicalize is
a direct port, but most of the other code required large changes because the C# implementation
used a string to represent the CharSet data structure and I cleaned that up in my implementation.
syntax/code.go (from RegexCode.cs): ported literally with various cleanups and layout to make it more Go-ish.
syntax/escape.go (from RegexParser.cs): ported Escape method and added some optimizations. Unescape is inspired by
the C# implementation but couldn't be directly ported because of the lack of do-while syntax in Go.
syntax/parser.go (from RegexpParser.cs and RegexOptions.cs): ported parser struct and associated methods as
literally as possible. Several language differences required changes. E.g. lack of pre/post-fix increments as
expressions, lack of do-while loops, lack of overloads, etc.
syntax/prefix.go (from RegexFCD.cs and RegexBoyerMoore.cs): ported as literally as possible and added support
for unicode chars that are longer than the 16-bit char in C# for the 32-bit rune in Go.
syntax/replacerdata.go (from RegexReplacement.cs): conceptually ported and re-organized to handle differences
in charclass implementation, and fix odd code layout between RegexParser.cs, Regex.cs, and RegexReplacement.cs.
syntax/tree.go (from RegexTree.cs and RegexNode.cs): ported literally as possible.
syntax/writer.go (from RegexWriter.cs): ported literally with minor changes to make it more Go-ish.
match.go (from RegexMatch.cs): ported, simplified, and changed to handle Go's lack of inheritance.
regexp.go (from Regex.cs and RegexOptions.cs): conceptually serves the same "starting point", but is simplified
and changed to handle differences in C# strings and Go strings/runes.
replace.go (from RegexReplacement.cs): ported closely and then cleaned up to combine the MatchEvaluator and
simple string replace implementations.
runner.go (from RegexRunner.cs): ported literally as possible.
regexp_test.go (from CaptureTests.cs and GroupNamesAndNumbers.cs): conceptually ported, but the code was
manually structured like Go tests.
replace_test.go (from RegexReplaceStringTest0.cs): conceptually ported
rtl_test.go (from RightToLeft.cs): conceptually ported
---
dotnet/corefx was released under this license:
The MIT License (MIT)
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
============
These pieces of code are copied from the Go framework:
- The overall directory structure of regexp2 was inspired by the Go runtime regexp package.
- The optimization in the escape method of syntax/escape.go is from the Go runtime QuoteMeta() func in regexp/regexp.go
- The method signatures in regexp.go are designed to match the Go framework regexp methods closely
- func regexp2.MustCompile and func quote are almost identical to the regexp package versions
- BenchmarkMatch* and TestProgramTooLong* funcs in regexp_performance_test.go were copied from the framework
regexp/exec_test.go
---
The Go framework was released under this license:
Copyright (c) 2012 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
============
Some test data were gathered from the Mono project.
regexp_mono_test.go: ported from https://github.com/mono/mono/blob/master/mcs/class/System/Test/System.Text.RegularExpressions/PerlTrials.cs
---
Mono tests released under this license:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
The MIT License (MIT)
Copyright (c) Doug Clark
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# regexp2 - full featured regular expressions for Go
Regexp2 is a feature-rich RegExp engine for Go. It doesn't have constant time guarantees like the built-in `regexp` package, but it allows backtracking and is compatible with Perl5 and .NET. You'll likely be better off with the RE2 engine from the `regexp` package and should only use this if you need to write very complex patterns or require compatibility with .NET.
## Basis of the engine
The engine is ported from the .NET framework's System.Text.RegularExpressions.Regex engine. That engine was open sourced in 2015 under the MIT license. There are some fundamental differences between .NET strings and Go strings that required a bit of borrowing from the Go framework regex engine as well. I cleaned up a couple of the dirtier bits during the port (regexcharclass.cs was terrible), but the parse tree, code emitted, and therefore patterns matched should be identical.
## Installing
This is a go-gettable library, so install is easy:
go get github.com/dlclark/regexp2/...
## Usage
Usage is similar to the Go `regexp` package. Just like in `regexp`, you start by converting a regex into a state machine via the `Compile` or `MustCompile` methods. They ultimately do the same thing, but `MustCompile` will panic if the regex is invalid. You can then use the provided `Regexp` struct to find matches repeatedly. A `Regexp` struct is safe to use across goroutines.
```go
re := regexp2.MustCompile(`Your pattern`, 0)
if isMatch, _ := re.MatchString(`Something to match`); isMatch {
//do something
}
```
The only error that the `*Match*` methods *should* return is a Timeout if you set the `re.MatchTimeout` field. Any other error is a bug in the `regexp2` package. If you need more details about capture groups in a match then use the `FindStringMatch` method, like so:
```go
if m, _ := re.FindStringMatch(`Something to match`); m != nil {
	// the whole match is always group 0
	fmt.Printf("Group 0: %v\n", m.String())

	// you can get all the groups too
	gps := m.Groups()

	// a group can be captured multiple times, so each cap is separately addressable;
	// the format string needs a verb for the capture text to be printed properly
	fmt.Printf("Group 1, first capture: %v\n", gps[1].Captures[0].String())
	fmt.Printf("Group 1, second capture: %v\n", gps[1].Captures[1].String())
}
```
Group 0 is embedded in the Match. Group 0 is an automatically-assigned group that encompasses the whole pattern. This means that `m.String()` is the same as `m.Group.String()` and `m.Groups()[0].String()`
The __last__ capture is embedded in each group, so `g.String()` will return the same thing as `g.Capture.String()` and `g.Captures[len(g.Captures)-1].String()`.
If you want to find multiple matches from a single input string you should use the `FindNextMatch` method. For example, to implement a function similar to `regexp.FindAllString`:
```go
// regexp2FindAllString returns the text of every match of re in s, in the
// order the matches are found. The result is nil when there is no match.
func regexp2FindAllString(re *regexp2.Regexp, s string) []string {
var matches []string
// Errors returned by the match calls are discarded in this example.
m, _ := re.FindStringMatch(s)
for m != nil {
matches = append(matches, m.String())
// Ask for the match that follows m; the loop stops once m is nil.
m, _ = re.FindNextMatch(m)
}
return matches
}
```
`FindNextMatch` is optimized so that it re-uses the underlying string/rune slice.
The internals of `regexp2` always operate on `[]rune` so `Index` and `Length` data in a `Match` always reference a position in `rune`s rather than `byte`s (even if the input was given as a string). This is a dramatic difference between `regexp` and `regexp2`. It's advisable to use the provided `String()` methods to avoid having to work with indices.
## Compare `regexp` and `regexp2`
| Category | regexp | regexp2 |
| --- | --- | --- |
| Catastrophic backtracking possible | no, constant execution time guarantees | yes, if your pattern is at risk you can use the `re.MatchTimeout` field |
| Python-style capture groups `(?P<name>re)` | yes | no (yes in RE2 compat mode) |
| .NET-style capture groups `(?<name>re)` or `(?'name're)` | no | yes |
| comments `(?#comment)` | no | yes |
| branch numbering reset `(?\|a\|b)` | no | no |
| possessive match `(?>re)` | no | yes |
| positive lookahead `(?=re)` | no | yes |
| negative lookahead `(?!re)` | no | yes |
| positive lookbehind `(?<=re)` | no | yes |
| negative lookbehind `(?<!re)` | no | yes |
| back reference `\1` | no | yes |
| named back reference `\k'name'` | no | yes |
| named ascii character class `[[:foo:]]`| yes | no (yes in RE2 compat mode) |
| conditionals `(?(expr)yes\|no)` | no | yes |
## RE2 compatibility mode
The default behavior of `regexp2` is to match the .NET regexp engine, however the `RE2` option is provided to change the parsing to increase compatibility with RE2. Using the `RE2` option when compiling a regexp will not take away any features, but will change the following behaviors:
* add support for named ascii character classes (e.g. `[[:foo:]]`)
* add support for python-style capture groups (e.g. `(?P<name>re)`)
* change singleline behavior for `$` to only match end of string (like RE2) (see [#24](https://github.com/dlclark/regexp2/issues/24))
* change the character classes `\d` `\s` and `\w` to match the same characters as RE2. NOTE: if you also use the `ECMAScript` option then this will change the `\s` character class to match ECMAScript instead of RE2. ECMAScript allows more whitespace characters in `\s` than RE2 (but still fewer than the default behavior).
* allow character escape sequences to have defaults. For example, by default `\_` isn't a known character escape and will fail to compile, but in RE2 mode it will match the literal character `_`
```go
re := regexp2.MustCompile(`Your RE2-compatible pattern`, regexp2.RE2)
if isMatch, _ := re.MatchString(`Something to match`); isMatch {
//do something
}
```
This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
## Catastrophic Backtracking and Timeouts
`regexp2` supports features that can lead to catastrophic backtracking.
`Regexp.MatchTimeout` can be set to limit the impact of such behavior; the
match will fail with an error after approximately MatchTimeout. No timeout
checks are done by default.
Timeout checking is not free. The current timeout checking implementation starts
a background worker that updates a clock value approximately once every 100
milliseconds. The matching code compares this value against the precomputed
deadline for the match. The performance impact is as follows.
1. A match with a timeout runs almost as fast as a match without a timeout.
2. If any live matches have a timeout, there will be a background CPU load
(`~0.15%` currently on a modern machine). This load will remain constant
regardless of the number of matches done including matches done in parallel.
3. If no live matches are using a timeout, the background load will remain
until the longest deadline (match timeout + the time when the match started)
is reached. E.g., if you set a timeout of one minute the load will persist
for approximately a minute even if the match finishes quickly.
See [PR #58](https://github.com/dlclark/regexp2/pull/58) for more details and
alternatives considered.
## Goroutine leak error
If you're using a library during unit tests (e.g. https://github.com/uber-go/goleak) that validates that all goroutines have exited, then you'll likely get an error if you or any of your dependencies use regexes with a MatchTimeout.
To remedy the problem you'll need to tell the unit test to wait until the background timeout goroutine has exited.
```go
func TestSomething(t *testing.T) {
defer goleak.VerifyNone(t)
defer regexp2.StopTimeoutClock()
// ... test
}
//or
func TestMain(m *testing.M) {
// setup
// ...
// run
m.Run()
//tear down
regexp2.StopTimeoutClock()
goleak.VerifyNone(t)
}
```
This will add ~100ms runtime to each test (or TestMain). If that's too much time you can set the clock cycle rate of the timeout goroutine in an init function in a test file. `regexp2.SetTimeoutCheckPeriod` isn't threadsafe so it must be set up before starting any regexes with timeouts.
```go
func init() {
//speed up testing by making the timeout clock 1ms
regexp2.SetTimeoutCheckPeriod(time.Millisecond)
}
```
## ECMAScript compatibility mode
In this mode the engine provides compatibility with the [regex engine](https://tc39.es/ecma262/multipage/text-processing.html#sec-regexp-regular-expression-objects) described in the ECMAScript specification.
Additionally a Unicode mode is provided which allows parsing of `\u{CodePoint}` syntax; this syntax is only available when both the `ECMAScript` and `Unicode` options are provided.
## Library features that I'm still working on
- Regex split
## Potential bugs
I've run a battery of tests against regexp2 from various sources and found the debug output matches the .NET engine, but .NET and Go handle strings very differently. I've attempted to handle these differences, but most of my testing deals with basic ASCII with a little bit of multi-byte Unicode. There's a chance that there are bugs in the string handling related to character sets with supplementary Unicode chars. Right-to-Left support is coded, but not well tested either.
## Find a bug?
I'm open to new issues and pull requests with tests if you find something odd!
package regexp2
import (
"sync"
"sync/atomic"
"time"
)
// fasttime holds a time value expressed in ticks since clock initialization.
// One tick is the unit produced by durationToTicks.
type fasttime int64
// fastclock provides a fast clock implementation.
//
// A background goroutine periodically stores the current time
// into an atomic variable.
//
// A deadline can be quickly checked for expiration by comparing
// its value to the clock stored in the atomic variable.
//
// The goroutine automatically stops once clockEnd is reached.
// (clockEnd covers the largest deadline seen so far + some
// extra time). This ensures that if regexp2 with timeouts
// stops being used we will stop background work.
type fastclock struct {
// Instances of atomicTime must be at the start of the struct (or at least
// 64-bit aligned); otherwise 32-bit architectures will panic.
current atomicTime // Current time (approximate)
clockEnd atomicTime // When clock updater is supposed to stop (>= any existing deadline)
// current and clockEnd can be read via atomic loads.
// Reads and writes of the fields below require mu to be held.
mu sync.Mutex
start time.Time // Time corresponding to fasttime(0)
running bool // Is a clock updater running?
}
// fast is the package-level clock that reached, makeDeadline, extendClock,
// stopClock and runClock operate on.
var fast fastclock
// reached reports whether the package clock has advanced to t or beyond.
// The clock is only refreshed periodically, so the answer is approximate.
func (t fasttime) reached() bool {
	now := fast.current.read()
	return now >= t
}
// makeDeadline returns a time that is approximately time.Now().Add(d),
// expressed in clock ticks, and makes sure the background clock keeps running
// at least until that deadline.
//
// d may be arbitrarily large (up to math.MaxInt64): the slop added to it is
// saturated rather than allowed to wrap around into a negative duration,
// which would otherwise yield a deadline that has already expired.
func makeDeadline(d time.Duration) fasttime {
	const maxDuration = time.Duration(1<<63 - 1)

	// Increase the deadline since the clock we are reading may be
	// just about to tick forwards.
	timeout := d + clockPeriod
	if clockPeriod > 0 && timeout < d {
		// d + clockPeriod overflowed int64; clamp instead of wrapping.
		timeout = maxDuration
	}
	end := fast.current.read() + durationToTicks(timeout)

	// Start or extend clock if necessary.
	if end > fast.clockEnd.read() {
		extendClock(end)
	}
	return end
}
// extendClock makes sure the clock goroutine is alive and scheduled to keep
// running until at least end (plus one second of slack).
func extendClock(end fasttime) {
	fast.mu.Lock()
	defer fast.mu.Unlock()

	// The very first caller fixes the origin that fasttime(0) refers to.
	if fast.start.IsZero() {
		fast.start = time.Now()
	}

	// Only ever push the shutdown time later, never earlier.
	shutdown := end + durationToTicks(time.Second)
	if fast.clockEnd.read() < shutdown {
		fast.clockEnd.write(shutdown)
	}

	// Nothing more to do when an updater is already ticking.
	if fast.running {
		return
	}
	fast.running = true
	go runClock()
}
// stopClock asks the background clock goroutine to stop and blocks until it
// has exited. It should only be used by unit tests that need the goroutine
// gone (e.g. goroutine-leak checkers).
//
// When no clock goroutine is running it returns immediately instead of
// sleeping for half a clock period first.
func stopClock() {
	fast.mu.Lock()
	running := fast.running
	if running {
		// Moving clockEnd into the past makes runClock leave its loop the
		// next time it compares the current time against clockEnd.
		fast.clockEnd.write(fasttime(0))
	}
	fast.mu.Unlock()

	// Poll until the goroutine reports that it has finished. The lock is
	// taken and released on every probe so runClock can make progress.
	for running {
		time.Sleep(clockPeriod / 2)
		fast.mu.Lock()
		running = fast.running
		fast.mu.Unlock()
	}
}
// durationToTicks converts a duration into clock ticks.
//
// A tick is 2^20 nanoseconds (roughly a millisecond). Scaling down this way
// keeps later additions from overflowing even when the caller passes
// math.MaxInt64.
func durationToTicks(d time.Duration) fasttime {
	const tickShift = 20
	ticks := fasttime(d)
	return ticks >> tickShift
}
// DefaultClockPeriod is the initial value of clockPeriod.
const DefaultClockPeriod = 100 * time.Millisecond
// clockPeriod is the approximate interval between updates of fast.current
// performed by runClock.
var clockPeriod = DefaultClockPeriod
// runClock is the body of the clock-updater goroutine started by extendClock.
// It refreshes fast.current once per clockPeriod and, once the stored time has
// moved past fast.clockEnd, clears fast.running and returns.
func runClock() {
fast.mu.Lock()
defer fast.mu.Unlock()
for fast.current.read() <= fast.clockEnd.read() {
// Unlock while sleeping.
fast.mu.Unlock()
time.Sleep(clockPeriod)
fast.mu.Lock()
// The clock value is the elapsed time since fast.start, in ticks.
newTime := durationToTicks(time.Since(fast.start))
fast.current.write(newTime)
}
// mu is held here, so the flag flips atomically with leaving the loop.
fast.running = false
}
// atomicTime is a fasttime that is read and written with atomic operations.
// Should change to atomic.Int64 when we can use go 1.19.
type atomicTime struct {
	v int64
}

// read atomically loads the stored time.
func (t *atomicTime) read() fasttime {
	ticks := atomic.LoadInt64(&t.v)
	return fasttime(ticks)
}

// write atomically replaces the stored time with v.
func (t *atomicTime) write(v fasttime) {
	ticks := int64(v)
	atomic.StoreInt64(&t.v, ticks)
}
package regexp2
import (
"bytes"
"fmt"
)
// Match is a single regex result match that contains groups and repeated captures
// -Groups
// -Capture
type Match struct {
Group // embedded group 0 (the whole match)
regex *Regexp
otherGroups []Group // groups 1..n, built lazily by populateOtherGroups
// input to the match
textpos int
textstart int
capcount int
caps []int
sparseCaps map[int]int
// output from the match
matches [][]int
matchcount []int
// whether we've done any balancing with this match. If we
// have done balancing, we'll need to do extra work in Tidy().
balancing bool
}
// Group is an explicit or implicit (group 0) matched group within the pattern
type Group struct {
Capture // the last capture of this group is embedded for ease of use
Name string // group name
Captures []Capture // captures of this group
}
// Capture is a single capture of text within the larger original string.
type Capture struct {
	// the original string
	text []rune
	// the position in the original string where the first character of
	// captured substring was found.
	Index int
	// the length of the captured substring.
	Length int
}

// String returns the captured text as a string.
func (c *Capture) String() string {
	return string(c.Runes())
}

// Runes returns the captured text as a rune slice. The slice is a window
// into the original input, not a copy.
func (c *Capture) Runes() []rune {
	start := c.Index
	end := start + c.Length
	return c.text[start:end]
}
// newMatch allocates a Match for regex over text with room for capcount
// groups. Group 0 is named "0" and gets its capture slot allocated up front.
func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
	m := &Match{
		regex:      regex,
		textstart:  startpos,
		matches:    make([][]int, capcount),
		matchcount: make([]int, capcount),
	}
	m.text = text
	m.Name = "0"
	m.matches[0] = make([]int, 2)
	return m
}
// newMatchSparse is newMatch plus a sparse capture map, which GroupByNumber
// consults to translate group numbers into slot indexes.
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
	match := newMatch(regex, capcount, text, startpos)
	match.sparseCaps = caps
	return match
}
// reset prepares m for reuse against a new input: it swaps in the text and
// start position, zeroes every group's capture count and clears the
// balancing flag. Capture storage is kept.
func (m *Match) reset(text []rune, textstart int) {
	m.text, m.textstart = text, textstart
	for i := range m.matchcount {
		m.matchcount[i] = 0
	}
	m.balancing = false
}
// tidy finalizes the match once scanning has stopped at textpos: it copies
// group 0's interval into the embedded Capture, records textpos and group 0's
// capture count, and, if any balancing was done, compacts each group's capture
// list so that only the surviving (non-negative) captures remain.
func (m *Match) tidy(textpos int) {
interval := m.matches[0]
m.Index = interval[0]
m.Length = interval[1]
m.textpos = textpos
m.capcount = m.matchcount[0]
//copy our root capture to the list
m.Group.Captures = []Capture{m.Group.Capture}
if m.balancing {
// The idea here is that we want to compact all of our unbalanced captures. To do that we
// use j basically as a count of how many unbalanced captures we have at any given time
// (really j is an index, but j/2 is the count). First we skip past all of the real captures
// until we find a balance capture. Then we check each subsequent entry. If it's a balance
// capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
// it down to the last free position.
for cap := 0; cap < len(m.matchcount); cap++ {
limit := m.matchcount[cap] * 2
matcharray := m.matches[cap]
var i, j int
for i = 0; i < limit; i++ {
if matcharray[i] < 0 {
break
}
}
for j = i; i < limit; i++ {
if matcharray[i] < 0 {
// skip negative values
j--
} else {
// but if we find something positive (an actual capture), copy it back to the last
// unbalanced position.
if i != j {
matcharray[j] = matcharray[i]
}
j++
}
}
m.matchcount[cap] = j / 2
}
m.balancing = false
}
}
// isMatched reports whether group cap holds at least one capture whose most
// recent length entry is not the sentinel value -2 (written here as -3+1).
func (m *Match) isMatched(cap int) bool {
	if cap >= len(m.matchcount) {
		return false
	}
	count := m.matchcount[cap]
	if count <= 0 {
		return false
	}
	lastLength := m.matches[cap][count*2-1]
	return lastLength != (-3 + 1)
}
// matchIndex returns the start index of the last capture of group cap.
// A negative stored value is a balancing reference: -3-v is the slot that
// holds the real value.
func (m *Match) matchIndex(cap int) int {
	slots := m.matches[cap]
	v := slots[m.matchcount[cap]*2-2]
	if v < 0 {
		return slots[-3-v]
	}
	return v
}
// matchLength returns the length of the last capture of group cap.
// A negative stored value is a balancing reference: -3-v is the slot that
// holds the real value.
func (m *Match) matchLength(cap int) int {
	slots := m.matches[cap]
	v := slots[m.matchcount[cap]*2-1]
	if v < 0 {
		return slots[-3-v]
	}
	return v
}
// addMatch appends the capture (start, l) to group c, growing the group's
// slot storage when it is full. Each capture occupies two consecutive ints:
// start followed by length.
func (m *Match) addMatch(c, start, l int) {
	if m.matches[c] == nil {
		m.matches[c] = make([]int, 2)
	}

	n := m.matchcount[c]
	if needed := n*2 + 2; needed > len(m.matches[c]) {
		// Out of room: move the existing captures into a larger array.
		grown := make([]int, n*8)
		copy(grown, m.matches[c][:n*2])
		m.matches[c] = grown
	}

	slots := m.matches[c]
	slots[n*2] = start
	slots[n*2+1] = l
	m.matchcount[c] = n + 1
}
// Nonpublic builder: Add a capture to balance the specified group. This is used by the
// balanced match construct. (?<foo-foo2>...)
//
// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
// However, since we have backtracking, we need to keep track of everything.
//
// Balancing entries are stored as negative numbers: the value -3-k refers to
// slot k of the same group's capture array.
func (m *Match) balanceMatch(c int) {
m.balancing = true
// we'll look at the last capture first
capcount := m.matchcount[c]
target := capcount*2 - 2
// first see if it is negative, and therefore is a reference to the next available
// capture group for balancing. If it is, we'll reset target to point to that capture.
if m.matches[c][target] < 0 {
target = -3 - m.matches[c][target]
}
// move back to the previous capture
target -= 2
// if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
if target >= 0 && m.matches[c][target] < 0 {
m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
} else {
m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
}
}
// Nonpublic builder: removes the most recent capture of group c by
// decrementing its capture count; the stored slots are left in place.
func (m *Match) removeMatch(c int) {
m.matchcount[c]--
}
// GroupCount returns the number of group slots tracked by this match
// (the length of the per-group capture-count table), including group 0.
func (m *Match) GroupCount() int {
return len(m.matchcount)
}
// GroupByName looks a group up by its name. It returns nil when the regex
// does not know the name.
func (m *Match) GroupByName(name string) *Group {
	if num := m.regex.GroupNumberFromName(name); num >= 0 {
		return m.GroupByNumber(num)
	}
	return nil
}
// GroupByNumber returns a group based on the number of the group, or nil if
// the group number does not exist.
//
// When the match carries a sparse capture map, num is first translated
// through it. A number that is missing from the map does not exist and yields
// nil; it is not reinterpreted as a dense slot index, which would return an
// unrelated group. This mirrors GroupNameFromNumber, which reports "" for
// numbers missing from the sparse map.
func (m *Match) GroupByNumber(num int) *Group {
	// check our sparse map
	if m.sparseCaps != nil {
		idx, ok := m.sparseCaps[num]
		if !ok {
			return nil
		}
		num = idx
	}
	if num >= len(m.matchcount) || num < 0 {
		return nil
	}

	if num == 0 {
		return &m.Group
	}

	// Groups other than 0 are materialized lazily.
	m.populateOtherGroups()

	return &m.otherGroups[num-1]
}
// Groups returns a fresh slice holding every capture group, with group 0
// (the full match) first.
func (m *Match) Groups() []Group {
	m.populateOtherGroups()

	all := make([]Group, 0, len(m.otherGroups)+1)
	all = append(all, m.Group)
	all = append(all, m.otherGroups...)
	return all
}
// populateOtherGroups builds the Group values for groups 1..n the first time
// it is called; later calls are no-ops.
func (m *Match) populateOtherGroups() {
	if m.otherGroups != nil {
		return
	}

	m.otherGroups = make([]Group, len(m.matchcount)-1)
	for i := range m.otherGroups {
		slot := i + 1 // otherGroups[0] is group slot 1
		m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(slot), m.text, m.matches[slot], m.matchcount[slot])
	}
}
// groupValueAppendToBuf writes the text of the last capture of group slot
// groupnum into buf. Nothing is written when the group has no captures.
func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
	count := m.matchcount[groupnum]
	if count == 0 {
		return
	}

	slots := m.matches[groupnum]
	start := slots[(count-1)*2]
	end := start + slots[(count*2)-1]
	for pos := start; pos < end; pos++ {
		buf.WriteRune(m.text[pos])
	}
}
// newGroup builds a Group called name from capcount (start, length) pairs
// stored in caps. The embedded Capture mirrors the last pair; with no
// captures it keeps index 0 and length 0.
func newGroup(name string, text []rune, caps []int, capcount int) Group {
	captures := make([]Capture, capcount)
	for i := range captures {
		captures[i] = Capture{
			text:   text,
			Index:  caps[i*2],
			Length: caps[i*2+1],
		}
	}

	g := Group{Name: name, Captures: captures}
	g.text = text
	if capcount > 0 {
		// The most recent capture doubles as the group's own value.
		g.Capture = captures[capcount-1]
	}
	return g
}
// dump renders the match as multi-line text: the sparse slot map (if any)
// followed by every group with the index, length and text of each capture.
func (m *Match) dump() string {
buf := &bytes.Buffer{}
buf.WriteRune('\n')
if len(m.sparseCaps) > 0 {
// Map iteration order is random, so these lines are not in a stable order.
for k, v := range m.sparseCaps {
fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
}
}
for i, g := range m.Groups() {
fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
for _, c := range g.Captures {
fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
}
}
/*
for i := 0; i < len(m.matchcount); i++ {
fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
for j := 0; j < m.matchcount[i]; j++ {
text := ""
if m.matches[i][j*2] >= 0 {
start := m.matches[i][j*2]
text = m.text[start : start+m.matches[i][j*2+1]]
}
fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
}
}
*/
return buf.String()
}
/*
Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
more feature full regex engine behind the scenes.
It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
need to write very complex patterns or require compatibility with .NET.
*/
package regexp2
import (
"errors"
"math"
"strconv"
"sync"
"time"
"github.com/dlclark/regexp2/syntax"
)
var (
// DefaultMatchTimeout is the MatchTimeout that Compile gives every new
// Regexp. It is the largest representable duration, i.e. "forever".
DefaultMatchTimeout = time.Duration(math.MaxInt64)
// DefaultUnmarshalOptions are the options UnmarshalText passes to Compile
// when unmarshaling a regex from text.
DefaultUnmarshalOptions = None
)
// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
// A match will time out if it takes (approximately) more than
// MatchTimeout. This is a safety check in case the match
// encounters catastrophic backtracking. The default value
// (DefaultMatchTimeout) causes all time out checking to be
// suppressed.
MatchTimeout time.Duration
// read-only after Compile
pattern string // as passed to Compile
options RegexOptions // options
caps map[int]int // capnum->index
capnames map[string]int // capture group name -> index
capslist []string // sorted list of capture group names
capsize int // size of the capture array
code *syntax.Code // compiled program
// cache of machines for running regexp
muRun *sync.Mutex
runner []*runner
}
// Compile parses expr under the given options and translates it into a
// program. On success it returns a Regexp ready for matching, with
// MatchTimeout set to DefaultMatchTimeout; on failure it returns the parse or
// code-generation error.
func Compile(expr string, opt RegexOptions) (*Regexp, error) {
	// Stage 1: pattern text -> syntax tree.
	tree, err := syntax.Parse(expr, syntax.RegexOptions(opt))
	if err != nil {
		return nil, err
	}

	// Stage 2: syntax tree -> executable code.
	code, err := syntax.Write(tree)
	if err != nil {
		return nil, err
	}

	re := &Regexp{
		MatchTimeout: DefaultMatchTimeout,
		pattern:      expr,
		options:      opt,
		caps:         code.Caps,
		capnames:     tree.Capnames,
		capslist:     tree.Caplist,
		capsize:      code.Capsize,
		code:         code,
		muRun:        &sync.Mutex{},
	}
	return re, nil
}
// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled
// regular expressions.
func MustCompile(str string, opt RegexOptions) *Regexp {
	re, err := Compile(str, opt)
	if err == nil {
		return re
	}
	panic(`regexp2: Compile(` + quote(str) + `): ` + err.Error())
}
// Escape adds backslashes to any special characters in the input string.
// It delegates to syntax.Escape.
func Escape(input string) string {
return syntax.Escape(input)
}
// Unescape removes any backslashes from previously-escaped special characters
// in the input string. It delegates to syntax.Unescape and returns its error.
func Unescape(input string) (string, error) {
return syntax.Unescape(input)
}
// SetTimeoutCheckPeriod is a debug function that sets the frequency of the timeout goroutine's sleep cycle.
// Defaults to 100ms. The only benefit of setting this lower is that the 1 background goroutine that manages
// timeouts may exit slightly sooner after all the timeouts have expired. See Github issue #63
//
// The period is stored with a plain, unsynchronized assignment.
func SetTimeoutCheckPeriod(d time.Duration) {
clockPeriod = d
}
// StopTimeoutClock should only be used in unit tests to prevent the timeout clock goroutine
// from appearing like a leaking goroutine. It delegates to stopClock.
func StopTimeoutClock() {
stopClock()
}
// String returns the source text used to compile the regular expression,
// exactly as it was passed to Compile.
func (re *Regexp) String() string {
return re.pattern
}
// quote renders s as a Go string literal: a raw (backquoted) literal when s
// can be written that way unchanged, otherwise a double-quoted literal with
// escapes.
func quote(s string) string {
	if !strconv.CanBackquote(s) {
		return strconv.Quote(s)
	}
	return "`" + s + "`"
}
// RegexOptions impact the runtime and parsing behavior
// for each specific regex. They are settable in code as well
// as in the regex pattern itself.
type RegexOptions int32
// Option flags; combine them with bitwise OR. Only None is declared with the
// RegexOptions type; the remaining flags are untyped integer constants. The
// quoted letter in each trailing comment is the flag's short name.
const (
None RegexOptions = 0x0
IgnoreCase = 0x0001 // "i"
Multiline = 0x0002 // "m"
ExplicitCapture = 0x0004 // "n"
Compiled = 0x0008 // "c"
Singleline = 0x0010 // "s"
IgnorePatternWhitespace = 0x0020 // "x"
RightToLeft = 0x0040 // "r"
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
Unicode = 0x0400 // "u"
)
// RightToLeft reports whether the RightToLeft option bit is set on this regex.
func (re *Regexp) RightToLeft() bool {
return re.options&RightToLeft != 0
}
// Debug reports whether the Debug option bit is set on this regex.
func (re *Regexp) Debug() bool {
return re.options&Debug != 0
}
// Replace searches the input string and replaces each match found with the replacement text.
// Count will limit the number of matches attempted and startAt will allow
// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
// Set startAt and count to -1 to go through the whole string
//
// An error is returned when the replacement text cannot be turned into
// replacer data or when the underlying replace call fails.
func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) {
data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options))
if err != nil {
return "", err
}
//TODO: cache ReplacerData
return replace(re, data, nil, input, startAt, count)
}
// ReplaceFunc searches the input string and replaces each match found using the string from the evaluator
// Count will limit the number of matches attempted and startAt will allow
// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
// Set startAt and count to -1 to go through the whole string.
//
// The evaluator is called once per match with a copy of the Match.
func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) {
return replace(re, nil, evaluator, input, startAt, count)
}
// FindStringMatch searches the input string for a Regexp match.
// The start position passed to the matcher is -1, i.e. the default start.
func (re *Regexp) FindStringMatch(s string) (*Match, error) {
// convert string to runes
return re.run(false, -1, getRunes(s))
}
// FindRunesMatch searches the input rune slice for a Regexp match.
// The start position passed to the matcher is -1, i.e. the default start.
func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
return re.run(false, -1, r)
}
// FindStringMatchStartingAt searches the input string for a Regexp match
// starting at the byte offset startAt.
//
// startAt may be at most len(s) and must fall on the first byte of a rune
// (or be exactly len(s)); otherwise an error is returned. A negative startAt
// selects the default start position.
func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) {
	if len(s) < startAt {
		return nil, errors.New("startAt must be less than the length of the input string")
	}

	// Translate the byte offset into a rune offset while decoding the input.
	runes, runeStart := re.getRunesAndStart(s, startAt)
	if runeStart == -1 {
		// we didn't find our start index in the string -- that's a problem
		return nil, errors.New("startAt must align to the start of a valid rune in the input string")
	}

	return re.run(false, runeStart, runes)
}
// FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index.
// startAt is passed to the matcher unchanged, so it is an index into r.
func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
return re.run(false, startAt, r)
}
// FindNextMatch returns the next match in the same input string as the match
// parameter. It returns nil if there is no next match or if given a nil match.
//
// After an empty match the search position is moved one rune in the scan
// direction so the same empty match is not found again. The scan ends at
// len(text) for left-to-right patterns and at 0 for right-to-left patterns;
// once an empty match sits on that boundary there is nothing left to search.
// Checking the boundary per direction keeps a right-to-left scan from
// stopping early at len(text) and from stepping to -1, which is the
// "default start" sentinel and would restart the scan.
func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
	if m == nil {
		return nil, nil
	}

	// If previous match was empty, advance by one before matching to prevent
	// infinite loop
	startAt := m.textpos
	if m.Length == 0 {
		if re.RightToLeft() {
			if startAt == 0 {
				return nil, nil
			}
			startAt--
		} else {
			if startAt == len(m.text) {
				return nil, nil
			}
			startAt++
		}
	}
	return re.run(false, startAt, m.text)
}
// MatchString reports whether s contains a match for the regex. A non-nil
// error (for example a timeout) is returned together with false.
func (re *Regexp) MatchString(s string) (bool, error) {
	match, err := re.run(true, -1, getRunes(s))
	matched := err == nil && match != nil
	return matched, err
}
// getRunesAndStart decodes s into runes and converts the byte offset startAt
// into the matching rune index.
//
// A negative startAt selects the default start: len(runes) for right-to-left
// patterns, 0 otherwise. The returned index is -1 when startAt is neither the
// first byte of a rune nor exactly len(s).
func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
if startAt < 0 {
if re.RightToLeft() {
r := getRunes(s)
return r, len(r)
}
return getRunes(s), 0
}
// len(s) is an upper bound on the rune count; the slice is trimmed on return.
ret := make([]rune, len(s))
i := 0
runeIdx := -1
for strIdx, r := range s {
if strIdx == startAt {
runeIdx = i
}
ret[i] = r
i++
}
// Starting at the very end of the string is allowed.
if startAt == len(s) {
runeIdx = i
}
return ret[:i], runeIdx
}
// getRunes converts s to a slice of runes.
func getRunes(s string) []rune {
return []rune(s)
}
// MatchRunes reports whether r contains a match for the regex. A non-nil
// error (for example a timeout) is returned together with false.
func (re *Regexp) MatchRunes(r []rune) (bool, error) {
	match, err := re.run(true, -1, r)
	matched := err == nil && match != nil
	return matched, err
}
// GetGroupNames returns the names of the capturing groups in the expression.
// Without a name list, the names are the decimal strings "0" through
// capsize-1. The returned slice is a copy the caller may modify.
func (re *Regexp) GetGroupNames() []string {
	if re.capslist != nil {
		names := make([]string, len(re.capslist))
		copy(names, re.capslist)
		return names
	}

	names := make([]string, re.capsize)
	for i := range names {
		names[i] = strconv.Itoa(i)
	}
	return names
}
// GetGroupNumbers returns the group numbers used by the expression. With a
// capnum->index map, each number is placed at its mapped index; otherwise the
// numbers are simply 0 through capsize-1.
func (re *Regexp) GetGroupNumbers() []int {
	if re.caps != nil {
		numbers := make([]int, len(re.caps))
		for capnum, index := range re.caps {
			numbers[index] = capnum
		}
		return numbers
	}

	numbers := make([]int, re.capsize)
	for i := range numbers {
		numbers[i] = i
	}
	return numbers
}
// GroupNameFromNumber retrieves the group name that corresponds to a group
// number. It returns "" for an unknown group number. Unnamed groups
// automatically receive a name that is the decimal string equivalent of
// their number.
func (re *Regexp) GroupNameFromNumber(i int) string {
	// No name list: every in-range number is its own name.
	if re.capslist == nil {
		if i < 0 || i >= re.capsize {
			return ""
		}
		return strconv.Itoa(i)
	}

	// Translate the group number into a name-list index when a map exists.
	if re.caps != nil {
		index, ok := re.caps[i]
		if !ok {
			return ""
		}
		i = index
	}

	if i < 0 || i >= len(re.capslist) {
		return ""
	}
	return re.capslist[i]
}
// GroupNumberFromName returns a group number that corresponds to a group name.
// Returns -1 if the name is not a recognized group name. Numbered groups
// automatically get a group name that is the decimal string equivalent of its
// number.
//
// When the regex has a name table, only names present in it are recognized.
// Otherwise the name must consist solely of ASCII digits and denote a value
// below capsize. Parsing stops as soon as the value reaches capsize, so an
// overlong digit string can no longer wrap around the integer range and be
// mistaken for a valid group number.
func (re *Regexp) GroupNumberFromName(name string) int {
	// look up name if we have a hashtable of names
	if re.capnames != nil {
		if k, ok := re.capnames[name]; ok {
			return k
		}

		return -1
	}

	// convert to an int if it looks like a number
	result := 0
	for i := 0; i < len(name); i++ {
		ch := name[i]

		if ch > '9' || ch < '0' {
			return -1
		}

		result = result*10 + int(ch-'0')

		// Further digits can only make the value larger, so it is already
		// out of range; bailing out here also prevents integer overflow.
		if result >= re.capsize {
			return -1
		}
	}

	// return int if it's in range (an empty name leaves result at 0)
	if result >= 0 && result < re.capsize {
		return result
	}

	return -1
}
// MarshalText implements [encoding.TextMarshaler]. The output
// matches that of calling the [Regexp.String] method, so the compile
// options are not part of the encoded text. The returned error is always nil.
func (re *Regexp) MarshalText() ([]byte, error) {
return []byte(re.String()), nil
}
// UnmarshalText implements [encoding.TextUnmarshaler] by calling
// [Compile] on the encoded value with DefaultUnmarshalOptions.
// On success the receiver is overwritten with the newly compiled Regexp;
// on a compile error the receiver is left untouched.
func (re *Regexp) UnmarshalText(text []byte) error {
newRE, err := Compile(string(text), DefaultUnmarshalOptions)
if err != nil {
return err
}
*re = *newRE
return nil
}
package regexp2
import (
"bytes"
"errors"
"github.com/dlclark/regexp2/syntax"
)
// Codes for the special insertion patterns handled by replacementImpl and
// replacementImplRTL. A rule r with -replaceSpecials <= r < 0 is decoded as
// -replaceSpecials-1-r and compared against the replace* values below;
// rules below -replaceSpecials are group lookups.
const (
replaceSpecials = 4
replaceLeftPortion = -1
replaceRightPortion = -2
replaceLastGroup = -3
replaceWholeString = -4
)
// MatchEvaluator is a function that takes a match and returns a replacement string to be used.
// The match is passed by value.
type MatchEvaluator func(Match) string
// Three very similar algorithms appear below: replace (pattern),
// replace (evaluator), and split.

// replace replaces occurrences of the regex in input, using either the
// replacement pattern in data (when evaluator is nil) or the string returned
// by evaluator for each match.
//
// startAt is forwarded to FindStringMatchStartingAt. count limits the number
// of replacements; -1 means unlimited and values below -1 are an error.
//
// Note that the special case of no matches is handled on its own:
// with no matches, the input string is returned unchanged.
// The right-to-left case is split out because pieces are found from the end
// of the text towards the start and must be emitted in reverse.
//
// Any error from matching, including one from FindNextMatch in the middle of
// the scan (e.g. a timeout), is returned to the caller rather than being
// reported as a successful empty result.
func replace(regex *Regexp, data *syntax.ReplacerData, evaluator MatchEvaluator, input string, startAt, count int) (string, error) {
	if count < -1 {
		return "", errors.New("Count too small")
	}
	// NOTE(review): a count of 0 yields an empty string rather than the
	// unchanged input; kept as-is for backward compatibility.
	if count == 0 {
		return "", nil
	}

	m, err := regex.FindStringMatchStartingAt(input, startAt)
	if err != nil {
		return "", err
	}
	if m == nil {
		return input, nil
	}

	buf := &bytes.Buffer{}
	text := m.text

	if !regex.RightToLeft() {
		// prevat is the end of the previous match; text between prevat and
		// the next match is copied through verbatim.
		prevat := 0
		for m != nil {
			if m.Index != prevat {
				buf.WriteString(string(text[prevat:m.Index]))
			}
			prevat = m.Index + m.Length
			if evaluator == nil {
				replacementImpl(data, buf, m)
			} else {
				buf.WriteString(evaluator(*m))
			}

			count--
			if count == 0 {
				break
			}
			m, err = regex.FindNextMatch(m)
			if err != nil {
				return "", err
			}
		}

		if prevat < len(text) {
			buf.WriteString(string(text[prevat:]))
		}
	} else {
		// Matches arrive from right to left, so pieces are collected in al
		// and written out back-to-front at the end.
		prevat := len(text)
		var al []string

		for m != nil {
			if m.Index+m.Length != prevat {
				al = append(al, string(text[m.Index+m.Length:prevat]))
			}
			prevat = m.Index
			if evaluator == nil {
				replacementImplRTL(data, &al, m)
			} else {
				al = append(al, evaluator(*m))
			}

			count--
			if count == 0 {
				break
			}
			m, err = regex.FindNextMatch(m)
			if err != nil {
				return "", err
			}
		}

		if prevat > 0 {
			buf.WriteString(string(text[:prevat]))
		}

		for i := len(al) - 1; i >= 0; i-- {
			buf.WriteString(al[i])
		}
	}

	return buf.String(), nil
}
// replacementImpl evaluates the substitution pattern in data against the
// match m and appends the result to buf.
//
// Each rule is one of: a non-negative index into data.Strings (literal text),
// a value below -replaceSpecials (a group lookup), or a value in
// [-replaceSpecials, -1] selecting one of the special insertions.
func replacementImpl(data *syntax.ReplacerData, buf *bytes.Buffer, m *Match) {
for _, r := range data.Rules {
if r >= 0 { // string lookup
buf.WriteString(data.Strings[r])
} else if r < -replaceSpecials { // group lookup
m.groupValueAppendToBuf(-replaceSpecials-1-r, buf)
} else {
switch -replaceSpecials - 1 - r { // special insertion patterns
case replaceLeftPortion:
// everything before the match
for i := 0; i < m.Index; i++ {
buf.WriteRune(m.text[i])
}
case replaceRightPortion:
// everything after the match
for i := m.Index + m.Length; i < len(m.text); i++ {
buf.WriteRune(m.text[i])
}
case replaceLastGroup:
m.groupValueAppendToBuf(m.GroupCount()-1, buf)
case replaceWholeString:
// the entire input text
for i := 0; i < len(m.text); i++ {
buf.WriteRune(m.text[i])
}
}
}
}
}
// replacementImplRTL is the right-to-left counterpart of replacementImpl:
// instead of writing to a buffer it appends one string per rule to *al.
//
// NOTE(review): pieces are appended in rule order, while replace writes the
// collected list out back-to-front; confirm that data.Rules is already in the
// order this requires for right-to-left patterns.
func replacementImplRTL(data *syntax.ReplacerData, al *[]string, m *Match) {
l := *al
buf := &bytes.Buffer{}
for _, r := range data.Rules {
// buf is scratch space reused for every rule.
buf.Reset()
if r >= 0 { // string lookup
l = append(l, data.Strings[r])
} else if r < -replaceSpecials { // group lookup
m.groupValueAppendToBuf(-replaceSpecials-1-r, buf)
l = append(l, buf.String())
} else {
switch -replaceSpecials - 1 - r { // special insertion patterns
case replaceLeftPortion:
// everything before the match
for i := 0; i < m.Index; i++ {
buf.WriteRune(m.text[i])
}
case replaceRightPortion:
// everything after the match
for i := m.Index + m.Length; i < len(m.text); i++ {
buf.WriteRune(m.text[i])
}
case replaceLastGroup:
m.groupValueAppendToBuf(m.GroupCount()-1, buf)
case replaceWholeString:
// the entire input text
for i := 0; i < len(m.text); i++ {
buf.WriteRune(m.text[i])
}
}
l = append(l, buf.String())
}
}
// Publish the grown slice back to the caller.
*al = l
}
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment