package data

import (
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"html"
	"math"
	"net/url"
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"unicode/utf8"

	"github.com/volker-schukai/tokenizer"
	"gitlab.schukai.com/oss/libraries/go/utilities/pathfinder"
)

// htmlTagPattern matches anything that looks like an HTML tag. It is compiled
// once at package initialisation instead of on every plaintext command.
var htmlTagPattern = regexp.MustCompile(`<(.|\n)*?>`)

// Transformer evaluates pipe expressions against a dataset.
type Transformer struct {
	// dataset is the initial value every pipe starts with.
	dataset *map[any]any
	// errors collects non-fatal problems (for example missing map keys)
	// encountered while evaluating index commands.
	errors []error
	// parser splits a pipe expression into tokens.
	parser *tokenizer.Tokenizer
}

// NewTransformer returns a Transformer that operates on dataset and uses the
// tokenizer returned by initTokenizer.
func NewTransformer(dataset *map[any]any) *Transformer {
	return &Transformer{
		dataset: dataset,
		parser:  initTokenizer(),
	}
}

// TokenList holds the tokens of a single command of a pipe: the command token
// itself followed by its arguments.
type TokenList []*tokenizer.Token

// Transform evaluates pipe against the dataset and returns the resulting
// value, or the first fatal error raised by one of the commands.
func (t *Transformer) Transform(pipe string) (any, error) {
	return t.tokenize(pipe)
}

// Dataset returns the dataset the transformer was created with.
func (t *Transformer) Dataset() *map[any]any {
	return t.dataset
}

// tokenize parses pipe into commands (separated by PipeSymbol tokens,
// PipeCmdDelimiter tokens are dropped) and applies them one after another,
// starting with the dataset as the current value. The value produced by the
// last command is returned.
func (t *Transformer) tokenize(pipe string) (any, error) {
	stream := t.parser.ParseString(pipe)
	// NOTE(review): the stream presumably owns the tokens, so it must stay
	// open until every command has been evaluated - confirm against the
	// tokenizer documentation before moving this into a helper.
	defer stream.Close()

	var commands []TokenList
	var current TokenList

	for stream.IsValid() {
		token := stream.CurrentToken()
		if token == nil {
			break
		}
		stream.GoNext()

		if token.Is(PipeSymbol) {
			commands = append(commands, current)
			current = nil
			continue
		}
		if token.Is(PipeCmdDelimiter) {
			continue
		}
		current = append(current, token)
	}
	if len(current) > 0 {
		commands = append(commands, current)
	}

	var currentValue any = t.dataset
	var err error

	for _, tokens := range commands {
		if len(tokens) == 0 {
			continue
		}

		switch {
		case tokens[0].Is(PipeReflectionIsNil):
			currentValue = isNilValue(currentValue)
		case tokens[0].Is(PipeCmdStatic):
			currentValue, err = handleStaticCommand(tokens)
		case tokens[0].Is(PipeCmdPath):
			currentValue, err = handlePathCommand(tokens, currentValue)
		case tokens[0].Is(PipeCmdIndex):
			currentValue, err = t.handleIndex(tokens, currentValue)
		default:
			currentValue, err = handleTypedCommand(tokens, currentValue)
		}
		if err != nil {
			return nil, err
		}
	}

	return currentValue, nil
}

// isNilValue reports whether value is nil. Unlike a plain comparison with nil
// it also detects typed nil pointers, maps, slices, channels and functions
// stored in the interface (for example a nil dataset pointer).
func isNilValue(value any) bool {
	if value == nil {
		return true
	}
	rv := reflect.ValueOf(value)
	switch rv.Kind() {
	case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.UnsafePointer:
		return rv.IsNil()
	}
	return false
}

// handlePathCommand concatenates the arguments of a path command and resolves
// the resulting path on currentValue via pathfinder.GetValue.
func handlePathCommand(tokens TokenList, currentValue any) (any, error) {
	if len(tokens) < 2 {
		return nil, errors.New("invalid path command")
	}

	var path strings.Builder
	for _, token := range tokens[1:] {
		path.WriteString(token.ValueUnescapedString())
	}

	return pathfinder.GetValue[any](currentValue, path.String())
}

// handleIndex applies an index command to currentValue.
//
// A missing key on a *map[any]any is a fatal error. A missing key on the other
// map types, and a nil element of a []any, is only recorded in t.errors and
// yields a nil value. Slice indices must be integers within range. Any other
// container type is resolved through pathfinder.GetValue.
func (t *Transformer) handleIndex(tokens TokenList, currentValue any) (any, error) {
	index, err := handleIndexCommand(tokens)
	if err != nil {
		return nil, err
	}

	switch container := currentValue.(type) {
	case *map[any]any:
		if container == nil {
			return nil, errors.New("index command on nil map")
		}
		value, ok := (*container)[index]
		if !ok {
			return nil, errors.New("index " + index + " not found")
		}
		return value, nil

	case map[any]any:
		value, ok := container[index]
		if !ok {
			t.errors = append(t.errors, errors.New("index not found: "+index))
		}
		return value, nil

	case *map[string]any:
		if container == nil {
			return nil, errors.New("index command on nil map")
		}
		value, ok := (*container)[index]
		if !ok {
			t.errors = append(t.errors, errors.New("index not found: "+index))
		}
		return value, nil

	case map[string]any:
		value, ok := container[index]
		if !ok {
			t.errors = append(t.errors, errors.New("index not found: "+index))
		}
		return value, nil

	case *[]string:
		if container == nil {
			return nil, errors.New("index command on nil slice")
		}
		i, err := sliceIndex(index, len(*container))
		if err != nil {
			return nil, err
		}
		return (*container)[i], nil

	case []string:
		i, err := sliceIndex(index, len(container))
		if err != nil {
			return nil, err
		}
		return container[i], nil

	case *[]any:
		if container == nil {
			return nil, errors.New("index command on nil slice")
		}
		i, err := sliceIndex(index, len(*container))
		if err != nil {
			return nil, err
		}
		return (*container)[i], nil

	case []any:
		i, err := sliceIndex(index, len(container))
		if err != nil {
			return nil, err
		}
		value := container[i]
		if value == nil {
			t.errors = append(t.errors, errors.New("index not found: "+index))
		}
		return value, nil
	}

	// Structs and all remaining types are resolved by pathfinder.
	return pathfinder.GetValue[any](currentValue, index)
}

// sliceIndex converts index to an int and verifies that it addresses an
// element of a slice with the given length, so callers never panic on a bad
// index.
func sliceIndex(index string, length int) (int, error) {
	i, err := strconv.Atoi(index)
	if err != nil {
		return 0, fmt.Errorf("index must be an integer: %w", err)
	}
	if i < 0 || i >= length {
		return 0, errors.New("index " + index + " out of range")
	}
	return i, nil
}

// handleTypedCommand dispatches a command to the handler matching the dynamic
// type of currentValue.
func handleTypedCommand(tokens TokenList, currentValue any) (any, error) {
	switch v := currentValue.(type) {
	case string:
		return handleStrings(tokens, v)
	case int, int8, int16, int32, int64:
		return handleInteger(tokens, reflect.ValueOf(v).Int())
	case uint, uint8, uint16, uint32, uint64:
		// reflect.Value.Int panics for unsigned kinds, so read them with Uint.
		return handleInteger(tokens, int64(reflect.ValueOf(v).Uint()))
	case float32, float64:
		return handleFloat(tokens, reflect.ValueOf(v).Float())
	case bool:
		return handleBoolean(tokens, v)
	case *map[any]any:
		return handleMap(tokens, v)
	case map[any]any:
		return handleMap(tokens, &v)
	case map[string]any:
		converted := make(map[any]any, len(v))
		for key, value := range v {
			converted[key] = value
		}
		return handleMap(tokens, &converted)
	case []any:
		return handleArray(tokens, v)
	case nil:
		return handleNil(tokens)
	}

	return nil, errors.New("the type " + reflect.TypeOf(currentValue).String() + " is not supported")
}

// equalsResult produces the result of an equals command: the third token (or
// true) when matched, otherwise the fourth token (or false).
func equalsResult(tokens TokenList, matched bool) any {
	if matched {
		if len(tokens) > 2 {
			return tokens[2].ValueUnescapedString()
		}
		return true
	}
	if len(tokens) > 3 {
		return tokens[3].ValueUnescapedString()
	}
	return false
}

// jsonString marshals value and returns the JSON document as a string.
func jsonString(value any) (any, error) {
	encoded, err := json.Marshal(value)
	if err != nil {
		return nil, err
	}
	return string(encoded), nil
}

// handleIndexCommand returns the single argument of an index command.
func handleIndexCommand(tokens TokenList) (string, error) {
	if len(tokens) == 2 {
		return tokens[1].ValueUnescapedString(), nil
	}

	return "", errors.New("invalid index command")
}

// handleArray handles commands on []any values. No command is supported yet,
// so it always returns an error.
func handleArray(tokens TokenList, array []any) (any, error) {
	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for array")
}

// handleFloat applies the command in tokens to a floating point value.
func handleFloat(tokens TokenList, currentValue float64) (any, error) {
	cmd := tokens[0]

	switch {
	case cmd.Is(PipeCmdRound):
		// Without an argument the value is rounded to an integral number.
		factor := 1.0
		if len(tokens) == 2 {
			precision, err := strconv.ParseFloat(tokens[1].ValueUnescapedString(), 64)
			if err != nil {
				return nil, err
			}
			factor = math.Pow10(int(precision))
		}
		return math.Round(currentValue*factor) / factor, nil

	case cmd.Is(PipeCmdEquals):
		if len(tokens) < 2 {
			return nil, errors.New("missing value for equals command")
		}
		return equalsResult(tokens, currentValue == tokens[1].ValueFloat()), nil

	case cmd.Is(PipeCmdToInteger):
		return int(currentValue), nil

	case cmd.Is(PipeCmdToString):
		return strconv.FormatFloat(currentValue, 'f', -1, 64), nil

	case cmd.Is(PipeCmdToFloat):
		return currentValue, nil

	case cmd.Is(PipeCmdToBoolean):
		return currentValue != 0, nil

	case cmd.Is(PipeCmdToNumber):
		return currentValue, nil

	case cmd.Is(PipeCmdFloor):
		return math.Floor(currentValue), nil

	case cmd.Is(PipeCmdCeil):
		return math.Ceil(currentValue), nil

	case cmd.Is(PipeCmdToJSON):
		return jsonString(currentValue)

	case cmd.Is(PipeCmdAdd):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for add command")
		}
		return currentValue + tokens[1].ValueFloat(), nil

	case cmd.Is(PipeCmdSubtract):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for subtract command")
		}
		return currentValue - tokens[1].ValueFloat(), nil

	case cmd.Is(PipeCmdMultiply):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for multiply command")
		}
		return currentValue * tokens[1].ValueFloat(), nil

	case cmd.Is(PipeCmdDivide):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for divide command")
		}
		// Compare the float value: an integer check would reject fractional
		// divisors that merely convert to 0 as an integer.
		divisor := tokens[1].ValueFloat()
		if divisor == 0 {
			return nil, errors.New("divide by zero")
		}
		return currentValue / divisor, nil
	}

	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for value of type float (" + strconv.FormatFloat(currentValue, 'f', -1, 64) + ")")
}

// handleMap applies the command in tokens to a map. Only the JSON conversion
// is supported; it requires all keys to be strings. A nil map pointer is
// encoded as "null".
func handleMap(tokens TokenList, currentValue *map[any]any) (any, error) {
	if tokens[0].Is(PipeCmdToJSON) {
		if currentValue == nil {
			return "null", nil
		}

		// encoding/json cannot marshal map[any]any, so convert to a string map.
		stringMap := make(map[string]any, len(*currentValue))
		for k, v := range *currentValue {
			key, ok := k.(string)
			if !ok {
				return nil, errors.New("invalid key type for json conversion")
			}
			stringMap[key] = v
		}

		return jsonString(stringMap)
	}

	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for value of type map")
}

// handleBoolean applies the command in tokens to a boolean value.
func handleBoolean(tokens TokenList, currentValue bool) (any, error) {
	cmd := tokens[0]

	switch {
	case cmd.Is(PipeCmdEquals):
		if len(tokens) < 2 {
			return nil, errors.New("missing value for equals command")
		}
		expected := tokens[1].ValueUnescapedString() == "true"
		return equalsResult(tokens, currentValue == expected), nil

	case cmd.Is(PipeCmdNot):
		return !currentValue, nil

	case cmd.Is(PipeCmdToInteger):
		if currentValue {
			return 1, nil
		}
		return 0, nil

	case cmd.Is(PipeCmdToString):
		if currentValue {
			return "true", nil
		}
		return "false", nil

	case cmd.Is(PipeCmdToFloat):
		if currentValue {
			return 1.0, nil
		}
		return 0.0, nil

	case cmd.Is(PipeCmdToBoolean):
		return currentValue, nil

	case cmd.Is(PipeCmdToNumber):
		if currentValue {
			return 1, nil
		}
		return 0, nil

	case cmd.Is(PipeCmdToJSON):
		return jsonString(currentValue)
	}

	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for value of type boolean (" + strconv.FormatBool(currentValue) + ")")
}

// handleNil applies the command in tokens to a nil value.
func handleNil(tokens TokenList) (any, error) {
	switch {
	case tokens[0].Is(PipeCmdEquals):
		if len(tokens) < 2 {
			return nil, errors.New("missing value for equals command")
		}
		// nil never equals anything, because it is not a value.
		return equalsResult(tokens, false), nil

	case tokens[0].Is(PipeCmdToJSON):
		return "null", nil
	}

	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for value of type nil")
}

// handleInteger applies the command in tokens to an integer value.
func handleInteger(tokens TokenList, currentValue int64) (any, error) {
	cmd := tokens[0]

	switch {
	case cmd.Is(PipeCmdChar):
		return fmt.Sprintf("%c", currentValue), nil

	case cmd.Is(PipeCmdEquals):
		if len(tokens) < 2 {
			return nil, errors.New("missing value for equals command")
		}
		return equalsResult(tokens, currentValue == tokens[1].ValueInt()), nil

	case cmd.Is(PipeCmdToInteger):
		return currentValue, nil

	case cmd.Is(PipeCmdToString):
		return strconv.FormatInt(currentValue, 10), nil

	case cmd.Is(PipeCmdToFloat):
		return float64(currentValue), nil

	case cmd.Is(PipeCmdToBoolean):
		return currentValue != 0, nil

	case cmd.Is(PipeCmdToNumber):
		return float64(currentValue), nil

	case cmd.Is(PipeCmdToJSON):
		return jsonString(currentValue)

	case cmd.Is(PipeCmdAdd):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for add command")
		}
		return currentValue + tokens[1].ValueInt(), nil

	case cmd.Is(PipeCmdSubtract):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for subtract command")
		}
		return currentValue - tokens[1].ValueInt(), nil

	case cmd.Is(PipeCmdMultiply):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for multiply command")
		}
		return currentValue * tokens[1].ValueInt(), nil

	case cmd.Is(PipeCmdDivide):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for divide command")
		}
		divisor := tokens[1].ValueInt()
		if divisor == 0 {
			return nil, errors.New("divide by zero")
		}
		return currentValue / divisor, nil

	case cmd.Is(PipeCmdModulo):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for modulo command")
		}
		// Integer modulo by zero panics at runtime, so reject it explicitly.
		divisor := tokens[1].ValueInt()
		if divisor == 0 {
			return nil, errors.New("modulo by zero")
		}
		return currentValue % divisor, nil
	}

	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for value of type integer (" + strconv.FormatInt(currentValue, 10) + ")")
}

// capitalizeWords upper-cases the first character of s and every character
// that directly follows a space. The string is decoded rune by rune so that
// multi-byte characters are kept intact; invalid bytes are copied unchanged.
func capitalizeWords(s string) string {
	var b strings.Builder
	b.Grow(len(s))

	atWordStart := true
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		segment := s[i : i+size]
		if atWordStart && r != utf8.RuneError {
			segment = strings.ToUpper(segment)
		}
		b.WriteString(segment)

		atWordStart = r == ' '
		i += size
	}

	return b.String()
}

// handleStrings applies the command in tokens to a string value.
//
// Lengths and offsets (length, pad, substring) are measured in bytes.
func handleStrings(tokens TokenList, currentValue string) (any, error) {
	cmd := tokens[0]

	switch {
	case cmd.Is(PipeCmdToLower):
		return strings.ToLower(currentValue), nil

	case cmd.Is(PipeCmdToUpper):
		return strings.ToUpper(currentValue), nil

	case cmd.Is(PipeCmdEmpty):
		if currentValue == "" {
			if len(tokens) >= 2 {
				return tokens[1].ValueUnescapedString(), nil
			}
			return true, nil
		}
		if len(tokens) == 3 {
			return tokens[2].ValueUnescapedString(), nil
		}
		return false, nil

	case cmd.Is(PipeCmdPlaintext):
		return htmlTagPattern.ReplaceAllString(currentValue, ""), nil

	case cmd.Is(PipeCmdEquals):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for equals command")
		}
		return equalsResult(tokens, currentValue == tokens[1].ValueUnescapedString()), nil

	case cmd.Is(PipeCmdUCFirst):
		if currentValue == "" {
			return currentValue, nil
		}
		// Upper-case the whole first rune, not just its first byte.
		_, size := utf8.DecodeRuneInString(currentValue)
		return strings.ToUpper(currentValue[:size]) + currentValue[size:], nil

	case cmd.Is(PipeCmdUCWords):
		return capitalizeWords(currentValue), nil

	case cmd.Is(PipeCmdLength):
		return len(currentValue), nil

	case cmd.Is(PipeCmdBase64Encode):
		return base64.StdEncoding.EncodeToString([]byte(currentValue)), nil

	case cmd.Is(PipeCmdBase64Decode):
		decoded, err := base64.StdEncoding.DecodeString(currentValue)
		if err != nil {
			return nil, err
		}
		return string(decoded), nil

	case cmd.Is(PipeCmdTrim):
		return strings.TrimSpace(currentValue), nil

	case cmd.Is(PipeCmdHTMLSpecialChars):
		return html.EscapeString(currentValue), nil

	case cmd.Is(PipeCmdHTMLEntityEncode):
		// Encoding must escape; unescaping is what the decode command does.
		return html.EscapeString(currentValue), nil

	case cmd.Is(PipeCmdHTMLEntityDecode):
		return html.UnescapeString(currentValue), nil

	case cmd.Is(PipeCmdRawUrlEncode):
		return strings.ReplaceAll(url.QueryEscape(currentValue), "+", "%20"), nil

	case cmd.Is(PipeCmdUrlEncode):
		return url.QueryEscape(currentValue), nil

	case cmd.Is(PipeCmdUrlDecode):
		return url.QueryUnescape(currentValue)

	case cmd.Is(PipeCmdMD5):
		sum := md5.Sum([]byte(currentValue))
		return hex.EncodeToString(sum[:]), nil

	case cmd.Is(PipeCmdSHA1):
		sum := sha1.Sum([]byte(currentValue))
		return hex.EncodeToString(sum[:]), nil

	case cmd.Is(PipeCmdSHA256):
		sum := sha256.Sum256([]byte(currentValue))
		return hex.EncodeToString(sum[:]), nil

	case cmd.Is(PipeCmdSplit):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for split command")
		}
		return strings.Split(currentValue, tokens[1].ValueUnescapedString()), nil

	case cmd.Is(PipeCmdReplace):
		if len(tokens) < 3 {
			return nil, errors.New("missing argument for replace command")
		}
		return strings.ReplaceAll(currentValue, tokens[1].ValueUnescapedString(), tokens[2].ValueUnescapedString()), nil

	case cmd.Is(PipeCmdNop):
		return currentValue, nil

	case cmd.Is(PipeCmdStringPad):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for pad command")
		}

		padLength := int(tokens[1].ValueInt())
		if padLength < 0 {
			return nil, errors.New("pad length must be greater than 0")
		}
		if padLength < len(currentValue) {
			return currentValue, nil
		}
		padLength -= len(currentValue)

		padString := " "
		if len(tokens) > 2 {
			padString = tokens[2].ValueUnescapedString()
		}
		padType := "right"
		if len(tokens) > 3 {
			padType = tokens[3].ValueUnescapedString()
		}

		switch padType {
		case "left":
			return strings.Repeat(padString, padLength) + currentValue, nil
		case "both":
			leftPad := padLength / 2
			rightPad := padLength - leftPad
			return strings.Repeat(padString, leftPad) + currentValue + strings.Repeat(padString, rightPad), nil
		}
		return currentValue + strings.Repeat(padString, padLength), nil

	case cmd.Is(PipeCmdStringRepeat):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for repeat command")
		}
		// strings.Repeat panics on a negative count.
		count := int(tokens[1].ValueInt())
		if count < 0 {
			return nil, errors.New("repeat count must not be negative")
		}
		return strings.Repeat(currentValue, count), nil

	case cmd.Is(PipeCmdReverse):
		runes := []rune(currentValue)
		for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
			runes[i], runes[j] = runes[j], runes[i]
		}
		return string(runes), nil

	case cmd.Is(PipeCmdSubstring):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for substring command")
		}

		start := int(tokens[1].ValueInt())
		if start < 0 {
			// Slicing with a negative start would panic.
			return nil, errors.New("substring start must not be negative")
		}
		if start > len(currentValue) {
			return "", nil
		}
		if len(tokens) == 2 {
			return currentValue[start:], nil
		}

		// end is start plus the requested length; a negative result is
		// counted from the end of the string.
		end := int(tokens[2].ValueInt()) + start
		if end < 0 {
			end = len(currentValue) + end
		}
		if len(currentValue) < end {
			end = len(currentValue)
		}
		if end < start {
			return "", nil
		}
		return currentValue[start:end], nil

	case cmd.Is(PipeCmdWordwrap):
		if len(tokens) < 2 {
			return nil, errors.New("missing arguments for wordwrap command")
		}

		width := int(tokens[1].ValueInt())
		breakString := "\n"
		if len(tokens) > 2 {
			breakString = tokens[2].ValueUnescapedString()
		}
		return wordWrap(currentValue, uint(width), breakString), nil

	case cmd.Is(PipeCmdPrefix):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for prefix command")
		}
		return tokens[1].ValueUnescapedString() + currentValue, nil

	case cmd.Is(PipeCmdSuffix):
		if len(tokens) < 2 {
			return nil, errors.New("missing argument for suffix command")
		}
		return currentValue + tokens[1].ValueUnescapedString(), nil

	case cmd.Is(PipeCmdToInteger):
		return strconv.ParseInt(currentValue, 10, 64)

	case cmd.Is(PipeCmdToString):
		return currentValue, nil

	case cmd.Is(PipeCmdToFloat):
		return strconv.ParseFloat(currentValue, 64)

	case cmd.Is(PipeCmdToBoolean):
		return strconv.ParseBool(currentValue)

	case cmd.Is(PipeCmdToNumber):
		return strconv.ParseFloat(currentValue, 64)

	case cmd.Is(PipeCmdToJSON):
		return jsonString(currentValue)

	case cmd.Is(PipeCmdFromJSON):
		var decoded any
		if err := json.Unmarshal([]byte(currentValue), &decoded); err != nil {
			return nil, err
		}
		return decoded, nil
	}

	return nil, errors.New(tokens[0].ValueString() + " is not a valid command for value of type string (" + currentValue + ")")
}

// handleStaticCommand returns the literal value of a static command. A single
// float, integer or string argument keeps its type; in every other case the
// unescaped arguments are concatenated into one string.
func handleStaticCommand(tokens TokenList) (any, error) {
	if len(tokens) == 2 {
		switch argument := tokens[1]; {
		case argument.Is(tokenizer.TokenFloat):
			return argument.ValueFloat(), nil
		case argument.Is(tokenizer.TokenInteger):
			return argument.ValueInt(), nil
		case argument.Is(tokenizer.TokenString):
			return argument.ValueUnescapedString(), nil
		}
	}

	var value strings.Builder
	for _, token := range tokens[1:] {
		value.WriteString(token.ValueUnescapedString())
	}

	return value.String(), nil
}