From a364a86d41e860404f0d47461d0e72d36eab21d7 Mon Sep 17 00:00:00 2001 From: Volker Schukai <volker.schukai@schukai.com> Date: Sat, 8 Feb 2025 14:05:54 +0100 Subject: [PATCH] feat: new translate command --- source/command.go | 44 ++++++++ source/translate/aws.go | 62 ++++++++++++ source/translate/deepl.go | 62 ++++++++++++ source/translate/main.go | 196 ++++++++++++++++++++++++++++++++++++ source/translate/open-ai.go | 88 ++++++++++++++++ 5 files changed, 452 insertions(+) create mode 100644 source/translate/aws.go create mode 100644 source/translate/deepl.go create mode 100644 source/translate/main.go create mode 100644 source/translate/open-ai.go diff --git a/source/command.go b/source/command.go index bd88abe..8dc235c 100644 --- a/source/command.go +++ b/source/command.go @@ -8,6 +8,7 @@ import ( "gitlab.schukai.com/oss/bob/release" "gitlab.schukai.com/oss/bob/style" template2 "gitlab.schukai.com/oss/bob/template" + "gitlab.schukai.com/oss/bob/translate" "gitlab.schukai.com/oss/bob/types" xflags "gitlab.schukai.com/oss/libraries/go/application/xflags.git" "gopkg.in/yaml.v3" @@ -53,10 +54,53 @@ type Definition struct { Output string `short:"o" long:"output" description:"File to save generated css file" required:"true"` } `command:"generate" description:"Generate css files from a file" call:"GenerateCSS"` } `command:"css" description:"CSS related commands"` + Translate struct { + Pages struct { + DataFile string `short:"d" long:"data-file" description:"Name of the main data file" default:"data.yaml"` + Languages string `short:"l" long:"languages" description:"Languages to translate to" default:"de"` + API string `short:"a" long:"api" description:"API to use (aws, deepl, openai)" default:"aws"` + } `command:"pages" description:"Translate pages" call:"TranslatePages"` + } `command:"translate" description:"Translate commands"` Version struct { } `command:"version" description:"Prints the version" call:"PrintVersion"` } +func (d *Definition) TranslatePages(s 
*xflags.Settings[Definition]) { + + o := d.Translate.Pages.DataFile + if o == "" { + o = "data.yaml" + } + + if !path.IsAbs(o) { + o = path.Join(d.Template.Prepare.Output, o) + } + + api := d.Translate.Pages.API + if api == "" { + api = "aws" + } + + if api != "aws" && api != "deepl" && api != "openai" { + s.AddError(fmt.Errorf("Invalid API %s", api)) + return + } + + l := strings.TrimSpace(d.Translate.Pages.Languages) + if l == "" { + l = "de" + } + + languages := strings.Split(l, ",") + if len(languages) == 0 { + s.AddError(fmt.Errorf("No languages to translate to")) + return + } + + translate.Do(o, api, languages) + +} + func (d *Definition) GenerateCSS(s *xflags.Settings[Definition]) { err := style.GenerateCSS(d.CSS.Generate.Input, d.CSS.Generate.Output) if err != nil { diff --git a/source/translate/aws.go b/source/translate/aws.go new file mode 100644 index 0000000..21bf0c9 --- /dev/null +++ b/source/translate/aws.go @@ -0,0 +1,62 @@ +package translate + +import ( + "context" + "fmt" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/translate" + "regexp" + "strings" +) + +func translateTextsAws(texts []string, targetLang string) (map[string]string, error) { + // Stelle sicher, dass targetLang in Kleinbuchstaben vorliegt. + targetLang = strings.ToLower(targetLang) + + // AWS-Konfiguration laden (unterstützt z. B. Umgebungsvariablen, Shared Credentials etc.) + cfg, err := config.LoadDefaultConfig(context.Background()) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // AWS Translate Client erstellen + client := translate.NewFromConfig(cfg) + + translations := make(map[string]string) + + // Für jeden Text wird die Übersetzung einzeln angefordert. + for _, text := range texts { + // Leere Texte überspringen bzw. 
direkt übernehmen + if strings.TrimSpace(text) == "" { + translations[text] = text + continue + } + + // Länge muss größer als 2 Zeichen sein + if len(text) < 2 { + translations[text] = text + continue + } + + // Überprüfen, ob TargetLanguageCode das erforderliche Muster erfüllt. + if match, _ := regexp.MatchString(`^(?:[a-zA-Z]{2,4}|[a-zA-Z]{2}-[a-zA-Z]{2})$`, targetLang); !match { + return nil, fmt.Errorf("invalid target language code: %s", targetLang) + } + + input := &translate.TranslateTextInput{ + Text: aws.String(text), + SourceLanguageCode: aws.String("en"), // Annahme: Quellsprache ist Englisch + TargetLanguageCode: aws.String(targetLang), + } + + resp, err := client.TranslateText(context.Background(), input) + if err != nil { + return nil, fmt.Errorf("error translating text '%s': %w", text, err) + } + + translations[text] = *resp.TranslatedText + } + + return translations, nil +} diff --git a/source/translate/deepl.go b/source/translate/deepl.go new file mode 100644 index 0000000..4071569 --- /dev/null +++ b/source/translate/deepl.go @@ -0,0 +1,62 @@ +package translate + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" +) + +// translateTexts sendet einen Batch-Request an die DEEPL API, um alle übergebenen Texte in die Zielsprache zu übersetzen. 
+func translateTextsDeepl(texts []string, targetLang string) (map[string]string, error) { + apiKey := os.Getenv("DEEPL_API_KEY") + if apiKey == "" { + return nil, errors.New("Missing DEEPL_API_KEY environment variable") + } + + endpoint := "https://api.deepl.com/v2/translate" + form := url.Values{} + form.Set("auth_key", apiKey) + form.Set("target_lang", strings.ToUpper(targetLang)) // DEEPL erwartet Großbuchstaben + + for _, text := range texts { + form.Add("text", text) + } + + resp, err := http.PostForm(endpoint, form) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("DEEPL API returned status %d: %s", resp.StatusCode, body) + } + + // Antwort-JSON dekodieren + var deeplResp struct { + Translations []struct { + DetectedSourceLanguage string `json:"detected_source_language"` + Text string `json:"text"` + } `json:"translations"` + } + + if err := json.NewDecoder(resp.Body).Decode(&deeplResp); err != nil { + return nil, err + } + + if len(deeplResp.Translations) != len(texts) { + return nil, fmt.Errorf("DEEPL API returned %d translations, expected %d", len(deeplResp.Translations), len(texts)) + } + + result := make(map[string]string) + for i, orig := range texts { + result[orig] = deeplResp.Translations[i].Text + } + return result, nil +} diff --git a/source/translate/main.go b/source/translate/main.go new file mode 100644 index 0000000..e3a4f54 --- /dev/null +++ b/source/translate/main.go @@ -0,0 +1,196 @@ +package translate + +import ( + "fmt" + "github.com/charmbracelet/log" + "gitlab.schukai.com/oss/bob/types" + "gopkg.in/yaml.v3" + "os" + "strings" +) + +func Do(dataFile, api string, targetLanguages []string) { + + data, err := os.ReadFile(dataFile) + if err != nil { + log.Fatalf("Error reading data file %s: %v", dataFile, err) + } + + storage := types.NewPageDataStorage() + err = yaml.Unmarshal(data, storage) + if err != nil { + 
log.Fatalf("Error unmarshalling data file %s: %v", dataFile, err) + } + + // 2. Alle zu übersetzenden Zeichenketten sammeln (nur unique) + uniqueTexts := collectUniqueTexts(&storage) + fmt.Printf("Found %d unique texts to translate.\n", len(uniqueTexts)) + + // 3. Für die definierten Zielsprachen (de, it, pl, fr) die Übersetzung per DEEPL API holen + translationsByLang := make(map[string]map[string]string) + + for _, lang := range targetLanguages { + log.Info("Translating to %s...\n", lang) + + var trans map[string]string + var err error + + switch api { + case "aws": + trans, err = translateTextsAws(uniqueTexts, lang) + case "deepl": + trans, err = translateTextsDeepl(uniqueTexts, lang) + case "openai": + trans, err = translateTextsOpenAI(uniqueTexts, lang) + } + + if err != nil { + log.Fatalf("Error translating to %s: %v", lang, err) + return + } + + translationsByLang[lang] = trans + } + + // 4. Für jede Sprache: Original-Struktur kopieren, übersetzte Texte an derselben Stelle eintragen und als YAML speichern. + for _, lang := range targetLanguages { + + for _, page := range storage { + + translatedPage := applyTranslations(*page, translationsByLang[lang]) + // Optional: Sprache im PageData anpassen + translatedPage.Lang = lang + + outData, err := yaml.Marshal(&translatedPage) + if err != nil { + log.Fatalf("Error marshalling translated data for %s: %v", lang, err) + } + + filename := fmt.Sprintf("%s.yaml", lang) + err = os.WriteFile(filename, outData, 0644) + if err != nil { + log.Fatalf("Error writing translated data for %s: %v", lang, err) + } + log.Info("Translated data for %s written to %s.\n", lang, filename) + } + } +} + +// collectUniqueTexts sammelt alle eindeutigen Zeichenketten aus den Feldern, die übersetzt werden sollen. 
+func collectUniqueTexts(storage *types.PageDataStorage) []string { + unique := make(map[string]struct{}) + add := func(s string) { + s = strings.TrimSpace(s) + if s != "" { + unique[s] = struct{}{} + } + } + + for _, page := range *storage { + + // PageData.Title + add(page.Title) + + // PageData.Meta (nur Werte) + for _, v := range page.Meta { + add(v) + } + + // Text.Text + for _, t := range page.Text { + add(t.Text) + } + + // Image: Alt und Title + for _, img := range page.Images { + add(img.Alt) + add(img.Title) + } + + // Anchor: Title + for _, anc := range page.Anchors { + add(anc.Title) + } + + // Translations: Werte in KeyValues (falls String) + for _, trans := range page.Translations { + for _, v := range trans.KeyValues { + if s, ok := v.(string); ok { + add(s) + } + } + } + + } + + // In Slice umwandeln + texts := make([]string, 0, len(unique)) + for s := range unique { + texts = append(texts, s) + } + return texts +} + +// applyTranslations nimmt die Originalstruktur und ersetzt alle übersetzbaren Felder durch die entsprechenden Übersetzungen. 
+func applyTranslations(page types.PageData, translations map[string]string) types.PageData { + newPage := page + + // PageData.Title + if t, ok := translations[page.Title]; ok { + newPage.Title = t + } + + // PageData.Meta + newMeta := make(map[string]string) + for k, v := range page.Meta { + if t, ok := translations[v]; ok { + newMeta[k] = t + } else { + newMeta[k] = v + } + } + newPage.Meta = newMeta + + // Text.Text + for i, t := range newPage.Text { + if tr, ok := translations[t.Text]; ok { + newPage.Text[i].Text = tr + } + } + + // Images: Alt und Title + for i, img := range newPage.Images { + if tr, ok := translations[img.Alt]; ok { + newPage.Images[i].Alt = tr + } + if tr, ok := translations[img.Title]; ok { + newPage.Images[i].Title = tr + } + } + + // Anchors: Title + for i, anc := range newPage.Anchors { + if tr, ok := translations[anc.Title]; ok { + newPage.Anchors[i].Title = tr + } + } + + // Translations: KeyValues (nur falls Value ein String ist) + for i, t := range newPage.Translations { + newKV := make(map[string]interface{}) + for k, v := range t.KeyValues { + if s, ok := v.(string); ok { + if tr, found := translations[s]; found { + newKV[k] = tr + } else { + newKV[k] = s + } + } else { + newKV[k] = v + } + } + newPage.Translations[i].KeyValues = newKV + } + + return newPage +} diff --git a/source/translate/open-ai.go b/source/translate/open-ai.go new file mode 100644 index 0000000..646c4aa --- /dev/null +++ b/source/translate/open-ai.go @@ -0,0 +1,88 @@ +package translate + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "strings" +) + +// translateTextsOpenAI übersetzt eine Liste von Texten mittels der OpenAI API. +// Es wird für jeden Text ein eigener API-Aufruf gemacht. +// Beachte: Für jeden API-Aufruf muss der OPENAI_API_KEY in der Umgebung gesetzt sein. 
// translateTextsOpenAI translates texts from English into targetLang with the
// OpenAI completions API, one request per text.
//
// The API key is read from the OPENAI_API_KEY environment variable; without
// it the function returns an error before doing anything else. Blank texts
// are copied through unchanged without a request.
//
// It returns a map from each original text to its whitespace-trimmed
// translation. On the first failure it returns a nil map and an error.
func translateTextsOpenAI(texts []string, targetLang string) (map[string]string, error) {
	apiKey := os.Getenv("OPENAI_API_KEY")
	if apiKey == "" {
		return nil, errors.New("Missing OPENAI_API_KEY environment variable")
	}

	// One client for all requests so connections can be reused.
	client := &http.Client{}

	translations := make(map[string]string, len(texts))
	for _, text := range texts {
		if strings.TrimSpace(text) == "" {
			translations[text] = text
			continue
		}

		translation, err := openAITranslateOne(client, apiKey, text, targetLang)
		if err != nil {
			return nil, err
		}
		translations[text] = translation
	}

	return translations, nil
}

// openAITranslateOne performs a single completion request for text. It is a
// separate function so the deferred Body.Close runs after every request
// rather than only when the whole batch has finished.
func openAITranslateOne(client *http.Client, apiKey, text, targetLang string) (string, error) {
	prompt := fmt.Sprintf("Translate the following text from English to %s:\n\n%s", targetLang, text)

	// NOTE(review): "text-davinci-003" on /v1/completions is a legacy
	// model/endpoint pair — confirm it is still served.
	requestBytes, err := json.Marshal(map[string]interface{}{
		"model":       "text-davinci-003",
		"prompt":      prompt,
		"max_tokens":  1024,
		"temperature": 0.3,
	})
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", "https://api.openai.com/v1/completions", bytes.NewReader(requestBytes))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+apiKey)

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("OpenAI API returned status %d: %s", resp.StatusCode, body)
	}

	var responseData struct {
		Choices []struct {
			Text string `json:"text"`
		} `json:"choices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&responseData); err != nil {
		return "", err
	}

	if len(responseData.Choices) == 0 {
		return "", fmt.Errorf("no translation received for text: %s", text)
	}

	// Only the first choice is used.
	return strings.TrimSpace(responseData.Choices[0].Text), nil
}