From a364a86d41e860404f0d47461d0e72d36eab21d7 Mon Sep 17 00:00:00 2001
From: Volker Schukai <volker.schukai@schukai.com>
Date: Sat, 8 Feb 2025 14:05:54 +0100
Subject: [PATCH] feat: new translate command

---
 source/command.go           |  44 ++++++++
 source/translate/aws.go     |  62 ++++++++++++
 source/translate/deepl.go   |  62 ++++++++++++
 source/translate/main.go    | 196 ++++++++++++++++++++++++++++++++++++
 source/translate/open-ai.go |  88 ++++++++++++++++
 5 files changed, 452 insertions(+)
 create mode 100644 source/translate/aws.go
 create mode 100644 source/translate/deepl.go
 create mode 100644 source/translate/main.go
 create mode 100644 source/translate/open-ai.go

diff --git a/source/command.go b/source/command.go
index bd88abe..8dc235c 100644
--- a/source/command.go
+++ b/source/command.go
@@ -8,6 +8,7 @@ import (
 	"gitlab.schukai.com/oss/bob/release"
 	"gitlab.schukai.com/oss/bob/style"
 	template2 "gitlab.schukai.com/oss/bob/template"
+	"gitlab.schukai.com/oss/bob/translate"
 	"gitlab.schukai.com/oss/bob/types"
 	xflags "gitlab.schukai.com/oss/libraries/go/application/xflags.git"
 	"gopkg.in/yaml.v3"
@@ -53,10 +54,53 @@ type Definition struct {
 			Output string `short:"o" long:"output" description:"File to save generated css file" required:"true"`
 		} `command:"generate" description:"Generate css files from a file" call:"GenerateCSS"`
 	} `command:"css" description:"CSS related commands"`
+	Translate struct {
+		Pages struct {
+			DataFile  string `short:"d" long:"data-file" description:"Name of the main data file" default:"data.yaml"`
+			Languages string `short:"l" long:"languages" description:"Languages to translate to" default:"de"`
+			API       string `short:"a" long:"api" description:"API to use (aws, deepl, openai)" default:"aws"`
+		} `command:"pages" description:"Translate pages" call:"TranslatePages"`
+	} `command:"translate" description:"Translate commands"`
 	Version struct {
 	} `command:"version" description:"Prints the version" call:"PrintVersion"`
 }
 
+func (d *Definition) TranslatePages(s *xflags.Settings[Definition]) {
+
+	o := d.Translate.Pages.DataFile
+	if o == "" {
+		o = "data.yaml"
+	}
+
+	if !path.IsAbs(o) {
+		o = path.Join(d.Template.Prepare.Output, o)
+	}
+
+	api := d.Translate.Pages.API
+	if api == "" {
+		api = "aws"
+	}
+
+	if api != "aws" && api != "deepl" && api != "openai" {
+		s.AddError(fmt.Errorf("Invalid API %s", api))
+		return
+	}
+
+	l := strings.TrimSpace(d.Translate.Pages.Languages)
+	if l == "" {
+		l = "de"
+	}
+
+	languages := strings.Split(l, ",")
+	if len(languages) == 0 {
+		s.AddError(fmt.Errorf("No languages to translate to"))
+		return
+	}
+
+	translate.Do(o, api, languages)
+
+}
+
 func (d *Definition) GenerateCSS(s *xflags.Settings[Definition]) {
 	err := style.GenerateCSS(d.CSS.Generate.Input, d.CSS.Generate.Output)
 	if err != nil {
diff --git a/source/translate/aws.go b/source/translate/aws.go
new file mode 100644
index 0000000..21bf0c9
--- /dev/null
+++ b/source/translate/aws.go
@@ -0,0 +1,62 @@
+package translate
+
+import (
+	"context"
+	"fmt"
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/service/translate"
+	"regexp"
+	"strings"
+)
+
+// translateTextsAws translates texts from English into targetLang with AWS Translate,
+// one request per text, and returns a map from each original text to its translation.
+// Blank texts and texts shorter than two bytes are passed through untranslated.
+// Configuration and credentials are resolved by config.LoadDefaultConfig.
+//
+// An invalid targetLang is rejected before any configuration or network work is done.
+// The first failing request aborts the run; its error is returned with a nil map.
+func translateTextsAws(texts []string, targetLang string) (map[string]string, error) {
+	targetLang = strings.ToLower(targetLang)
+
+	// Check the code once up front instead of on every loop iteration.
+	langPattern := regexp.MustCompile(`^(?:[a-zA-Z]{2,4}|[a-zA-Z]{2}-[a-zA-Z]{2})$`)
+	if !langPattern.MatchString(targetLang) {
+		return nil, fmt.Errorf("invalid target language code: %s", targetLang)
+	}
+
+	// Load the default AWS configuration and create the Translate client.
+	ctx := context.Background()
+	cfg, err := config.LoadDefaultConfig(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load AWS config: %w", err)
+	}
+	client := translate.NewFromConfig(cfg)
+
+	translations := make(map[string]string, len(texts))
+	for _, text := range texts {
+		// Blank texts and texts shorter than two bytes are taken over unchanged.
+		if strings.TrimSpace(text) == "" || len(text) < 2 {
+			translations[text] = text
+			continue
+		}
+
+		resp, err := client.TranslateText(ctx, &translate.TranslateTextInput{
+			Text:               aws.String(text),
+			SourceLanguageCode: aws.String("en"), // assumption: the source language is English
+			TargetLanguageCode: aws.String(targetLang),
+		})
+		if err != nil {
+			return nil, fmt.Errorf("error translating text '%s': %w", text, err)
+		}
+
+		// TranslatedText is a pointer; guard it instead of risking a nil dereference.
+		if resp.TranslatedText == nil {
+			return nil, fmt.Errorf("no translation received for text: %s", text)
+		}
+		translations[text] = *resp.TranslatedText
+	}
+
+	return translations, nil
+}
diff --git a/source/translate/deepl.go b/source/translate/deepl.go
new file mode 100644
index 0000000..4071569
--- /dev/null
+++ b/source/translate/deepl.go
@@ -0,0 +1,62 @@
+package translate
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+)
+
+// translateTextsDeepl translates texts into targetLang via the DeepL API and maps each original
+// text to its translation. Texts go out in batches of 50, the API's per-request limit.
+func translateTextsDeepl(texts []string, targetLang string) (map[string]string, error) {
+	apiKey := os.Getenv("DEEPL_API_KEY")
+	if apiKey == "" {
+		return nil, errors.New("Missing DEEPL_API_KEY environment variable")
+	}
+	result := make(map[string]string, len(texts))
+	for start := 0; start < len(texts); start += 50 {
+		batch := texts[start:]
+		if len(batch) > 50 {
+			batch = batch[:50]
+		}
+		form := url.Values{"text": batch, "target_lang": {strings.ToUpper(targetLang)}} // DeepL expects upper case
+		req, err := http.NewRequest(http.MethodPost, "https://api.deepl.com/v2/translate", strings.NewReader(form.Encode()))
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+		req.Header.Set("Authorization", "DeepL-Auth-Key "+apiKey) // header auth as documented by DeepL
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return nil, err
+		}
+		body, err := io.ReadAll(resp.Body)
+		resp.Body.Close() // close per batch; a defer would keep every body open until return
+		if err != nil {
+			return nil, err
+		}
+		if resp.StatusCode != http.StatusOK {
+			return nil, fmt.Errorf("DEEPL API returned status %d: %s", resp.StatusCode, body)
+		}
+		var deeplResp struct {
+			Translations []struct {
+				Text string `json:"text"`
+			} `json:"translations"`
+		}
+		if err := json.Unmarshal(body, &deeplResp); err != nil {
+			return nil, err
+		}
+		if len(deeplResp.Translations) != len(batch) {
+			return nil, fmt.Errorf("DEEPL API returned %d translations, expected %d", len(deeplResp.Translations), len(batch))
+		}
+		for i, orig := range batch {
+			result[orig] = deeplResp.Translations[i].Text
+		}
+	}
+	return result, nil
+}
diff --git a/source/translate/main.go b/source/translate/main.go
new file mode 100644
index 0000000..e3a4f54
--- /dev/null
+++ b/source/translate/main.go
@@ -0,0 +1,196 @@
+package translate
+
+import (
+	"fmt"
+	"github.com/charmbracelet/log"
+	"gitlab.schukai.com/oss/bob/types"
+	"gopkg.in/yaml.v3"
+	"os"
+	"strings"
+)
+
+// Do translates the page data in dataFile into every language of targetLanguages
+// and writes one "<lang>.yaml" file per language to the current working directory.
+//
+// api selects the backend: "aws", "deepl" or "openai". All texts are translated
+// for all languages before the first file is written. Every failure is fatal:
+// it is reported through log.Fatalf.
+func Do(dataFile, api string, targetLanguages []string) {
+	data, err := os.ReadFile(dataFile)
+	if err != nil {
+		log.Fatalf("Error reading data file %s: %v", dataFile, err)
+	}
+
+	storage := types.NewPageDataStorage()
+	if err := yaml.Unmarshal(data, storage); err != nil {
+		log.Fatalf("Error unmarshalling data file %s: %v", dataFile, err)
+	}
+
+	// Collect each translatable string once so it is sent to the API only once per language.
+	uniqueTexts := collectUniqueTexts(&storage)
+	fmt.Printf("Found %d unique texts to translate.\n", len(uniqueTexts))
+
+	// Fetch the translations for every target language from the selected API.
+	translationsByLang := make(map[string]map[string]string, len(targetLanguages))
+	for _, lang := range targetLanguages {
+		// Infof, not Info: Info would treat lang as a key/value argument, not a format argument.
+		log.Infof("Translating to %s...", lang)
+
+		var trans map[string]string
+		var err error
+		switch api {
+		case "aws":
+			trans, err = translateTextsAws(uniqueTexts, lang)
+		case "deepl":
+			trans, err = translateTextsDeepl(uniqueTexts, lang)
+		case "openai":
+			trans, err = translateTextsOpenAI(uniqueTexts, lang)
+		default:
+			err = fmt.Errorf("unknown API %q", api)
+		}
+		if err != nil {
+			log.Fatalf("Error translating to %s: %v", lang, err)
+		}
+		translationsByLang[lang] = trans
+	}
+
+	// Write one file per language that mirrors the structure of the input. All pages
+	// are gathered first; writing page by page would leave only the last page in the file.
+	for _, lang := range targetLanguages {
+		translated := types.NewPageDataStorage()
+		for key, page := range storage {
+			translatedPage := applyTranslations(*page, translationsByLang[lang])
+			translatedPage.Lang = lang
+			translated[key] = &translatedPage
+		}
+		outData, err := yaml.Marshal(translated)
+		if err != nil {
+			log.Fatalf("Error marshalling translated data for %s: %v", lang, err)
+		}
+
+		filename := fmt.Sprintf("%s.yaml", lang)
+		if err := os.WriteFile(filename, outData, 0644); err != nil {
+			log.Fatalf("Error writing translated data for %s: %v", lang, err)
+		}
+		log.Infof("Translated data for %s written to %s.", lang, filename)
+	}
+}
+
+// collectUniqueTexts returns every distinct, non-blank translatable string found in
+// storage. Strings are trimmed before they are compared and stored.
+//
+// For each page it visits the title, the meta values, the text entries, the alt and
+// title of images, the title of anchors and the string values of the translation
+// key/value maps. The order of the result is not defined.
+func collectUniqueTexts(storage *types.PageDataStorage) []string {
+	seen := make(map[string]struct{})
+	remember := func(raw string) {
+		if trimmed := strings.TrimSpace(raw); trimmed != "" {
+			seen[trimmed] = struct{}{}
+		}
+	}
+
+	for _, page := range *storage {
+		remember(page.Title)
+
+		// Meta: only the values are collected, never the keys.
+		for _, value := range page.Meta {
+			remember(value)
+		}
+
+		// Text entries.
+		for _, entry := range page.Text {
+			remember(entry.Text)
+		}
+
+		// Images contribute both their alt text and their title.
+		for _, image := range page.Images {
+			remember(image.Alt)
+			remember(image.Title)
+		}
+
+		// Anchors contribute their title.
+		for _, anchor := range page.Anchors {
+			remember(anchor.Title)
+		}
+
+		// Translations: values that are not strings are skipped.
+		for _, group := range page.Translations {
+			for _, value := range group.KeyValues {
+				if str, isString := value.(string); isString {
+					remember(str)
+				}
+			}
+		}
+	}
+
+	result := make([]string, 0, len(seen))
+	for text := range seen {
+		result = append(result, text)
+	}
+	return result
+}
+
+// applyTranslations returns a copy of page in which every translatable field is
+// replaced by its entry in translations; fields without an entry keep their value.
+// Lookups use the trimmed string because collectUniqueTexts trims what it collects.
+//
+// The Text, Images, Anchors and Translations slices are copied before they are
+// written to. Otherwise the result would share their backing arrays with page, and
+// translating into one language would overwrite the source texts needed for the next.
+//
+// NOTE(review): the copies are shallow; this assumes the slice elements are plain
+// values. If they are pointers the elements are still shared — confirm in types.
+func applyTranslations(page types.PageData, translations map[string]string) types.PageData {
+	// lookup returns the translation of s, or s itself when there is none.
+	lookup := func(s string) string {
+		if t, ok := translations[strings.TrimSpace(s)]; ok {
+			return t
+		}
+		return s
+	}
+
+	newPage := page
+	newPage.Title = lookup(page.Title)
+
+	// PageData.Meta: only the values are translated, the keys are kept.
+	newPage.Meta = make(map[string]string, len(page.Meta))
+	for k, v := range page.Meta {
+		newPage.Meta[k] = lookup(v)
+	}
+
+	// Text.Text
+	newPage.Text = append(page.Text[:0:0], page.Text...)
+	for i := range newPage.Text {
+		newPage.Text[i].Text = lookup(newPage.Text[i].Text)
+	}
+
+	// Images: Alt and Title
+	newPage.Images = append(page.Images[:0:0], page.Images...)
+	for i := range newPage.Images {
+		newPage.Images[i].Alt = lookup(newPage.Images[i].Alt)
+		newPage.Images[i].Title = lookup(newPage.Images[i].Title)
+	}
+
+	// Anchors: Title
+	newPage.Anchors = append(page.Anchors[:0:0], page.Anchors...)
+	for i := range newPage.Anchors {
+		newPage.Anchors[i].Title = lookup(newPage.Anchors[i].Title)
+	}
+
+	// Translations: KeyValues, string values only; other values are kept as they are.
+	newPage.Translations = append(page.Translations[:0:0], page.Translations...)
+	for i := range newPage.Translations {
+		newKV := make(map[string]interface{}, len(newPage.Translations[i].KeyValues))
+		for k, v := range newPage.Translations[i].KeyValues {
+			if s, ok := v.(string); ok {
+				newKV[k] = lookup(s)
+			} else {
+				newKV[k] = v
+			}
+		}
+		newPage.Translations[i].KeyValues = newKV
+	}
+
+	return newPage
+}
diff --git a/source/translate/open-ai.go b/source/translate/open-ai.go
new file mode 100644
index 0000000..646c4aa
--- /dev/null
+++ b/source/translate/open-ai.go
@@ -0,0 +1,88 @@
+package translate
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+)
+
+// translateTextsOpenAI translates texts from English into targetLang with the
+// OpenAI completions API, one request per text, and returns a map from each
+// original text to its translation. Blank texts are passed through unchanged.
+// The API key is read from the OPENAI_API_KEY environment variable.
+func translateTextsOpenAI(texts []string, targetLang string) (map[string]string, error) {
+	apiKey := os.Getenv("OPENAI_API_KEY")
+	if apiKey == "" {
+		return nil, errors.New("Missing OPENAI_API_KEY environment variable")
+	}
+
+	client := &http.Client{} // one client for all requests so connections are reused
+	translations := make(map[string]string, len(texts))
+	for _, text := range texts {
+		if strings.TrimSpace(text) == "" {
+			translations[text] = text
+			continue
+		}
+
+		translation, err := translateTextOpenAI(client, apiKey, text, targetLang)
+		if err != nil {
+			return nil, err
+		}
+		translations[text] = translation
+	}
+
+	return translations, nil
+}
+
+// translateTextOpenAI sends a single completion request for text and returns the
+// trimmed translation. It is a separate function so that the deferred Close of the
+// response body runs after every request rather than when the whole run ends.
+func translateTextOpenAI(client *http.Client, apiKey, text, targetLang string) (string, error) {
+	prompt := fmt.Sprintf("Translate the following text from English to %s:\n\n%s", targetLang, text)
+	requestBytes, err := json.Marshal(map[string]interface{}{
+		// text-davinci-003 has been shut down by OpenAI; gpt-3.5-turbo-instruct
+		// is the replacement model served on this completions endpoint.
+		"model":       "gpt-3.5-turbo-instruct",
+		"prompt":      prompt,
+		"max_tokens":  1024,
+		"temperature": 0.3,
+	})
+	if err != nil {
+		return "", err
+	}
+
+	req, err := http.NewRequest("POST", "https://api.openai.com/v1/completions", bytes.NewBuffer(requestBytes))
+	if err != nil {
+		return "", err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return "", fmt.Errorf("OpenAI API returned status %d: %s", resp.StatusCode, body)
+	}
+	var responseData struct {
+		Choices []struct {
+			Text string `json:"text"`
+		} `json:"choices"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&responseData); err != nil {
+		return "", err
+	}
+	if len(responseData.Choices) == 0 {
+		return "", fmt.Errorf("no translation received for text: %s", text)
+	}
+	return strings.TrimSpace(responseData.Choices[0].Text), nil
+}
-- 
GitLab