From f981f94857d166aa2b54cee8cae353bb53e425e8 Mon Sep 17 00:00:00 2001
From: Gabriel Einsdorf <gabriel.einsdorf@knime.com>
Date: Tue, 2 May 2023 18:00:14 +0200
Subject: [PATCH] Encode binary request bodies as base64 data URLs (#117)

This brings go-httpbin's behavior more in line with original httpbin.

Fixes #90.
---
 httpbin/handlers_test.go | 59 ++++++++++++++++++++++++++++++++++++++--
 httpbin/helpers.go       | 36 ++++++++++++++++++++++--
 2 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/httpbin/handlers_test.go b/httpbin/handlers_test.go
index 397a151..26d4ea9 100644
--- a/httpbin/handlers_test.go
+++ b/httpbin/handlers_test.go
@@ -6,6 +6,7 @@ import (
 	"compress/gzip"
 	"compress/zlib"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -544,6 +545,7 @@ func testRequestWithBody(t *testing.T, verb, path string) {
 		testRequestWithBodyMultiPartBody,
 		testRequestWithBodyQueryParams,
 		testRequestWithBodyQueryParamsAndBody,
+		testRequestWithBodyBinaryBody,
 	}
 	for _, testFunc := range testFuncs {
 		testFunc := testFunc
@@ -555,6 +557,57 @@ func testRequestWithBody(t *testing.T, verb, path string) {
 	}
 }
 
+func testRequestWithBodyBinaryBody(t *testing.T, verb string, path string) { // checks that bodies sent with these content types come back as base64 data URLs in resp.Data
+	tests := []struct {
+		contentType string
+		requestBody string
+	}{
+		{"application/octet-stream", "encodeMe"},
+		{"image/png", "encodeMe-png"},
+		{"image/webp", "encodeMe-webp"},
+		{"image/jpeg", "encodeMe-jpeg"},
+		{"unknown", "encodeMe-unknown"}, // a value without "/" is still expected verbatim in the data URL
+	}
+	for _, test := range tests {
+		test := test // shadow the range variable so the parallel subtest closure sees its own copy
+		t.Run("content type/"+test.contentType, func(t *testing.T) {
+			t.Parallel()
+
+			testBody := bytes.NewReader([]byte(test.requestBody))
+
+			r, _ := http.NewRequest(verb, path, testBody) // NOTE(review): the NewRequest error is discarded here
+			r.Header.Set("Content-Type", test.contentType)
+			w := httptest.NewRecorder()
+			app.ServeHTTP(w, r)
+
+			assertStatusCode(t, w, http.StatusOK)
+			assertContentType(t, w, jsonContentType)
+
+			var resp *bodyResponse
+			err := json.Unmarshal(w.Body.Bytes(), &resp)
+			if err != nil {
+				t.Fatalf("failed to unmarshal body %s from JSON: %s", w.Body, err)
+			}
+
+			expected := "data:" + test.contentType + ";base64," + base64.StdEncoding.EncodeToString([]byte(test.requestBody))
+
+			if resp.Data != expected {
+				t.Fatalf("expected binary encoded response data: %#v got %#v", expected, resp.Data)
+			}
+			if resp.JSON != nil { // the body must not have been decoded as JSON
+				t.Fatalf("expected nil response json, got %#v", resp.JSON)
+			}
+
+			if len(resp.Args) > 0 { // the request carries no query string
+				t.Fatalf("expected no query params, got %#v", resp.Args)
+			}
+			if len(resp.Form) > 0 { // the body must not have been parsed as form data
+				t.Fatalf("expected no form data, got %#v", resp.Form)
+			}
+		})
+	}
+}
+
 func testRequestWithBodyEmptyBody(t *testing.T, verb string, path string) {
 	tests := []struct {
 		contentType string
@@ -681,8 +734,10 @@ func testRequestWithBodyFormEncodedBodyNoContentType(t *testing.T, verb, path st
 	if len(resp.Form) != 0 {
 		t.Fatalf("expected no form values, got %d", len(resp.Form))
 	}
-	if string(resp.Data) != params.Encode() {
-		t.Fatalf("response data mismatch, %#v != %#v", string(resp.Data), params.Encode())
+	// Because we did not set a content type, httpbin will return the body as a base64-encoded data URL.
+	expectedBody := "data:application/octet-stream;base64," + base64.StdEncoding.EncodeToString([]byte(params.Encode()))
+	if string(resp.Data) != expectedBody {
+		t.Fatalf("response data mismatch, %#v != %#v", string(resp.Data), expectedBody)
 	}
 }
 
diff --git a/httpbin/helpers.go b/httpbin/helpers.go
index a8bf963..b99f76e 100644
--- a/httpbin/helpers.go
+++ b/httpbin/helpers.go
@@ -132,8 +132,21 @@ func parseBody(w http.ResponseWriter, r *http.Request, resp *bodyResponse) error
 	r.Body = io.NopCloser(bytes.NewBuffer(body))
 
 	ct := r.Header.Get("Content-Type")
+
+	// Strip off any parameters (e.g. the charset), if present
+	if strings.Contains(ct, ";") {
+		ct = strings.Split(ct, ";")[0]
+	}
+
 	switch {
-	case strings.HasPrefix(ct, "application/x-www-form-urlencoded"):
+	// cases where we don't need to parse the body
+	case strings.HasPrefix(ct, "html/"):
+		fallthrough
+	case strings.HasPrefix(ct, "text/"):
+		// string body is already set above
+		return nil
+
+	case ct == "application/x-www-form-urlencoded":
 		// r.ParseForm() does not populate r.PostForm for DELETE or GET requests, but
 		// we need it to for compatibility with the httpbin implementation, so
 		// we trick it with this ugly hack.
@@ -146,7 +159,7 @@ func parseBody(w http.ResponseWriter, r *http.Request, resp *bodyResponse) error
 			return err
 		}
 		resp.Form = r.PostForm
-	case strings.HasPrefix(ct, "multipart/form-data"):
+	case ct == "multipart/form-data":
 		// The memory limit here only restricts how many parts will be kept in
 		// memory before overflowing to disk:
 		// https://golang.org/pkg/net/http/#Request.ParseMultipartForm
@@ -154,16 +167,33 @@ func parseBody(w http.ResponseWriter, r *http.Request, resp *bodyResponse) error
 			return err
 		}
 		resp.Form = r.PostForm
-	case strings.HasPrefix(ct, "application/json"):
+	case ct == "application/json":
 		err := json.NewDecoder(r.Body).Decode(&resp.JSON)
 		if err != nil && err != io.EOF {
 			return err
 		}
+
+	default:
+		// If we don't have a special case for the content type, return the body encoded as a base64 data URL.
+		// Any charset information was already stripped from ct above, since we re-encode the body.
+		resp.Data = encodeData(body, ct)
 	}
 
 	return nil
 }
 
+// encodeData returns body as a base64-encoded data URL ("data:<contentType>;base64,<data>") for the given content type.
+func encodeData(body []byte, contentType string) string {
+	data := base64.StdEncoding.EncodeToString(body)
+
+	// If no content type is provided, default to application/octet-stream.
+	if contentType == "" {
+		contentType = "application/octet-stream"
+	}
+
+	return string("data:" + contentType + ";base64," + data) // NOTE(review): the string() conversion is redundant; the operand is already a string
+}
+
 // parseDuration takes a user's input as a string and attempts to convert it
 // into a time.Duration. If not given as a go-style duration string, the input
 // is assumed to be seconds as a float.
-- 
GitLab