storage: add header sanitisation for SignedURL.

- SignedURL should sanitise headers provided via 'opts' according to the specification provided at:
  https://cloud.google.com/storage/docs/access-control/signed-urls#about-canonical-extension-headers.
- Drive-by fix in 'internal/rcpreplay' for a 't.Fatalf()' format string.

Change-Id: Ic12ec062a64a5e329d64ab915ff69def20cda287
Reviewed-on: https://code-review.googlesource.com/20510
Reviewed-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/internal/rpcreplay/rpcreplay_test.go b/internal/rpcreplay/rpcreplay_test.go
index 516fec5..46cc279 100644
--- a/internal/rpcreplay/rpcreplay_test.go
+++ b/internal/rpcreplay/rpcreplay_test.go
@@ -224,7 +224,7 @@
 	for i, w := range wantEntries {
 		g, err := readEntry(buf)
 		if err != nil {
-			t.Fatalf("%#d: %v", i+1, err)
+			t.Fatalf("#%d: %v", i+1, err)
 		}
 		if !g.equal(w) {
 			t.Errorf("#%d:\ngot  %+v\nwant %+v", i+1, g, w)
diff --git a/storage/storage.go b/storage/storage.go
index c6ba1cb..d778480 100644
--- a/storage/storage.go
+++ b/storage/storage.go
@@ -30,6 +30,8 @@
 	"net/http"
 	"net/url"
 	"reflect"
+	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 	"time"
@@ -182,6 +184,61 @@
 	MD5 string
 }
 
+var (
+	canonicalHeaderRegexp    = regexp.MustCompile(`(?i)^(x-goog-[^:]+):(.*)?$`)
+	excludedCanonicalHeaders = map[string]bool{
+		"x-goog-encryption-key":        true,
+		"x-goog-encryption-key-sha256": true,
+	}
+)
+
+// Headers used for signing need to follow the specifications set out at:
+// https://cloud.google.com/storage/docs/access-control/signed-urls#about-canonical-extension-headers.
+func sanitizeHeaders(hdrs []string) []string {
+	headerMap := map[string][]string{}
+	for _, hdr := range hdrs {
+		// No leading or trailing whitespaces.
+		sanitizedHeader := strings.TrimSpace(hdr)
+
+		// Only keep canonical headers, discard any others.
+		headerMatches := canonicalHeaderRegexp.FindStringSubmatch(sanitizedHeader)
+		if len(headerMatches) == 0 {
+			continue
+		}
+
+		header := strings.ToLower(strings.TrimSpace(headerMatches[1]))
+		value := strings.TrimSpace(headerMatches[2])
+		if excludedCanonicalHeaders[headerMatches[1]] {
+			// Do not keep any deliberately excluded canonical headers when signing.
+			continue
+		}
+
+		if len(value) > 0 {
+			// Remove duplicate headers by appending the values of duplicates
+			// in their order of appearance.
+			headerMap[header] = append(headerMap[header], value)
+		}
+	}
+
+	var sanitizedHeaders []string
+	for header, values := range headerMap {
+		// There should be no spaces around the colon separating the
+		// header name from the header value or around the values
+		// themselves. The values should be separated by commas.
+		// NOTE: The semantics for headers without a value are not clear.
+		//       However from specifications these should be edge-cases
+		//       anyway and we should assume that there will be no
+		//       canonical headers using empty values. Any such headers
+		//       are discarded at the regexp stage above.
+		sanitizedHeaders = append(
+			sanitizedHeaders,
+			fmt.Sprintf("%s:%s", header, strings.Join(values, ",")),
+		)
+	}
+	sort.Strings(sanitizedHeaders)
+	return sanitizedHeaders
+}
+
 // SignedURL returns a URL for the specified object. Signed URLs allow
 // the users access to a restricted resource for a limited time without having a
 // Google account or signing in. For more information about the signed
@@ -209,6 +266,8 @@
 		}
 	}
 
+	opts.Headers = sanitizeHeaders(opts.Headers)
+
 	signBytes := opts.SignBytes
 	if opts.PrivateKey != nil {
 		key, err := parseKey(opts.PrivateKey)
diff --git a/storage/storage_test.go b/storage/storage_test.go
index 26e971c..e511cfa 100644
--- a/storage/storage_test.go
+++ b/storage/storage_test.go
@@ -40,6 +40,57 @@
 	raw "google.golang.org/api/storage/v1"
 )
 
+func TestHeaderSanitization(t *testing.T) {
+	t.Parallel()
+	var tests = []struct {
+		input    []string
+		expected []string
+		errMsg   string
+	}{
+		{
+			input:    []string{"x-goog-header1:true", "x-goog-header2:0"},
+			expected: []string{"x-goog-header1:true", "x-goog-header2:0"},
+			errMsg:   "already sanitized headers should not be modified",
+		},
+		{
+			input:    []string{"x-goog-header2:0", "x-goog-header1:true"},
+			expected: []string{"x-goog-header1:true", "x-goog-header2:0"},
+			errMsg:   "sanitized headers should be sorted",
+		},
+		{
+			input:    []string{"x-goog-header1:true", "x-goog-no-value", "non-canonical-header:not-of-use"},
+			expected: []string{"x-goog-header1:true"},
+			errMsg:   "non-canonical headers should be removed",
+		},
+		{
+			input:    []string{"x-goog-header1:true", "x-goog-encryption-key:my_key", "x-goog-encryption-key-sha256:my_sha256"},
+			expected: []string{"x-goog-header1:true"},
+			errMsg:   "excluded canonical headers should be removed",
+		},
+		{
+			input:    []string{"x-goog-header1 : \textra-spaces ", "X-Goog-Header2:CamelCaseValue"},
+			expected: []string{"x-goog-header1:extra-spaces", "x-goog-header2:CamelCaseValue"},
+			errMsg:   "dirty headers should be formatted correctly",
+		},
+		{
+			input:    []string{"x-goog-header1:value1", "X-Goog-Header1:value2"},
+			expected: []string{"x-goog-header1:value1,value2"},
+			errMsg:   "duplicated headers should be merged",
+		},
+	}
+	for _, test := range tests {
+		actual := sanitizeHeaders(test.input)
+		if len(actual) != len(test.expected) {
+			t.Fatalf("%s; wanted %v but found %v", test.errMsg, test.expected, actual)
+		}
+		for i := 0; i < len(test.expected); i++ {
+			if actual[i] != test.expected[i] {
+				t.Fatalf("%s; wanted %v but found %v", test.errMsg, test.expected[i], actual[i])
+			}
+		}
+	}
+}
+
 func TestSignedURL(t *testing.T) {
 	t.Parallel()
 	expires, _ := time.Parse(time.RFC3339, "2002-10-02T10:00:00-05:00")
@@ -50,20 +101,20 @@
 		MD5:            "ICy5YqxZB1uWSwcVLSNLcA==",
 		Expires:        expires,
 		ContentType:    "application/json",
-		Headers:        []string{"x-header1", "x-header2"},
+		Headers:        []string{"x-goog-header1:true", "x-goog-header2:false"},
 	})
 	if err != nil {
 		t.Error(err)
 	}
 	want := "https://storage.googleapis.com/bucket-name/object-name?" +
 		"Expires=1033570800&GoogleAccessId=xxx%40clientid&Signature=" +
-		"ZMw18bZVhySNYAMEX87RMyuZCUMtGLVi%2B2zU2ByiQ0Rxgij%2BhFZ5LsT" +
-		"5ZPIH5h3QXB%2BiSb1URJnZo3aF0exVP%2FYR1hpg2e65w9HHt7yYjIqcg" +
-		"%2FfAOIyxriFtgRYk3oAv%2FFLF62fI8iF%2BCp0fWSm%2FHggz22blVnQz" +
-		"EtSP%2BuRhFle4172L%2B710sfMDtyQLKTz6W4TmRjC9ymTi8mVj95dZgyF" +
-		"RXbibTdtw0JzndE0Ig4c6pU4xDPPiyaziUSVDMIpzZDJH1GYOGHxbFasba4" +
-		"1rRoWWkdBnsMtHm2ck%2FsFD2leL6u8q0OpVAc4ZdxseucL4OpCy%2BCLhQ" +
-		"JFQT5bqSljP0g%3D%3D"
+		"RfsHlPtbB2JUYjzCgNr2Mi%2BjggdEuL1V7E6N9o6aaqwVLBDuTv3I0%2B9" +
+		"x94E6rmmr%2FVgnmZigkIUxX%2Blfl7LgKf30uPGLt0mjKGH2p7r9ey1ONJ" +
+		"%2BhVec23FnTRcSgopglvHPuCMWU2oNJE%2F1y8EwWE27baHrG1RhRHbLVF" +
+		"bPpLZ9xTRFK20pluIkfHV00JGljB1imqQHXM%2B2XPWqBngLr%2FwqxLN7i" +
+		"FcUiqR8xQEOHF%2F2e7fbkTHPNq4TazaLZ8X0eZ3eFdJ55A5QmNi8atlN4W" +
+		"5q7Hvs0jcxElG3yqIbx439A995BkspLiAcA%2Fo4%2BxAwEMkGLICdbvakq" +
+		"3eEprNCojw%3D%3D"
 	if url != want {
 		t.Fatalf("Unexpected signed URL; found %v", url)
 	}
@@ -79,16 +130,17 @@
 		MD5:            "ICy5YqxZB1uWSwcVLSNLcA==",
 		Expires:        expires,
 		ContentType:    "application/json",
-		Headers:        []string{"x-header1", "x-header2"},
+		Headers:        []string{"x-goog-header1:true", "x-goog-header2:false"},
 	})
 	if err != nil {
 		t.Error(err)
 	}
 	want := "https://storage.googleapis.com/bucket-name/object-name?" +
 		"Expires=1033570800&GoogleAccessId=xxx%40clientid&Signature=" +
-		"gHlh63sOxJnNj22X%2B%2F4kwOSNMeqwXWr4udEfrzJPQcq1xzxA8ovMM5SOrOc%" +
-		"2FuE%2Ftc9%2Bq7a42CDBwZff1PsvuJMBDaPbluU257h%2Bvxx8lHMnb%2Bg1wD1" +
-		"99FiCE014MRH9TlIg%2FdXRkErosVWTy4GqAgZemmKHo0HwDGT6IovB9mdg%3D"
+		"TiyKD%2FgGb6Kh0kkb2iF%2FfF%2BnTx7L0J4YiZua8AcTmnidutePEGIU5" +
+		"NULYlrGl6l52gz4zqFb3VFfIRTcPXMdXnnFdMCDhz2QuJBUpsU1Ai9zlyTQ" +
+		"dkb6ShG03xz9%2BEXWAUQO4GBybJw%2FULASuv37xA00SwLdkqj8YdyS5II" +
+		"1lro%3D"
 	if url != want {
 		t.Fatalf("Unexpected signed URL; found %v", url)
 	}
@@ -106,7 +158,7 @@
 		MD5:         "ICy5YqxZB1uWSwcVLSNLcA==",
 		Expires:     expires,
 		ContentType: "application/json",
-		Headers:     []string{"x-header1", "x-header2"},
+		Headers:     []string{"x-goog-header1:true", "x-goog-header2:false"},
 	})
 	if err != nil {
 		t.Error(err)
@@ -129,16 +181,16 @@
 		MD5:            "ICy5YqxZB1uWSwcVLSNLcA==",
 		Expires:        expires,
 		ContentType:    "application/json",
-		Headers:        []string{"x-header1", "x-header2"},
+		Headers:        []string{"x-goog-header1:true", "x-goog-header2:false"},
 	})
 	if err != nil {
 		t.Error(err)
 	}
 	want := "https://storage.googleapis.com/bucket-name/object%20name%E7%95%8C?" +
-		"Expires=1033570800&GoogleAccessId=xxx%40clientid&Signature=" +
-		"LSxs1YwXNKOa7mQv1ZAI2ao0Fuv6yXLLU7%2BQ97z2B7hYZ57OiFwQ72EdGXSiIM" +
-		"JwLisEKkwoSlYCMm3uuTdgJtXXVi7SYXMfdeKaonyQwMv531KETCBTSewt8CW%2B" +
-		"FaUJ5SEYG44SeJCiqeIr3GF7t90UNWs6TdFXDaKShpQzBGg%3D"
+		"Expires=1033570800&GoogleAccessId=xxx%40clientid&Signature=bxVH1%2Bl%2" +
+		"BSxpnj3XuqKz6mOFk6M94Y%2B4w85J6FCmJan%2FNhGSpndP6fAw1uLHlOn%2F8xUaY%2F" +
+		"SfZ5GzcQ%2BbxOL1WA37yIwZ7xgLYlO%2ByAi3GuqMUmHZiNCai28emODXQ8RtWHvgv6dE" +
+		"SQ%2F0KpDMIWW7rYCaUa63UkUyeSQsKhrVqkIA%3D"
 	if url != want {
 		t.Fatalf("Unexpected signed URL; found %v", url)
 	}
@@ -325,7 +377,7 @@
 		MD5:            "ICy5YqxZB1uWSwcVLSNLcA==",
 		Expires:        time.Date(2002, time.October, 2, 10, 0, 0, 0, time.UTC),
 		ContentType:    "application/json",
-		Headers:        []string{"x-header1", "x-header2"},
+		Headers:        []string{"x-goog-header1", "x-goog-header2"},
 	}
 
 	for _, test := range tests {