storage: select which attributes are returned for listing

This is added as an optimization, since much of the time taken to list
large buckets is spent encoding and decoding attributes in JSON. Clients
that are only interested in object names don't need most of the data that
gets encoded and sent down the wire, so by selecting attributes with the
new Query.SetAttrSelection they can speed up their listing by up to 33%.

Change-Id: I52b2e357bf85a716af0301616eec7cd949f3330e
Reviewed-on: https://code-review.googlesource.com/c/gocloud/+/47892
Reviewed-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Frank Natividad <franknatividad@google.com>
diff --git a/storage/bucket.go b/storage/bucket.go
index dccfc98..3cea6be 100644
--- a/storage/bucket.go
+++ b/storage/bucket.go
@@ -1150,6 +1150,9 @@
 	req.Delimiter(it.query.Delimiter)
 	req.Prefix(it.query.Prefix)
 	req.Versions(it.query.Versions)
+	if len(it.query.fieldSelection) > 0 {
+		req.Fields("nextPageToken", googleapi.Field(it.query.fieldSelection))
+	}
 	req.PageToken(pageToken)
 	if it.bucket.userProject != "" {
 		req.UserProject(it.bucket.userProject)
diff --git a/storage/doc.go b/storage/doc.go
index 88f6459..5f9a62b 100644
--- a/storage/doc.go
+++ b/storage/doc.go
@@ -117,6 +117,33 @@
     fmt.Printf("object %s has size %d and can be read using %s\n",
         objAttrs.Name, objAttrs.Size, objAttrs.MediaLink)
 
+Listing objects
+
+Listing objects in a bucket is done with the BucketHandle.Objects method:
+
+    query := &storage.Query{Prefix: ""}
+
+    var names []string
+    it := bkt.Objects(ctx, query)
+    for {
+        attrs, err := it.Next()
+        if err == iterator.Done {
+            break
+        }
+        if err != nil {
+            log.Fatal(err)
+        }
+        names = append(names, attrs.Name)
+    }
+
+If only a subset of object attributes is needed when listing, specifying this
+subset using Query.SetAttrSelection may speed up the listing process:
+
+    query := &storage.Query{Prefix: ""}
+    query.SetAttrSelection([]string{"Name"})
+
+    // ... as before
+
 ACLs
 
 Both objects and buckets have ACLs (Access Control Lists). An ACL is a list of
diff --git a/storage/integration_test.go b/storage/integration_test.go
index 0783bee..c272ca4 100644
--- a/storage/integration_test.go
+++ b/storage/integration_test.go
@@ -639,6 +639,7 @@
 	}
 
 	testObjectIterator(t, bkt, objects)
+	testObjectsIterateSelectedAttrs(t, bkt, objects)
 
 	// Test Reader.
 	for _, obj := range objects {
@@ -1041,6 +1042,39 @@
 	// TODO(jba): test query.Delimiter != ""
 }
 
+// testObjectsIterateSelectedAttrs lists bkt selecting only the "Name" attr and
+// checks that exactly the names in objects come back with Bucket left empty.
+func testObjectsIterateSelectedAttrs(t *testing.T, bkt *BucketHandle, objects []string) {
+	query := &Query{Prefix: ""}
+	if err := query.SetAttrSelection([]string{"Name"}); err != nil {
+		t.Fatalf("SetAttrSelection: %v", err)
+	}
+	var gotNames []string
+	it := bkt.Objects(context.Background(), query)
+	for {
+		attrs, err := it.Next()
+		if err == iterator.Done {
+			break
+		}
+		if err != nil {
+			// Fail through t: log.Fatal would exit the whole test binary.
+			t.Fatalf("iterator.Next: %v", err)
+		}
+		gotNames = append(gotNames, attrs.Name)
+		if len(attrs.Bucket) > 0 {
+			t.Errorf("Bucket field not selected, want empty, got = %v", attrs.Bucket)
+		}
+	}
+
+	// Sort a copy so the caller's objects slice keeps its order.
+	sortedNames := append([]string(nil), objects...)
+	sort.Strings(sortedNames)
+	sort.Strings(gotNames)
+	if !cmp.Equal(sortedNames, gotNames) {
+		t.Errorf("names = %v, want %v", gotNames, sortedNames)
+	}
+}
+
 func TestIntegration_SignedURL(t *testing.T) {
 	if testing.Short() { // do not test during replay
 		t.Skip("Integration tests skipped in short mode")
diff --git a/storage/storage.go b/storage/storage.go
index 59a859b..c85cc83 100644
--- a/storage/storage.go
+++ b/storage/storage.go
@@ -1131,6 +1131,78 @@
 	// Versions indicates whether multiple versions of the same
 	// object will be included in the results.
 	Versions bool
+
+	// fieldSelection is used to select only specific fields to be returned by
+	// the query. It's used internally and is populated for the user by
+	// calling Query.SetAttrSelection
+	fieldSelection string
+}
+
+// attrToFieldMap maps ObjectAttrs field names, which users already know from
+// the package's public API, to the JSON API object field names sent in the
+// request. The values are case-sensitive (note "md5Hash", not "md5hash").
+var attrToFieldMap = map[string]string{
+	"Bucket":                  "bucket",
+	"Name":                    "name",
+	"ContentType":             "contentType",
+	"ContentLanguage":         "contentLanguage",
+	"CacheControl":            "cacheControl",
+	"EventBasedHold":          "eventBasedHold",
+	"TemporaryHold":           "temporaryHold",
+	"RetentionExpirationTime": "retentionExpirationTime",
+	"ACL":                     "acl",
+	"Owner":                   "owner",
+	"ContentEncoding":         "contentEncoding",
+	"ContentDisposition":      "contentDisposition",
+	"Size":                    "size",
+	"MD5":                     "md5Hash",
+	"CRC32C":                  "crc32c",
+	"MediaLink":               "mediaLink",
+	"Metadata":                "metadata",
+	"Generation":              "generation",
+	"Metageneration":          "metageneration",
+	"StorageClass":            "storageClass",
+	"CustomerKeySHA256":       "customerEncryption",
+	"KMSKeyName":              "kmsKeyName",
+	"Created":                 "timeCreated",
+	"Deleted":                 "timeDeleted",
+	"Updated":                 "timeUpdated",
+	"Etag":                    "etag",
+}
+
+// SetAttrSelection makes the query populate only specific attributes of
+// objects. When iterating over objects, if you only need each object's name
+// and size, pass []string{"Name", "Size"} to this method. Only these fields
+// will be fetched for each object across the network; the other fields of
+// ObjectAttrs will remain at their default values. This is a performance
+// optimization; for more information, see
+// https://cloud.google.com/storage/docs/json_api/v1/how-tos/performance
+//
+// attrs holds ObjectAttrs field names; duplicates are ignored. An unknown name
+// returns an error and leaves the query unchanged; an empty attrs is a no-op.
+func (q *Query) SetAttrSelection(attrs []string) error {
+	seen := make(map[string]bool, len(attrs))
+	fields := make([]string, 0, len(attrs))
+	for _, attr := range attrs {
+		field, ok := attrToFieldMap[attr]
+		if !ok {
+			return fmt.Errorf("storage: attr %v is not valid", attr)
+		}
+		// Keep first-seen order so the same attrs always yield the same request;
+		// ranging over a map would shuffle the fields on every call.
+		if !seen[field] {
+			seen[field] = true
+			fields = append(fields, field)
+		}
+	}
+
+	if len(fields) == 0 {
+		return nil
+	}
+	// "prefixes" is a top-level response field, not an object attribute; it must
+	// be requested explicitly or delimiter queries lose their prefix entries.
+	q.fieldSelection = "prefixes,items(" + strings.Join(fields, ",") + ")"
+	return nil
 }
 
 // Conditions constrain methods to act on specific generations of
diff --git a/storage/storage_test.go b/storage/storage_test.go
index 8b3b1fd..2b12e41 100644
--- a/storage/storage_test.go
+++ b/storage/storage_test.go
@@ -26,6 +26,7 @@
 	"net/http"
 	"net/http/httptest"
 	"net/url"
+	"reflect"
 	"regexp"
 	"sort"
 	"strings"
@@ -1112,3 +1113,33 @@
 		}
 	}
 }
+
+// TestAttrToFieldMapCoverage checks that attrToFieldMap and the fields of
+// ObjectAttrs stay in sync, in both directions.
+func TestAttrToFieldMapCoverage(t *testing.T) {
+	t.Parallel()
+
+	attrsType := reflect.TypeOf(ObjectAttrs{})
+	structFields := make(map[string]bool, attrsType.NumField())
+	for i, n := 0, attrsType.NumField(); i < n; i++ {
+		structFields[attrsType.Field(i).Name] = true
+	}
+
+	// Every key of attrToFieldMap must name a real ObjectAttrs field.
+	for attr := range attrToFieldMap {
+		if !structFields[attr] {
+			t.Errorf("%v is not an ObjectAttrs field", attr)
+		}
+	}
+
+	// Every ObjectAttrs field must appear in attrToFieldMap, apart from the
+	// known exceptions which aren't sent over the wire but are settable by
+	// the user.
+	for name := range structFields {
+		_, mapped := attrToFieldMap[name]
+		if mapped || name == "Prefix" || name == "PredefinedACL" {
+			continue
+		}
+		t.Errorf("ObjectAttrs.%v is not in attrToFieldMap", name)
+	}
+}