storage: select which attributes are returned for listing
This is added as an optimization, since a lot of the time taken for
listing large buckets is spent encoding and decoding attributes in JSON.
Clients who are only interested in listing object names don't need a lot
of the data that gets encoded and sent down the wire, so by using the new
attribute selection in Query they can speed up their listing by up to 33%.
Change-Id: I52b2e357bf85a716af0301616eec7cd949f3330e
Reviewed-on: https://code-review.googlesource.com/c/gocloud/+/47892
Reviewed-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Frank Natividad <franknatividad@google.com>
diff --git a/storage/bucket.go b/storage/bucket.go
index dccfc98..3cea6be 100644
--- a/storage/bucket.go
+++ b/storage/bucket.go
@@ -1150,6 +1150,9 @@
req.Delimiter(it.query.Delimiter)
req.Prefix(it.query.Prefix)
req.Versions(it.query.Versions)
+ if len(it.query.fieldSelection) > 0 {
+ req.Fields("nextPageToken", googleapi.Field(it.query.fieldSelection))
+ }
req.PageToken(pageToken)
if it.bucket.userProject != "" {
req.UserProject(it.bucket.userProject)
diff --git a/storage/doc.go b/storage/doc.go
index 88f6459..5f9a62b 100644
--- a/storage/doc.go
+++ b/storage/doc.go
@@ -117,6 +117,33 @@
fmt.Printf("object %s has size %d and can be read using %s\n",
objAttrs.Name, objAttrs.Size, objAttrs.MediaLink)
+Listing objects
+
+Listing objects in a bucket is done with the Bucket.Objects method:
+
+ query := &storage.Query{Prefix: ""}
+
+ var names []string
+ it := bkt.Objects(ctx, query)
+ for {
+ attrs, err := it.Next()
+ if err == iterator.Done {
+ break
+ }
+ if err != nil {
+ log.Fatal(err)
+ }
+ names = append(names, attrs.Name)
+ }
+
+If only a subset of object attributes is needed when listing, specifying this
+subset using Query.SetAttrSelection may speed up the listing process:
+
+ query := &storage.Query{Prefix: ""}
+ query.SetAttrSelection([]string{"Name"})
+
+ // ... as before
+
ACLs
Both objects and buckets have ACLs (Access Control Lists). An ACL is a list of
diff --git a/storage/integration_test.go b/storage/integration_test.go
index 0783bee..c272ca4 100644
--- a/storage/integration_test.go
+++ b/storage/integration_test.go
@@ -639,6 +639,7 @@
}
testObjectIterator(t, bkt, objects)
+ testObjectsIterateSelectedAttrs(t, bkt, objects)
// Test Reader.
for _, obj := range objects {
@@ -1041,6 +1042,39 @@
// TODO(jba): test query.Delimiter != ""
}
+// testObjectsIterateSelectedAttrs lists the objects of bkt using a query that
+// selects only the "Name" attribute. It checks that the returned names equal
+// objects (ignoring order) and that the unselected Bucket attribute is empty.
+func testObjectsIterateSelectedAttrs(t *testing.T, bkt *BucketHandle, objects []string) {
+	// Create a query that will only select the "Name" attr of objects, and
+	// invoke object listing.
+	query := &Query{Prefix: ""}
+	if err := query.SetAttrSelection([]string{"Name"}); err != nil {
+		t.Fatalf("SetAttrSelection: %v", err)
+	}
+
+	var gotNames []string
+	it := bkt.Objects(context.Background(), query)
+	for {
+		attrs, err := it.Next()
+		if err == iterator.Done {
+			break
+		}
+		if err != nil {
+			// Fail through t rather than log.Fatal, which would exit the
+			// whole test binary instead of failing just this test.
+			t.Fatalf("iterating over objects: %v", err)
+		}
+		gotNames = append(gotNames, attrs.Name)
+
+		if len(attrs.Bucket) > 0 {
+			t.Errorf("Bucket field not selected, want empty, got = %v", attrs.Bucket)
+		}
+	}
+
+	// Compare sorted copies so the check does not depend on listing order
+	// and does not reorder the caller's slice.
+	sortedNames := make([]string, len(objects))
+	copy(sortedNames, objects)
+	sort.Strings(sortedNames)
+	sort.Strings(gotNames)
+
+	if !cmp.Equal(sortedNames, gotNames) {
+		t.Errorf("names = %v, want %v", gotNames, sortedNames)
+	}
+}
+
func TestIntegration_SignedURL(t *testing.T) {
if testing.Short() { // do not test during replay
t.Skip("Integration tests skipped in short mode")
diff --git a/storage/storage.go b/storage/storage.go
index 59a859b..c85cc83 100644
--- a/storage/storage.go
+++ b/storage/storage.go
@@ -1131,6 +1131,78 @@
// Versions indicates whether multiple versions of the same
// object will be included in the results.
Versions bool
+
+ // fieldSelection is used to select only specific fields to be returned by
+ // the query. It's used internally and is populated for the user by
+ // calling Query.SetAttrSelection
+ fieldSelection string
+}
+
+// attrToFieldMap maps the field names of ObjectAttrs to the underlying field
+// names in the API call. Only the ObjectAttrs field names are visible to users
+// because they are already part of the public API of the package. The values
+// are placed as-is into the request's field selection by
+// Query.SetAttrSelection, so they must match the JSON API object resource
+// property names exactly, including case.
+var attrToFieldMap = map[string]string{
+	"Bucket":                  "bucket",
+	"Name":                    "name",
+	"ContentType":             "contentType",
+	"ContentLanguage":         "contentLanguage",
+	"CacheControl":            "cacheControl",
+	"EventBasedHold":          "eventBasedHold",
+	"TemporaryHold":           "temporaryHold",
+	"RetentionExpirationTime": "retentionExpirationTime",
+	"ACL":                     "acl",
+	"Owner":                   "owner",
+	"ContentEncoding":         "contentEncoding",
+	"ContentDisposition":      "contentDisposition",
+	"Size":                    "size",
+	"MD5":                     "md5Hash",
+	"CRC32C":                  "crc32c",
+	"MediaLink":               "mediaLink",
+	"Metadata":                "metadata",
+	"Generation":              "generation",
+	"Metageneration":          "metageneration",
+	"StorageClass":            "storageClass",
+	"CustomerKeySHA256":       "customerEncryption",
+	"KMSKeyName":              "kmsKeyName",
+	"Created":                 "timeCreated",
+	"Deleted":                 "timeDeleted",
+	"Updated":                 "updated",
+	"Etag":                    "etag",
+}
+
+// SetAttrSelection makes the query populate only specific attributes of
+// objects. When iterating over objects, if you only need each object's name
+// and size, pass []string{"Name", "Size"} to this method. Only these fields
+// will be fetched for each object across the network; the other fields of
+// ObjectAttrs will remain at their default values. This is a performance
+// optimization; for more information, see
+// https://cloud.google.com/storage/docs/json_api/v1/how-tos/performance
+//
+// Each element of attrs must be the name of an ObjectAttrs field known to
+// attrToFieldMap; otherwise an error is returned and the query is left
+// unchanged. Duplicate names are ignored. Calling it with no attributes
+// leaves any previously set selection in place.
+func (q *Query) SetAttrSelection(attrs []string) error {
+	seen := make(map[string]bool, len(attrs))
+	fields := make([]string, 0, len(attrs))
+
+	for _, attr := range attrs {
+		field, ok := attrToFieldMap[attr]
+		if !ok {
+			return fmt.Errorf("storage: attr %v is not valid", attr)
+		}
+		// Keep the caller's order instead of ranging over a map, whose
+		// iteration order is random, so the same attrs always produce the
+		// same selection string.
+		if !seen[field] {
+			seen[field] = true
+			fields = append(fields, field)
+		}
+	}
+
+	if len(fields) > 0 {
+		// The selection has the form items(field1,field2,...).
+		q.fieldSelection = "items(" + strings.Join(fields, ",") + ")"
+	}
+	return nil
}
// Conditions constrain methods to act on specific generations of
diff --git a/storage/storage_test.go b/storage/storage_test.go
index 8b3b1fd..2b12e41 100644
--- a/storage/storage_test.go
+++ b/storage/storage_test.go
@@ -26,6 +26,7 @@
"net/http"
"net/http/httptest"
"net/url"
+ "reflect"
"regexp"
"sort"
"strings"
@@ -1112,3 +1113,33 @@
}
}
}
+
+// TestAttrToFieldMapCoverage verifies that attrToFieldMap and the fields of
+// ObjectAttrs stay in sync: every map key must name an ObjectAttrs field, and
+// every ObjectAttrs field must have a map entry unless it is a known
+// exception.
+func TestAttrToFieldMapCoverage(t *testing.T) {
+	t.Parallel()
+
+	// Gather the names of all fields declared on ObjectAttrs.
+	attrsType := reflect.TypeOf(ObjectAttrs{})
+	structFields := make(map[string]bool)
+	for i, n := 0, attrsType.NumField(); i < n; i++ {
+		structFields[attrsType.Field(i).Name] = true
+	}
+
+	// Check that all fields of attrToFieldMap exist in ObjectAttrs.
+	for attr := range attrToFieldMap {
+		if !structFields[attr] {
+			t.Errorf("%v is not an ObjectAttrs field", attr)
+		}
+	}
+
+	// Check that all fields of ObjectAttrs exist in attrToFieldMap, with
+	// known exceptions which aren't sent over the wire but are settable by
+	// the user.
+	for name := range structFields {
+		if _, mapped := attrToFieldMap[name]; mapped {
+			continue
+		}
+		switch name {
+		case "Prefix", "PredefinedACL":
+			// Known exceptions; no mapping expected.
+		default:
+			t.Errorf("ObjectAttrs.%v is not in attrToFieldMap", name)
+		}
+	}
+}