bigquery/storage/managedwriter/adapt/protoconversion.go - gocloud - Git at Google

 // Copyright 2021 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package adapt

 import (
 	"encoding/base64"
 	"fmt"
 	"sort"
 	"strings"

 	"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/reflect/protodesc"
 	"google.golang.org/protobuf/reflect/protoreflect"
 	"google.golang.org/protobuf/types/descriptorpb"
 	"google.golang.org/protobuf/types/known/wrapperspb"
 )

 var bqModeToFieldLabelMapProto2 = map[storagepb.TableFieldSchema_Mode]descriptorpb.FieldDescriptorProto_Label{
 	storagepb.TableFieldSchema_NULLABLE: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL,
 	storagepb.TableFieldSchema_REPEATED: descriptorpb.FieldDescriptorProto_LABEL_REPEATED,
 	storagepb.TableFieldSchema_REQUIRED: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED,
 }

 var bqModeToFieldLabelMapProto3 = map[storagepb.TableFieldSchema_Mode]descriptorpb.FieldDescriptorProto_Label{
 	storagepb.TableFieldSchema_NULLABLE: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL,
 	storagepb.TableFieldSchema_REPEATED: descriptorpb.FieldDescriptorProto_LABEL_REPEATED,
 	storagepb.TableFieldSchema_REQUIRED: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL,
 }

 func convertModeToLabel(mode storagepb.TableFieldSchema_Mode, useProto3 bool) *descriptorpb.FieldDescriptorProto_Label {
 	if useProto3 {
 		return bqModeToFieldLabelMapProto3[mode].Enum()
 	}
 	return bqModeToFieldLabelMapProto2[mode].Enum()
 }

 // Allows conversion between BQ schema type and FieldDescriptorProto's type.
 var bqTypeToFieldTypeMap = map[storagepb.TableFieldSchema_Type]descriptorpb.FieldDescriptorProto_Type{
 	storagepb.TableFieldSchema_BIGNUMERIC: descriptorpb.FieldDescriptorProto_TYPE_BYTES,
 	storagepb.TableFieldSchema_BOOL:       descriptorpb.FieldDescriptorProto_TYPE_BOOL,
 	storagepb.TableFieldSchema_BYTES:      descriptorpb.FieldDescriptorProto_TYPE_BYTES,
 	storagepb.TableFieldSchema_DATE:       descriptorpb.FieldDescriptorProto_TYPE_INT32,
 	storagepb.TableFieldSchema_DATETIME:   descriptorpb.FieldDescriptorProto_TYPE_INT64,
 	storagepb.TableFieldSchema_DOUBLE:     descriptorpb.FieldDescriptorProto_TYPE_DOUBLE,
 	storagepb.TableFieldSchema_GEOGRAPHY:  descriptorpb.FieldDescriptorProto_TYPE_STRING,
 	storagepb.TableFieldSchema_INT64:      descriptorpb.FieldDescriptorProto_TYPE_INT64,
 	storagepb.TableFieldSchema_NUMERIC:    descriptorpb.FieldDescriptorProto_TYPE_BYTES,
 	storagepb.TableFieldSchema_STRING:     descriptorpb.FieldDescriptorProto_TYPE_STRING,
 	storagepb.TableFieldSchema_STRUCT:     descriptorpb.FieldDescriptorProto_TYPE_MESSAGE,
 	storagepb.TableFieldSchema_TIME:       descriptorpb.FieldDescriptorProto_TYPE_INT64,
 	storagepb.TableFieldSchema_TIMESTAMP:  descriptorpb.FieldDescriptorProto_TYPE_INT64,
 	storagepb.TableFieldSchema_RANGE:      descriptorpb.FieldDescriptorProto_TYPE_MESSAGE,
 }

 var allowedRangeTypes = []storagepb.TableFieldSchema_Type{
 	storagepb.TableFieldSchema_DATE,
 	storagepb.TableFieldSchema_DATETIME,
 	storagepb.TableFieldSchema_TIMESTAMP,
 }

 // Primitive types which can leverage packed encoding when repeated/arrays.
 //
 // Note: many/most of these aren't used when doing schema to proto conversion, but
 // are included for completeness.
 var packedTypes = []descriptorpb.FieldDescriptorProto_Type{
 	descriptorpb.FieldDescriptorProto_TYPE_INT32,
 	descriptorpb.FieldDescriptorProto_TYPE_INT64,
 	descriptorpb.FieldDescriptorProto_TYPE_UINT32,
 	descriptorpb.FieldDescriptorProto_TYPE_UINT64,
 	descriptorpb.FieldDescriptorProto_TYPE_SINT32,
 	descriptorpb.FieldDescriptorProto_TYPE_SINT64,
 	descriptorpb.FieldDescriptorProto_TYPE_FIXED32,
 	descriptorpb.FieldDescriptorProto_TYPE_FIXED64,
 	descriptorpb.FieldDescriptorProto_TYPE_SFIXED32,
 	descriptorpb.FieldDescriptorProto_TYPE_SFIXED64,
 	descriptorpb.FieldDescriptorProto_TYPE_FLOAT,
 	descriptorpb.FieldDescriptorProto_TYPE_DOUBLE,
 	descriptorpb.FieldDescriptorProto_TYPE_BOOL,
 	descriptorpb.FieldDescriptorProto_TYPE_ENUM,
 }

 // For TableFieldSchema OPTIONAL mode, we use the wrapper types to allow for the
 // proper representation of NULL values, as proto3 semantics would just use default value.
 var bqTypeToWrapperMap = map[storagepb.TableFieldSchema_Type]string{
 	storagepb.TableFieldSchema_BIGNUMERIC: ".google.protobuf.BytesValue",
 	storagepb.TableFieldSchema_BOOL:       ".google.protobuf.BoolValue",
 	storagepb.TableFieldSchema_BYTES:      ".google.protobuf.BytesValue",
 	storagepb.TableFieldSchema_DATE:       ".google.protobuf.Int32Value",
 	storagepb.TableFieldSchema_DATETIME:   ".google.protobuf.Int64Value",
 	storagepb.TableFieldSchema_DOUBLE:     ".google.protobuf.DoubleValue",
 	storagepb.TableFieldSchema_GEOGRAPHY:  ".google.protobuf.StringValue",
 	storagepb.TableFieldSchema_INT64:      ".google.protobuf.Int64Value",
 	storagepb.TableFieldSchema_NUMERIC:    ".google.protobuf.BytesValue",
 	storagepb.TableFieldSchema_STRING:     ".google.protobuf.StringValue",
 	storagepb.TableFieldSchema_TIME:       ".google.protobuf.Int64Value",
 	storagepb.TableFieldSchema_TIMESTAMP:  ".google.protobuf.Int64Value",
 }

 // filename used by well known types proto
 var wellKnownTypesWrapperName = "google/protobuf/wrappers.proto"

 var rangeTypesPrefix = "rangemessage_range_"

 // dependencyCache is used to reduce the number of unique messages we generate by caching based on the tableschema.
 //
 // Keys are based on the base64-encoded serialized tableschema value.
 type dependencyCache struct {
 	// keyed by element type
 	rangeTypes map[storagepb.TableFieldSchema_Type]protoreflect.MessageDescriptor
 	// general cache
 	msgs map[string]protoreflect.MessageDescriptor
 }

 func newDependencyCache() *dependencyCache {
 	return &dependencyCache{
 		rangeTypes: make(map[storagepb.TableFieldSchema_Type]protoreflect.MessageDescriptor),
 		msgs:       make(map[string]protoreflect.MessageDescriptor),
 	}
 }

 func (dm *dependencyCache) get(schema *storagepb.TableSchema) protoreflect.MessageDescriptor {
 	if dm == nil {
 		return nil
 	}
 	b, err := proto.Marshal(schema)
 	if err != nil {
 		return nil
 	}
 	encoded := base64.StdEncoding.EncodeToString(b)
 	if desc, ok := dm.msgs[encoded]; ok {
 		return desc
 	}
 	return nil
 }

 func (dm *dependencyCache) getFileDescriptorProtos() []*descriptorpb.FileDescriptorProto {
 	var fdpList []*descriptorpb.FileDescriptorProto
 	// emit encountered messages.
 	for _, d := range dm.msgs {
 		if fd := d.ParentFile(); fd != nil {
 			fdp := protodesc.ToFileDescriptorProto(fd)
 			fdpList = append(fdpList, fdp)
 		}
 	}
 	// emit any range value types used.
 	for _, d := range dm.rangeTypes {
 		if fd := d.ParentFile(); fd != nil {
 			fdp := protodesc.ToFileDescriptorProto(fd)
 			fdpList = append(fdpList, fdp)
 		}
 	}
 	return fdpList
 }

 func (dm *dependencyCache) add(schema *storagepb.TableSchema, descriptor protoreflect.MessageDescriptor) error {
 	if dm == nil {
 		return fmt.Errorf("cache is nil")
 	}
 	b, err := proto.Marshal(schema)
 	if err != nil {
 		return fmt.Errorf("failed to serialize tableschema: %w", err)
 	}
 	encoded := base64.StdEncoding.EncodeToString(b)
 	dm.msgs[encoded] = descriptor
 	return nil
 }

 func (dm *dependencyCache) addRangeByElementType(typ storagepb.TableFieldSchema_Type, useProto3 bool) (protoreflect.MessageDescriptor, error) {
 	if md, present := dm.rangeTypes[typ]; present {
 		// already added, do nothing.
 		return md, nil
 	}
 	// Not yet present.  Build the message.
 	allowed := false
 	for _, a := range allowedRangeTypes {
 		if typ == a {
 			allowed = true
 		}
 	}
 	if !allowed {
 		return nil, fmt.Errorf("range does not support %q as a valid element type", typ.String())
 	}
 	ts := &storagepb.TableSchema{
 		Fields: []*storagepb.TableFieldSchema{
 			{
 				Name: "start",
 				Type: typ,
 				Mode: storagepb.TableFieldSchema_NULLABLE,
 			},
 			{
 				Name: "end",
 				Type: typ,
 				Mode: storagepb.TableFieldSchema_NULLABLE,
 			},
 		},
 	}
 	// we put the range types outside the hierarchical namespace as they're effectively BQ-specific well-known types.
 	msgTypeName := fmt.Sprintf("%s%s", rangeTypesPrefix, strings.ToLower(typ.String()))
 	// use a new dependency cache, as we don't want to taint the main one due to matching schema
 	md, err := storageSchemaToDescriptorInternal(ts, msgTypeName, newDependencyCache(), useProto3)
 	if err != nil {
 		return nil, fmt.Errorf("failed to generate range descriptor %q: %v", msgTypeName, err)
 	}
 	dm.rangeTypes[typ] = md
 	return md, nil
 }

 func (dm *dependencyCache) getRange(typ storagepb.TableFieldSchema_Type) protoreflect.MessageDescriptor {
 	md, ok := dm.rangeTypes[typ]
 	if !ok {
 		return nil
 	}
 	return md
 }

 // StorageSchemaToProto2Descriptor builds a protoreflect.Descriptor for a given table schema using proto2 syntax.
 func StorageSchemaToProto2Descriptor(inSchema *storagepb.TableSchema, scope string) (protoreflect.Descriptor, error) {
 	dc := newDependencyCache()
 	// TODO: b/193064992 tracks support for wrapper types.  In the interim, disable wrapper usage.
 	return storageSchemaToDescriptorInternal(inSchema, scope, dc, false)
 }

 // StorageSchemaToProto3Descriptor builds a protoreflect.Descriptor for a given table schema using proto3 syntax.
 //
 // NOTE: Currently the write API doesn't yet support proto3 behaviors (default value, wrapper types, etc), but this is provided for
 // completeness.
 func StorageSchemaToProto3Descriptor(inSchema *storagepb.TableSchema, scope string) (protoreflect.Descriptor, error) {
 	dc := newDependencyCache()
 	return storageSchemaToDescriptorInternal(inSchema, scope, dc, true)
 }

 // Internal implementation of the conversion code.
 func storageSchemaToDescriptorInternal(inSchema *storagepb.TableSchema, scope string, cache *dependencyCache, useProto3 bool) (protoreflect.MessageDescriptor, error) {
 	if inSchema == nil {
 		return nil, newConversionError(scope, fmt.Errorf("no input schema was provided"))
 	}

 	var fields []*descriptorpb.FieldDescriptorProto
 	var deps []protoreflect.FileDescriptor
 	var fNumber int32

 	for _, f := range inSchema.GetFields() {
 		fNumber = fNumber + 1
 		currentScope := fmt.Sprintf("%s__%s", scope, f.GetName())

 		if f.Type == storagepb.TableFieldSchema_STRUCT {
 			// If we're dealing with a STRUCT type, we must deal with sub messages.
 			// As multiple submessages may share the same type definition, we use a dependency cache
 			// and interrogate it / populate it as we're going.
 			foundDesc := cache.get(&storagepb.TableSchema{Fields: f.GetFields()})
 			if foundDesc != nil {
 				// check to see if we already have this in current dependency list
 				haveDep := false
 				for _, dep := range deps {
 					if messageDependsOnFile(foundDesc, dep) {
 						haveDep = true
 						break
 					}
 				}
 				// If dep is missing, add to current dependencies.
 				if !haveDep {
 					deps = append(deps, foundDesc.ParentFile())
 				}
 				// Construct field descriptor for the message.
 				fdp, err := tableFieldSchemaToFieldDescriptorProto(f, fNumber, string(foundDesc.FullName()), useProto3)
 				if err != nil {
 					return nil, newConversionError(scope, fmt.Errorf("couldn't convert field to FieldDescriptorProto: %w", err))
 				}
 				fields = append(fields, fdp)
 			} else {
 				// Wrap the current struct's fields in a TableSchema outer message, and then build the submessage.
 				ts := &storagepb.TableSchema{
 					Fields: f.GetFields(),
 				}
 				desc, err := storageSchemaToDescriptorInternal(ts, currentScope, cache, useProto3)
 				if err != nil {
 					return nil, newConversionError(currentScope, fmt.Errorf("couldn't convert message: %w", err))
 				}
 				// Now that we have the submessage definition, we append it both to the local dependencies, as well
 				// as inserting it into the cache for possible reuse elsewhere.
 				deps = append(deps, desc.ParentFile())
 				err = cache.add(ts, desc)
 				if err != nil {
 					return nil, newConversionError(currentScope, fmt.Errorf("failed to add descriptor to dependency cache: %w", err))
 				}
 				fdp, err := tableFieldSchemaToFieldDescriptorProto(f, fNumber, currentScope, useProto3)
 				if err != nil {
 					return nil, newConversionError(currentScope, fmt.Errorf("couldn't compute field schema : %w", err))
 				}
 				fields = append(fields, fdp)
 			}
 		} else {
 			if f.Type == storagepb.TableFieldSchema_RANGE {
 				// Range handling is a special case of general struct handling.
 				ret := f.GetRangeElementType()
 				if ret == nil {
 					return nil, fmt.Errorf("field %q is a RANGE, but doesn't include RangeElementType info", f.GetName())
 				}
 				foundDesc, err := cache.addRangeByElementType(ret.GetType(), useProto3)
 				if err != nil {
 					return nil, err
 				}
 				if foundDesc != nil {
 					haveDep := false
 					for _, dep := range deps {
 						if messageDependsOnFile(foundDesc, dep) {
 							haveDep = true
 							break
 						}
 					}
 					// If dep is missing, add to current dependencies.
 					if !haveDep {
 						deps = append(deps, foundDesc.ParentFile())
 					}
 				}
 			}
 			fd, err := tableFieldSchemaToFieldDescriptorProto(f, fNumber, currentScope, useProto3)
 			if err != nil {
 				return nil, newConversionError(currentScope, err)
 			}
 			fields = append(fields, fd)
 		}
 	}
 	// Start constructing a DescriptorProto.
 	dp := &descriptorpb.DescriptorProto{
 		Name:  proto.String(scope),
 		Field: fields,
 	}

 	// Use the local dependencies to generate a list of filenames.
 	depNames := []string{wellKnownTypesWrapperName}
 	for _, d := range deps {
 		depNames = append(depNames, d.ParentFile().Path())
 	}

 	// Now, construct a FileDescriptorProto.
 	fdp := &descriptorpb.FileDescriptorProto{
 		MessageType: []*descriptorpb.DescriptorProto{dp},
 		Name:        proto.String(fmt.Sprintf("%s.proto", scope)),
 		Syntax:      proto.String("proto3"),
 		Dependency:  depNames,
 	}
 	if !useProto3 {
 		fdp.Syntax = proto.String("proto2")
 	}

 	// We'll need a FileDescriptorSet as we have a FileDescriptorProto for the current
 	// descriptor we're building, but we need to include all the referenced dependencies.

 	fdpList := []*descriptorpb.FileDescriptorProto{
 		fdp,
 		protodesc.ToFileDescriptorProto(wrapperspb.File_google_protobuf_wrappers_proto),
 	}
 	fdpList = append(fdpList, cache.getFileDescriptorProtos()...)

 	fds := &descriptorpb.FileDescriptorSet{
 		File: fdpList,
 	}

 	// Load the set into a registry, then interrogate it for the descriptor corresponding to the top level message.
 	files, err := protodesc.NewFiles(fds)
 	if err != nil {
 		return nil, err
 	}
 	found, err := files.FindDescriptorByName(protoreflect.FullName(scope))
 	if err != nil {
 		return nil, err
 	}
 	return found.(protoreflect.MessageDescriptor), nil
 }

 // messageDependsOnFile checks if the given message descriptor already belongs to the file descriptor.
 // To check for that, first we check if the message descriptor parent file is the same as the file descriptor.
 // If not, check if the message descriptor belongs is contained as a child of the file descriptor.
 func messageDependsOnFile(msg protoreflect.MessageDescriptor, file protoreflect.FileDescriptor) bool {
 	parentFile := msg.ParentFile()
 	parentFileName := parentFile.FullName()
 	if parentFileName != "" {
 		if parentFileName == file.FullName() {
 			return true
 		}
 	}
 	fileMessages := file.Messages()
 	for i := 0; i < fileMessages.Len(); i++ {
 		childMsg := fileMessages.Get(i)
 		if msg.FullName() == childMsg.FullName() {
 			return true
 		}
 	}
 	return false
 }

 // tableFieldSchemaToFieldDescriptorProto builds individual field descriptors for a proto message.
 //
 // For proto3, in cases where the mode is nullable we use the well known wrapper types.
 // For proto2, we propagate the mode->label annotation as expected.
 //
 // Messages are always nullable, and repeated fields are as well.
 func tableFieldSchemaToFieldDescriptorProto(field *storagepb.TableFieldSchema, idx int32, scope string, useProto3 bool) (*descriptorpb.FieldDescriptorProto, error) {
 	name := field.GetName()
 	var fdp *descriptorpb.FieldDescriptorProto

 	if field.GetType() == storagepb.TableFieldSchema_STRUCT {
 		fdp = &descriptorpb.FieldDescriptorProto{
 			Name:     proto.String(name),
 			Number:   proto.Int32(idx),
 			TypeName: proto.String(scope),
 			Label:    convertModeToLabel(field.GetMode(), useProto3),
 		}
 	} else if field.GetType() == storagepb.TableFieldSchema_RANGE {
 		fdp = &descriptorpb.FieldDescriptorProto{
 			Name:     proto.String(name),
 			Number:   proto.Int32(idx),
 			TypeName: proto.String(fmt.Sprintf("%s%s", rangeTypesPrefix, strings.ToLower(field.GetRangeElementType().GetType().String()))),
 			Label:    convertModeToLabel(field.GetMode(), useProto3),
 		}
 	} else {
 		// For (REQUIRED||REPEATED) fields for proto3, or all cases for proto2, we can use the expected scalar types.
 		if field.GetMode() != storagepb.TableFieldSchema_NULLABLE || !useProto3 {
 			outType := bqTypeToFieldTypeMap[field.GetType()]
 			fdp = &descriptorpb.FieldDescriptorProto{
 				Name:   proto.String(name),
 				Number: proto.Int32(idx),
 				Type:   outType.Enum(),
 				Label:  convertModeToLabel(field.GetMode(), useProto3),
 			}

 			// Special case: proto2 repeated fields may benefit from using packed annotation.
 			if field.GetMode() == storagepb.TableFieldSchema_REPEATED && !useProto3 {
 				for _, v := range packedTypes {
 					if outType == v {
 						fdp.Options = &descriptorpb.FieldOptions{
 							Packed: proto.Bool(true),
 						}
 						break
 					}
 				}
 			}
 		} else {
 			// For NULLABLE proto3 fields, use a wrapper type.
 			fdp = &descriptorpb.FieldDescriptorProto{
 				Name:     proto.String(name),
 				Number:   proto.Int32(idx),
 				Type:     descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
 				TypeName: proto.String(bqTypeToWrapperMap[field.GetType()]),
 				Label:    descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
 			}
 		}
 	}
 	if nameRequiresAnnotation(name) {
 		// Use a prefix + base64 encoded name when annotations bear the actual name.
 		// Base 64 standard encoding may also contain certain characters (+,/,=) which
 		// we remove from the generated name.
 		encoded := strings.Trim(base64.StdEncoding.EncodeToString([]byte(name)), "+/=")
 		fdp.Name = proto.String(fmt.Sprintf("col_%s", encoded))
 		opts := fdp.GetOptions()
 		if opts == nil {
 			fdp.Options = &descriptorpb.FieldOptions{}
 		}
 		proto.SetExtension(fdp.Options, storagepb.E_ColumnName, name)
 	}
 	return fdp, nil
 }

 // nameRequiresAnnotation determines whether a field name requires unicode-annotation.
 func nameRequiresAnnotation(in string) bool {
 	return !protoreflect.Name(in).IsValid()
 }

 // NormalizeDescriptor builds a self-contained DescriptorProto suitable for communicating schema
 // information with the BigQuery Storage write API.  It's primarily used for cases where users are
 // interested in sending data using a predefined protocol buffer message.
 //
 // The storage API accepts a single DescriptorProto for decoding message data.  In many cases, a message
 // is comprised of multiple independent messages, from the same .proto file or from multiple sources.  Rather
 // than being forced to communicate all these messages independently, what this method does is rewrite the
 // DescriptorProto to inline all messages as nested submessages.  As the backend only cares about the types
 // and not the namespaces when decoding, this is sufficient for the needs of the API's representation.
 //
 // In addition to nesting messages, this method also handles some encapsulation of enum types to avoid possible
 // conflicts due to ambiguities, and clears oneof indices as oneof isn't a concept that maps into BigQuery
 // schemas.
 //
 // To enable proto3 usage, this function will also rewrite proto3 descriptors into equivalent proto2 form.
 // Such rewrites include setting the appropriate default values for proto3 fields.
 func NormalizeDescriptor(in protoreflect.MessageDescriptor) (*descriptorpb.DescriptorProto, error) {
 	return normalizeDescriptorInternal(in, newStringSet(), newStringSet(), newStringSet(), nil)
 }

 func normalizeDescriptorInternal(in protoreflect.MessageDescriptor, visitedTypes, enumTypes, structTypes *stringSet, root *descriptorpb.DescriptorProto) (*descriptorpb.DescriptorProto, error) {
 	if in == nil {
 		return nil, fmt.Errorf("no messagedescriptor provided")
 	}
 	resultDP := &descriptorpb.DescriptorProto{}
 	if root == nil {
 		root = resultDP
 	}
 	fullProtoName := string(in.FullName())
 	resultDP.Name = proto.String(normalizeName(fullProtoName))
 	visitedTypes.add(fullProtoName)
 	for i := 0; i < in.Fields().Len(); i++ {
 		inField := in.Fields().Get(i)
 		resultFDP := protodesc.ToFieldDescriptorProto(inField)
 		// For messages without explicit presence, use default values to match implicit presence behavior.
 		if !inField.HasPresence() && inField.Cardinality() != protoreflect.Repeated {
 			switch resultFDP.GetType() {
 			case descriptorpb.FieldDescriptorProto_TYPE_BOOL:
 				resultFDP.DefaultValue = proto.String("false")
 			case descriptorpb.FieldDescriptorProto_TYPE_BYTES, descriptorpb.FieldDescriptorProto_TYPE_STRING:
 				resultFDP.DefaultValue = proto.String("")
 			case descriptorpb.FieldDescriptorProto_TYPE_ENUM:
 				// Resolve the proto3 default value.  The default value should be the value name.
 				defValue := inField.Enum().Values().ByNumber(inField.Default().Enum())
 				resultFDP.DefaultValue = proto.String(string(defValue.Name()))
 			case descriptorpb.FieldDescriptorProto_TYPE_DOUBLE,
 				descriptorpb.FieldDescriptorProto_TYPE_FLOAT,
 				descriptorpb.FieldDescriptorProto_TYPE_INT64,
 				descriptorpb.FieldDescriptorProto_TYPE_UINT64,
 				descriptorpb.FieldDescriptorProto_TYPE_INT32,
 				descriptorpb.FieldDescriptorProto_TYPE_FIXED64,
 				descriptorpb.FieldDescriptorProto_TYPE_FIXED32,
 				descriptorpb.FieldDescriptorProto_TYPE_UINT32,
 				descriptorpb.FieldDescriptorProto_TYPE_SFIXED32,
 				descriptorpb.FieldDescriptorProto_TYPE_SFIXED64,
 				descriptorpb.FieldDescriptorProto_TYPE_SINT32,
 				descriptorpb.FieldDescriptorProto_TYPE_SINT64:
 				resultFDP.DefaultValue = proto.String("0")
 			}
 		}
 		// Clear proto3 optional annotation, as the backend converter can
 		// treat this as a proto2 optional.
 		if resultFDP.Proto3Optional != nil {
 			resultFDP.Proto3Optional = nil
 		}
 		if resultFDP.OneofIndex != nil {
 			resultFDP.OneofIndex = nil
 		}
 		if inField.Kind() == protoreflect.MessageKind || inField.Kind() == protoreflect.GroupKind {
 			// Handle fields that reference messages.
 			// Groups are a proto2-ism which predated nested messages.
 			msgFullName := string(inField.Message().FullName())
 			if !skipNormalization(msgFullName) {
 				// for everything but well known types, normalize.
 				normName := normalizeName(string(msgFullName))
 				if structTypes.contains(msgFullName) {
 					resultFDP.TypeName = proto.String(normName)
 				} else {
 					if visitedTypes.contains(msgFullName) {
 						return nil, fmt.Errorf("recursive type not supported: %s", inField.FullName())
 					}
 					visitedTypes.add(msgFullName)
 					dp, err := normalizeDescriptorInternal(inField.Message(), visitedTypes, enumTypes, structTypes, root)
 					if err != nil {
 						return nil, fmt.Errorf("error converting message %s: %v", inField.FullName(), err)
 					}
 					root.NestedType = append(root.NestedType, dp)
 					visitedTypes.delete(msgFullName)
 					lastNested := root.GetNestedType()[len(root.GetNestedType())-1].GetName()
 					resultFDP.TypeName = proto.String(lastNested)
 				}
 			}
 		}
 		if inField.Kind() == protoreflect.EnumKind {
 			// For enums, in order to avoid value conflict, we will always define
 			// a enclosing struct called enum_full_name_E that includes the actual
 			// enum.
 			enumFullName := string(inField.Enum().FullName())
 			enclosingTypeName := normalizeName(enumFullName) + "_E"
 			enumName := string(inField.Enum().Name())
 			actualFullName := fmt.Sprintf("%s.%s", enclosingTypeName, enumName)
 			if enumTypes.contains(enumFullName) {
 				resultFDP.TypeName = proto.String(actualFullName)
 			} else {
 				enumDP := protodesc.ToEnumDescriptorProto(inField.Enum())
 				enumDP.Name = proto.String(enumName)
 				// Ensure values in enum are sorted.
 				vals := enumDP.GetValue()
 				sort.SliceStable(vals, func(i, j int) bool {
 					return vals[i].GetNumber() < vals[j].GetNumber()
 				})
 				// Append wrapped enum to nested types.
 				root.NestedType = append(root.NestedType, &descriptorpb.DescriptorProto{
 					Name:     proto.String(enclosingTypeName),
 					EnumType: []*descriptorpb.EnumDescriptorProto{enumDP},
 				})
 				resultFDP.TypeName = proto.String(actualFullName)
 				enumTypes.add(enumFullName)
 			}
 		}
 		resultDP.Field = append(resultDP.Field, resultFDP)
 	}
 	// To reduce comparison jitter, order the common slices fields where possible.
 	//
 	// First, fields are sorted by ID number.
 	fields := resultDP.GetField()
 	sort.SliceStable(fields, func(i, j int) bool {
 		return fields[i].GetNumber() < fields[j].GetNumber()
 	})
 	// Then, sort nested messages in NestedType by name.
 	nested := resultDP.GetNestedType()
 	sort.SliceStable(nested, func(i, j int) bool {
 		return nested[i].GetName() < nested[j].GetName()
 	})
 	structTypes.add(fullProtoName)
 	return resultDP, nil
 }

 type stringSet struct {
 	m map[string]struct{}
 }

 func (s *stringSet) contains(k string) bool {
 	_, ok := s.m[k]
 	return ok
 }

 func (s *stringSet) add(k string) {
 	s.m[k] = struct{}{}
 }

 func (s *stringSet) delete(k string) {
 	delete(s.m, k)
 }

 func newStringSet() *stringSet {
 	return &stringSet{
 		m: make(map[string]struct{}),
 	}
 }

 func normalizeName(in string) string {
 	return strings.Replace(in, ".", "_", -1)
 }

 // These types don't get normalized into the fully-contained structure.
 var normalizationSkipList = []string{
 	/*
 		TODO: when backend supports resolving well known types, this list should be enabled.
 		"google.protobuf.DoubleValue",
 		"google.protobuf.FloatValue",
 		"google.protobuf.Int64Value",
 		"google.protobuf.UInt64Value",
 		"google.protobuf.Int32Value",
 		"google.protobuf.Uint32Value",
 		"google.protobuf.BoolValue",
 		"google.protobuf.StringValue",
 		"google.protobuf.BytesValue",
 	*/
 }

 func skipNormalization(fullName string) bool {
 	for _, v := range normalizationSkipList {
 		if v == fullName {
 			return true
 		}
 	}
 	return false
 }