| // Copyright 2017 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package profiler is a client for the Google Cloud Profiler service. |
| // |
| // This package is still experimental and subject to change. |
| // |
| // Usage example: |
| // |
| // import "cloud.google.com/go/profiler" |
| // ... |
| // err := profiler.Start(profiler.Config{Service: "my-service"}) |
| // if err != nil { |
| // // TODO: Handle error. |
| // } |
| // |
| // Calling Start will start a goroutine to collect profiles and |
| // upload them to the Cloud Profiler server, at the rate specified |
| // by the server. |
| // |
| // The caller must provide the service string in the config, and |
| // may provide other information as well. See Config for details. |
| package profiler |
| |
| import ( |
| "bytes" |
| "errors" |
| "fmt" |
| "log" |
| "os" |
| "runtime/pprof" |
| "sync" |
| "time" |
| |
| gcemd "cloud.google.com/go/compute/metadata" |
| "cloud.google.com/go/internal/version" |
| "github.com/golang/protobuf/proto" |
| "github.com/golang/protobuf/ptypes" |
| gax "github.com/googleapis/gax-go" |
| "golang.org/x/net/context" |
| "google.golang.org/api/option" |
| gtransport "google.golang.org/api/transport/grpc" |
| pb "google.golang.org/genproto/googleapis/devtools/cloudprofiler/v2" |
| edpb "google.golang.org/genproto/googleapis/rpc/errdetails" |
| "google.golang.org/grpc" |
| "google.golang.org/grpc/codes" |
| grpcmd "google.golang.org/grpc/metadata" |
| "google.golang.org/grpc/status" |
| ) |
| |
var (
	// config is the package-wide profiler configuration. It is populated by
	// initializeConfig and read by the rest of the agent.
	config Config
	// startOnce guards Start so that the start routine runs at most once.
	startOnce sync.Once
	// The functions below are stubbed to be overrideable for testing.
	getProjectID     = gcemd.ProjectID
	getInstanceName  = gcemd.InstanceName
	getZone          = gcemd.Zone
	startCPUProfile  = pprof.StartCPUProfile
	stopCPUProfile   = pprof.StopCPUProfile
	writeHeapProfile = pprof.WriteHeapProfile
	sleep            = gax.Sleep
	dialGRPC         = gtransport.Dial
	onGCE            = gcemd.OnGCE
)
| |
const (
	// Endpoint, header key, label keys and OAuth scope used by the agent:
	// apiAddress is the default for Config.APIAddr, xGoogAPIMetadata is the
	// outgoing metadata key written by withXGoogHeader, the *Label constants
	// are the keys used for deployment and profile labels, and scope is the
	// scope passed to the gRPC dialer.
	apiAddress       = "cloudprofiler.googleapis.com:443"
	xGoogAPIMetadata = "x-goog-api-client"
	zoneNameLabel    = "zone"
	versionLabel     = "version"
	instanceLabel    = "instance"
	scope            = "https://www.googleapis.com/auth/monitoring.write"

	// Parameters of the exponential backoff used when CreateProfile fails
	// and the server does not dictate a retry delay.
	initialBackoff = time.Second
	// Ensure the agent will recover within 1 hour.
	maxBackoff        = time.Hour
	backoffMultiplier = 1.3 // Backoff envelope increases by this factor on each retry.
	// retryInfoMetadata is the trailing-metadata key under which the
	// serialized google.rpc.RetryInfo message is looked up.
	retryInfoMetadata = "google.rpc.retryinfo-bin"
)
| |
// Config is the profiler configuration.
type Config struct {
	// Service (or deprecated Target) must be provided to start the profiler.
	// It specifies the name of the service under which the profiled data
	// will be recorded and exposed at the Cloud Profiler UI for the project.
	// You can specify an arbitrary string, but see Deployment.target at
	// https://github.com/googleapis/googleapis/blob/master/google/devtools/cloudprofiler/v2/profiler.proto
	// for restrictions.
	// If both Service and Target are empty, the value of the GAE_SERVICE
	// environment variable is used; if that is empty too, Start fails.
	// NOTE: The string should be the same across different replicas of
	// your service so that the globally constant profiling rate is
	// maintained. Do not put things like PID or unique pod ID in the name.
	Service string

	// ServiceVersion is an optional field specifying the version of the
	// service. It can be an arbitrary string. Cloud Profiler profiles
	// once per minute for each version of each service in each zone.
	// If left empty, the value of the GAE_VERSION environment variable
	// (which may itself be empty) is used.
	ServiceVersion string

	// DebugLogging enables detailed debug logging from profiler. It
	// defaults to false.
	DebugLogging bool

	// ProjectID is the Cloud Console project ID to use instead of
	// the one read from the VM metadata server.
	//
	// Set this if you are running the agent in your local environment
	// or anywhere else outside of Google Cloud Platform; in that case
	// it is required and Start fails when it is empty.
	ProjectID string

	// APIAddr is the endpoint used to dial the profiler agent API over
	// gRPC. Defaults to the production environment
	// (cloudprofiler.googleapis.com:443), overridable for testing.
	APIAddr string

	// Target is the former name of Service. When Service is set it
	// overrides Target.
	//
	// Deprecated: use Service instead.
	Target string

	// instance and zone are filled in by initializeConfig from the
	// Compute Engine metadata server when running on GCE.
	instance string
	zone     string
}
| |
// startError holds the error, if any, that occurred while initializing
// and starting the agent. It is set once by Start and returned from every
// call to Start.
var startError error
| |
| // Start starts a goroutine to collect and upload profiles. The |
| // caller must provide the service string in the config. See |
| // Config for details. Start should only be called once. Any |
| // additional calls will be ignored. |
| func Start(cfg Config, options ...option.ClientOption) error { |
| startOnce.Do(func() { |
| startError = start(cfg, options...) |
| }) |
| return startError |
| } |
| |
| func start(cfg Config, options ...option.ClientOption) error { |
| if err := initializeConfig(cfg); err != nil { |
| debugLog("failed to initialize config: %v", err) |
| return err |
| } |
| |
| ctx := context.Background() |
| |
| opts := []option.ClientOption{ |
| option.WithEndpoint(config.APIAddr), |
| option.WithScopes(scope), |
| } |
| opts = append(opts, options...) |
| |
| conn, err := dialGRPC(ctx, opts...) |
| if err != nil { |
| debugLog("failed to dial GRPC: %v", err) |
| return err |
| } |
| |
| a := initializeAgent(pb.NewProfilerServiceClient(conn)) |
| go pollProfilerService(withXGoogHeader(ctx), a) |
| return nil |
| } |
| |
| func debugLog(format string, e ...interface{}) { |
| if config.DebugLogging { |
| log.Printf(format, e...) |
| } |
| } |
| |
// agent polls Cloud Profiler server for instructions on behalf of
// a task, and collects and uploads profiles as requested.
type agent struct {
	// client is the profiler service stub used for the CreateProfile and
	// UpdateProfile calls.
	client pb.ProfilerServiceClient
	// deployment identifies this deployment and is sent with every
	// CreateProfile request.
	deployment *pb.Deployment
	// profileLabels are attached to each profile before it is uploaded.
	profileLabels map[string]string
}
| |
| // abortedBackoffDuration retrieves the retry duration from gRPC trailing |
| // metadata, which is set by Cloud Profiler server. |
| func abortedBackoffDuration(md grpcmd.MD) (time.Duration, error) { |
| elem := md[retryInfoMetadata] |
| if len(elem) <= 0 { |
| return 0, errors.New("no retry info") |
| } |
| |
| var retryInfo edpb.RetryInfo |
| if err := proto.Unmarshal([]byte(elem[0]), &retryInfo); err != nil { |
| return 0, err |
| } else if time, err := ptypes.Duration(retryInfo.RetryDelay); err != nil { |
| return 0, err |
| } else { |
| if time < 0 { |
| return 0, errors.New("negative retry duration") |
| } |
| return time, nil |
| } |
| } |
| |
| type retryer struct { |
| backoff gax.Backoff |
| md grpcmd.MD |
| } |
| |
| func (r *retryer) Retry(err error) (time.Duration, bool) { |
| st, _ := status.FromError(err) |
| if st != nil && st.Code() == codes.Aborted { |
| dur, err := abortedBackoffDuration(r.md) |
| if err == nil { |
| return dur, true |
| } |
| debugLog("failed to get backoff duration: %v", err) |
| } |
| return r.backoff.Pause(), true |
| } |
| |
| // createProfile talks to Cloud Profiler server to create profile. In |
| // case of error, the goroutine will sleep and retry. Sleep duration may |
| // be specified by the server. Otherwise it will be an exponentially |
| // increasing value, bounded by maxBackoff. |
| func (a *agent) createProfile(ctx context.Context) *pb.Profile { |
| req := pb.CreateProfileRequest{ |
| Deployment: a.deployment, |
| ProfileType: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP}, |
| } |
| |
| var p *pb.Profile |
| md := grpcmd.New(map[string]string{}) |
| |
| gax.Invoke(ctx, func(ctx context.Context, settings gax.CallSettings) error { |
| var err error |
| p, err = a.client.CreateProfile(ctx, &req, grpc.Trailer(&md)) |
| return err |
| }, gax.WithRetry(func() gax.Retryer { |
| return &retryer{ |
| backoff: gax.Backoff{ |
| Initial: initialBackoff, |
| Max: maxBackoff, |
| Multiplier: backoffMultiplier, |
| }, |
| md: md, |
| } |
| })) |
| |
| debugLog("successfully created profile %v", p.GetProfileType()) |
| return p |
| } |
| |
| func (a *agent) profileAndUpload(ctx context.Context, p *pb.Profile) { |
| var prof bytes.Buffer |
| pt := p.GetProfileType() |
| |
| switch pt { |
| case pb.ProfileType_CPU: |
| duration, err := ptypes.Duration(p.Duration) |
| if err != nil { |
| debugLog("failed to get profile duration: %v", err) |
| return |
| } |
| if err := startCPUProfile(&prof); err != nil { |
| debugLog("failed to start CPU profile: %v", err) |
| return |
| } |
| sleep(ctx, duration) |
| stopCPUProfile() |
| case pb.ProfileType_HEAP: |
| if err := writeHeapProfile(&prof); err != nil { |
| debugLog("failed to write heap profile: %v", err) |
| return |
| } |
| default: |
| debugLog("unexpected profile type: %v", pt) |
| return |
| } |
| |
| // Starting Go 1.9 the profiles are symbolized by runtime/pprof. |
| // TODO(jianqiaoli): Remove the symbolization code when we decide to |
| // stop supporting Go 1.8. |
| if !shouldAssumeSymbolized { |
| if err := parseAndSymbolize(&prof); err != nil { |
| debugLog("failed to symbolize profile: %v", err) |
| } |
| } |
| |
| p.ProfileBytes = prof.Bytes() |
| p.Labels = a.profileLabels |
| req := pb.UpdateProfileRequest{Profile: p} |
| |
| // Upload profile, discard profile in case of error. |
| debugLog("start uploading profile") |
| if _, err := a.client.UpdateProfile(ctx, &req); err != nil { |
| debugLog("failed to upload profile: %v", err) |
| } |
| } |
| |
| // withXGoogHeader sets the name and version of the application in |
| // the `x-goog-api-client` header passed on each request. Intended for |
| // use by Google-written clients. |
| func withXGoogHeader(ctx context.Context, keyval ...string) context.Context { |
| kv := append([]string{"gl-go", version.Go(), "gccl", version.Repo}, keyval...) |
| kv = append(kv, "gax", gax.Version, "grpc", grpc.Version) |
| |
| md, _ := grpcmd.FromOutgoingContext(ctx) |
| md = md.Copy() |
| md[xGoogAPIMetadata] = []string{gax.XGoogHeader(kv...)} |
| return grpcmd.NewOutgoingContext(ctx, md) |
| } |
| |
| func initializeAgent(c pb.ProfilerServiceClient) *agent { |
| labels := map[string]string{} |
| if config.zone != "" { |
| labels[zoneNameLabel] = config.zone |
| } |
| if config.ServiceVersion != "" { |
| labels[versionLabel] = config.ServiceVersion |
| } |
| d := &pb.Deployment{ |
| ProjectId: config.ProjectID, |
| Target: config.Target, |
| Labels: labels, |
| } |
| |
| profileLabels := map[string]string{} |
| |
| if config.instance != "" { |
| profileLabels[instanceLabel] = config.instance |
| } |
| |
| return &agent{ |
| client: c, |
| deployment: d, |
| profileLabels: profileLabels, |
| } |
| } |
| |
| func initializeConfig(cfg Config) error { |
| config = cfg |
| |
| switch { |
| case config.Service != "": |
| config.Target = config.Service |
| case config.Target == "": |
| config.Target = os.Getenv("GAE_SERVICE") |
| } |
| |
| if config.Target == "" { |
| return errors.New("service name must be specified in the configuration") |
| } |
| |
| if config.ServiceVersion == "" { |
| config.ServiceVersion = os.Getenv("GAE_VERSION") |
| } |
| |
| if onGCE() { |
| var err error |
| if config.ProjectID == "" { |
| if config.ProjectID, err = getProjectID(); err != nil { |
| return fmt.Errorf("failed to get the project ID from Compute Engine: %v", err) |
| } |
| } |
| |
| if config.zone, err = getZone(); err != nil { |
| return fmt.Errorf("failed to get zone from Compute Engine: %v", err) |
| } |
| |
| if config.instance, err = getInstanceName(); err != nil { |
| return fmt.Errorf("failed to get instance from Compute Engine: %v", err) |
| } |
| |
| } else { |
| if config.ProjectID == "" { |
| return fmt.Errorf("project ID must be specified in the configuration if running outside of GCP") |
| } |
| } |
| |
| if config.APIAddr == "" { |
| config.APIAddr = apiAddress |
| } |
| return nil |
| } |
| |
| // pollProfilerService starts an endless loop to poll Cloud Profiler |
| // server for instructions, and collects and uploads profiles as |
| // requested. |
| func pollProfilerService(ctx context.Context, a *agent) { |
| for { |
| p := a.createProfile(ctx) |
| a.profileAndUpload(ctx, p) |
| } |
| } |