| // Copyright 2016 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package vision |
| |
| import ( |
| "image/color" |
| "math" |
| |
| "cloud.google.com/go/internal/version" |
| vkit "cloud.google.com/go/vision/apiv1" |
| "golang.org/x/net/context" |
| "google.golang.org/api/option" |
| pb "google.golang.org/genproto/googleapis/cloud/vision/v1" |
| cpb "google.golang.org/genproto/googleapis/type/color" |
| ) |
| |
| // Scope is the OAuth2 scope required by the Google Cloud Vision API. |
| const Scope = "https://www.googleapis.com/auth/cloud-platform" |
| |
| // Client is a Google Cloud Vision API client. |
| type Client struct { |
| client *vkit.ImageAnnotatorClient |
| } |
| |
| // NewClient creates a new vision client. |
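| // |
| // A minimal usage sketch (assumes Application Default Credentials are |
| // available; error handling elided): |
| // |
| //    ctx := context.Background() |
| //    client, err := vision.NewClient(ctx) |
| //    if err != nil { |
| //        // TODO: handle error. |
| //    } |
| //    defer client.Close() |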
| func NewClient(ctx context.Context, opts ...option.ClientOption) (*Client, error) { |
| c, err := vkit.NewImageAnnotatorClient(ctx, opts...) |
| if err != nil { |
| return nil, err |
| } |
| c.SetGoogleClientInfo("gccl", version.Repo) |
| return &Client{client: c}, nil |
| } |
| |
| // Close closes the client. |
| func (c *Client) Close() error { |
| return c.client.Close() |
| } |
| |
| // Annotate annotates multiple images, each with a potentially different set |
| // of features. |
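| // |
| // Each element of the result corresponds to the request at the same index; |
| // per-image failures are reported in the Annotations.Error field, not in |
| // the returned error. A usage sketch (img1 and img2 are *Image values |
| // constructed elsewhere; error handling elided): |
| // |
| //    anns, err := client.Annotate(ctx, |
| //        &vision.AnnotateRequest{Image: img1, MaxLabels: 5}, |
| //        &vision.AnnotateRequest{Image: img2, SafeSearch: true}, |
| //    ) |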
| func (c *Client) Annotate(ctx context.Context, requests ...*AnnotateRequest) ([]*Annotations, error) { |
| var reqs []*pb.AnnotateImageRequest |
| for _, r := range requests { |
| reqs = append(reqs, r.toProto()) |
| } |
| res, err := c.client.BatchAnnotateImages(ctx, &pb.BatchAnnotateImagesRequest{Requests: reqs}) |
| if err != nil { |
| return nil, err |
| } |
| var results []*Annotations |
| for _, res := range res.Responses { |
| results = append(results, annotationsFromProto(res)) |
| } |
| return results, nil |
| } |
| |
| // An AnnotateRequest specifies an image to annotate and the features to look for in that image. |
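| // |
| // For example, the following request (a sketch; img is an *Image constructed |
| // elsewhere in this package) asks for up to ten labels and a safe-search |
| // verdict in a single round trip: |
| // |
| //    req := &vision.AnnotateRequest{ |
| //        Image:      img, |
| //        MaxLabels:  10, |
| //        SafeSearch: true, |
| //    } |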
| type AnnotateRequest struct { |
| // Image is the image to annotate. |
| Image *Image |
| // MaxFaces is the maximum number of faces to detect in the image. |
| // Specifying a number greater than zero enables face detection. |
| MaxFaces int |
| // MaxLandmarks is the maximum number of landmarks to detect in the image. |
| // Specifying a number greater than zero enables landmark detection. |
| MaxLandmarks int |
| // MaxLogos is the maximum number of logos to detect in the image. |
| // Specifying a number greater than zero enables logo detection. |
| MaxLogos int |
| // MaxLabels is the maximum number of labels to detect in the image. |
| // Specifying a number greater than zero enables label detection. |
| MaxLabels int |
| // MaxTexts is the maximum number of separate pieces of text to detect in the |
| // image. Specifying a number greater than zero enables text detection. |
| MaxTexts int |
| // DocumentText specifies whether dense document-text OCR should be run |
| // on the image. If true, it takes precedence over MaxTexts. |
| DocumentText bool |
| // SafeSearch specifies whether a safe-search detection should be run on the image. |
| SafeSearch bool |
| // ImageProps specifies whether image properties should be obtained for the image. |
| ImageProps bool |
| // Web specifies whether web annotations should be obtained for the image. |
| Web bool |
| // CropHints, if non-nil, requests crop hints for the image, computed |
| // according to the given parameters. |
| CropHints *CropHintsParams |
| } |
| |
| func (ar *AnnotateRequest) toProto() *pb.AnnotateImageRequest { |
| img, ictx := ar.Image.toProtos() |
| var features []*pb.Feature |
| add := func(typ pb.Feature_Type, max int) { |
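| // Clamp max so it fits in the int32 MaxResults field of the proto. |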
| var mr int32 |
| if max > math.MaxInt32 { |
| mr = math.MaxInt32 |
| } else { |
| mr = int32(max) |
| } |
| features = append(features, &pb.Feature{Type: typ, MaxResults: mr}) |
| } |
| if ar.MaxFaces > 0 { |
| add(pb.Feature_FACE_DETECTION, ar.MaxFaces) |
| } |
| if ar.MaxLandmarks > 0 { |
| add(pb.Feature_LANDMARK_DETECTION, ar.MaxLandmarks) |
| } |
| if ar.MaxLogos > 0 { |
| add(pb.Feature_LOGO_DETECTION, ar.MaxLogos) |
| } |
| if ar.MaxLabels > 0 { |
| add(pb.Feature_LABEL_DETECTION, ar.MaxLabels) |
| } |
| if ar.MaxTexts > 0 { |
| add(pb.Feature_TEXT_DETECTION, ar.MaxTexts) |
| } |
| if ar.DocumentText { |
| add(pb.Feature_DOCUMENT_TEXT_DETECTION, 0) |
| } |
| if ar.SafeSearch { |
| add(pb.Feature_SAFE_SEARCH_DETECTION, 0) |
| } |
| if ar.ImageProps { |
| add(pb.Feature_IMAGE_PROPERTIES, 0) |
| } |
| if ar.Web { |
| add(pb.Feature_WEB_DETECTION, 0) |
| } |
| if ar.CropHints != nil { |
| add(pb.Feature_CROP_HINTS, 0) |
| if ictx == nil { |
| ictx = &pb.ImageContext{} |
| } |
| ictx.CropHintsParams = &pb.CropHintsParams{ |
| AspectRatios: ar.CropHints.AspectRatios, |
| } |
| } |
| return &pb.AnnotateImageRequest{ |
| Image: img, |
| Features: features, |
| ImageContext: ictx, |
| } |
| } |
| |
| // CropHintsParams are parameters for a request for crop hints. |
| type CropHintsParams struct { |
| // AspectRatios are the aspect ratios for the desired crop hints, each |
| // expressed as the ratio of the image's width to its height. For example, |
| // a desired aspect ratio of 4:3 corresponds to the value 1.33333. If not |
| // specified, the best possible crop is returned. The number of provided |
| // aspect ratios is limited to a maximum of 16; any aspect ratios provided |
| // after the 16th are ignored. |
| AspectRatios []float32 |
| } |
| |
| // annotateOne annotates a single image with a single feature and returns |
| // the sole resulting Annotations. |
| func (c *Client) annotateOne(ctx context.Context, req *AnnotateRequest) (*Annotations, error) { |
| annsSlice, err := c.Annotate(ctx, req) |
| if err != nil { |
| return nil, err |
| } |
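| // The service returns exactly one response per request, so annsSlice |
| // always has one element here. |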
| anns := annsSlice[0] |
| // When there is only one image and one feature, the Annotations.Error field is |
| // unambiguously about that one detection, so we "promote" it to the error return value. |
| return anns, anns.Error |
| } |
| |
| // TODO(jba): add examples for all single-feature functions (below). |
| |
| // DetectFaces performs face detection on the image. |
| // At most maxResults results are returned. |
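| // |
| // A sketch (img is an *Image constructed elsewhere; error handling elided): |
| // |
| //    faces, err := client.DetectFaces(ctx, img, 10) |
| //    if err != nil { |
| //        // TODO: handle error. |
| //    } |
| //    fmt.Printf("found %d faces\n", len(faces)) |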
| func (c *Client) DetectFaces(ctx context.Context, img *Image, maxResults int) ([]*FaceAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, MaxFaces: maxResults}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.Faces, nil |
| } |
| |
| // DetectLandmarks performs landmark detection on the image. |
| // At most maxResults results are returned. |
| func (c *Client) DetectLandmarks(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, MaxLandmarks: maxResults}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.Landmarks, nil |
| } |
| |
| // DetectLogos performs logo detection on the image. |
| // At most maxResults results are returned. |
| func (c *Client) DetectLogos(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, MaxLogos: maxResults}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.Logos, nil |
| } |
| |
| // DetectLabels performs label detection on the image. |
| // At most maxResults results are returned. |
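| // |
| // A sketch (img is an *Image; assumes the Description and Score fields of |
| // the EntityAnnotation type defined elsewhere in this package): |
| // |
| //    labels, err := client.DetectLabels(ctx, img, 5) |
| //    for _, l := range labels { |
| //        fmt.Println(l.Description, l.Score) |
| //    } |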
| func (c *Client) DetectLabels(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, MaxLabels: maxResults}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.Labels, nil |
| } |
| |
| // DetectTexts performs text detection on the image. |
| // At most maxResults results are returned. |
| func (c *Client) DetectTexts(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, MaxTexts: maxResults}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.Texts, nil |
| } |
| |
| // DetectDocumentText performs full text (OCR) detection on the image. |
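| // |
| // A sketch (img is an *Image; error handling elided; assumes the Text field |
| // of the TextAnnotation type defined elsewhere in this package): |
| // |
| //    text, err := client.DetectDocumentText(ctx, img) |
| //    fmt.Println(text.Text) |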
| func (c *Client) DetectDocumentText(ctx context.Context, img *Image) (*TextAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, DocumentText: true}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.FullText, nil |
| } |
| |
| // DetectSafeSearch performs safe-search detection on the image. |
| func (c *Client) DetectSafeSearch(ctx context.Context, img *Image) (*SafeSearchAnnotation, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, SafeSearch: true}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.SafeSearch, nil |
| } |
| |
| // DetectImageProps computes properties of the image. |
| func (c *Client) DetectImageProps(ctx context.Context, img *Image) (*ImageProps, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, ImageProps: true}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.ImageProps, nil |
| } |
| |
| // DetectWeb computes a web annotation on the image. |
| func (c *Client) DetectWeb(ctx context.Context, img *Image) (*WebDetection, error) { |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, Web: true}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.Web, nil |
| } |
| |
| // CropHints computes crop hints for the image. |
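| // |
| // A sketch requesting hints for a 16:9 crop (error handling elided): |
| // |
| //    hints, err := client.CropHints(ctx, img, &vision.CropHintsParams{ |
| //        AspectRatios: []float32{16.0 / 9.0}, |
| //    }) |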
| func (c *Client) CropHints(ctx context.Context, img *Image, params *CropHintsParams) ([]*CropHint, error) { |
| // A nil AnnotateRequest.CropHints means do not perform CropHints. But |
| // here the user is explicitly asking for CropHints, so treat nil as |
| // an empty CropHintsParams. |
| if params == nil { |
| params = &CropHintsParams{} |
| } |
| anns, err := c.annotateOne(ctx, &AnnotateRequest{Image: img, CropHints: params}) |
| if err != nil { |
| return nil, err |
| } |
| return anns.CropHints, nil |
| } |
| |
| // A Likelihood is an approximate representation of a probability. |
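| // Apart from LikelihoodUnknown, the values are ordered from least to most |
| // likely, so they can be compared directly. For example (a sketch, given a |
| // Likelihood value from an annotation): |
| // |
| //    if likelihood >= vision.Likely { |
| //        // Treat the feature as present. |
| //    } |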
| type Likelihood int |
| |
| const ( |
| // LikelihoodUnknown means the likelihood is unknown. |
| LikelihoodUnknown = Likelihood(pb.Likelihood_UNKNOWN) |
| |
| // VeryUnlikely means the image is very unlikely to belong to the feature specified. |
| VeryUnlikely = Likelihood(pb.Likelihood_VERY_UNLIKELY) |
| |
| // Unlikely means the image is unlikely to belong to the feature specified. |
| Unlikely = Likelihood(pb.Likelihood_UNLIKELY) |
| |
| // Possible means the image possibly belongs to the feature specified. |
| Possible = Likelihood(pb.Likelihood_POSSIBLE) |
| |
| // Likely means the image is likely to belong to the feature specified. |
| Likely = Likelihood(pb.Likelihood_LIKELY) |
| |
| // VeryLikely means the image is very likely to belong to the feature specified. |
| VeryLikely = Likelihood(pb.Likelihood_VERY_LIKELY) |
| ) |
| |
| // A Property is an arbitrary name-value pair. |
| type Property struct { |
| Name string |
| Value string |
| } |
| |
| func propertyFromProto(p *pb.Property) Property { |
| return Property{Name: p.Name, Value: p.Value} |
| } |
| |
| // ColorInfo describes a color that appears in an image, along with the |
| // score for that color and the fraction of the image it occupies. |
| type ColorInfo struct { |
| // Color is the RGBA components of the color. If the service omits the |
| // alpha channel, it defaults to fully opaque. |
| Color color.NRGBA64 |
| |
| // Score is the image-specific score for this color, in the range [0, 1]. |
| Score float32 |
| |
| // PixelFraction is the fraction of pixels the color occupies in the image, |
| // in the range [0, 1]. |
| PixelFraction float32 |
| } |
| |
| func colorInfoFromProto(ci *pb.ColorInfo) *ColorInfo { |
| return &ColorInfo{ |
| Color: colorFromProto(ci.Color), |
| Score: ci.Score, |
| PixelFraction: ci.PixelFraction, |
| } |
| } |
| |
| // TODO: Consider moving this into protobuf/ptypes. The color proto lives in |
| // google/type, so it is not specific to this API. |
| func colorFromProto(c *cpb.Color) color.NRGBA64 { |
| // Convert a color component from [0.0, 1.0] to a uint16. |
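| // For example, cvt(0) == 0, cvt(0.5) == 32768 and cvt(1) == math.MaxUint16. |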
| cvt := func(f float32) uint16 { return uint16(f*math.MaxUint16 + 0.5) } |
| |
| var alpha float32 = 1 |
| if c.Alpha != nil { |
| alpha = c.Alpha.Value |
| } |
| return color.NRGBA64{ |
| R: cvt(c.Red), |
| G: cvt(c.Green), |
| B: cvt(c.Blue), |
| A: cvt(alpha), |
| } |
| } |