blob: 5de7de05b1002f423a006dda2d3e456474de41bf [file] [log] [blame]
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vision
import (
"image"
pb "google.golang.org/genproto/googleapis/cloud/vision/v1"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
// Annotations contains all the annotations performed by the API on a single image.
// A nil field indicates either that the corresponding feature was not requested,
// or that annotation failed for that feature.
type Annotations struct {
// Faces holds the results of face detection.
Faces []*FaceAnnotation
// Landmarks holds the results of landmark detection.
Landmarks []*EntityAnnotation
// Logos holds the results of logo detection.
Logos []*EntityAnnotation
// Labels holds the results of label detection.
Labels []*EntityAnnotation
// Texts holds the results of text detection.
Texts []*EntityAnnotation
// SafeSearch holds the results of safe-search detection.
SafeSearch *SafeSearchAnnotation
// ImageProps contains properties of the annotated image.
ImageProps *ImageProps
// If non-nil, then one or more of the attempted annotations failed.
// Non-nil annotations are guaranteed to be correct, even if Error is
// non-nil.
Error error
}
func annotationsFromProto(res *pb.AnnotateImageResponse) *Annotations {
as := &Annotations{}
for _, a := range res.FaceAnnotations {
as.Faces = append(as.Faces, faceAnnotationFromProto(a))
}
for _, a := range res.LandmarkAnnotations {
as.Landmarks = append(as.Landmarks, entityAnnotationFromProto(a))
}
for _, a := range res.LogoAnnotations {
as.Logos = append(as.Logos, entityAnnotationFromProto(a))
}
for _, a := range res.LabelAnnotations {
as.Labels = append(as.Labels, entityAnnotationFromProto(a))
}
for _, a := range res.TextAnnotations {
as.Texts = append(as.Texts, entityAnnotationFromProto(a))
}
as.SafeSearch = safeSearchAnnotationFromProto(res.SafeSearchAnnotation)
as.ImageProps = imagePropertiesFromProto(res.ImagePropertiesAnnotation)
if res.Error != nil {
// res.Error is a google.rpc.Status. Convert to a Go error. Use a gRPC
// error because it preserves the code as a separate field.
// TODO(jba): preserve the details field.
as.Error = grpc.Errorf(codes.Code(res.Error.Code), "%s", res.Error.Message)
}
return as
}
// A FaceAnnotation describes the results of face detection on an image.
type FaceAnnotation struct {
// BoundingPoly is the bounding polygon around the face. The coordinates of
// the bounding box are in the original image's scale, as returned in
// ImageParams. The bounding box is computed to "frame" the face in
// accordance with human expectations. It is based on the landmarker
// results. Note that one or more x and/or y coordinates may not be
// generated in the BoundingPoly (the polygon will be unbounded) if only a
// partial face appears in the image to be annotated.
BoundingPoly []image.Point
// FDBoundingPoly is tighter than BoundingPoly, and
// encloses only the skin part of the face. Typically, it is used to
// eliminate the face from any image analysis that detects the "amount of
// skin" visible in an image. It is not based on the landmarker results, only
// on the initial face detection, hence the fd (face detection) prefix.
FDBoundingPoly []image.Point
// Landmarks are detected face landmarks.
Face FaceLandmarks
// RollAngle indicates the amount of clockwise/anti-clockwise rotation of
// the face relative to the image vertical, about the axis perpendicular to
// the face. Range [-180,180].
RollAngle float32
// PanAngle is the yaw angle: the leftward/rightward angle that the face is
// pointing, relative to the vertical plane perpendicular to the image. Range
// [-180,180].
PanAngle float32
// TiltAngle is the pitch angle: the upwards/downwards angle that the face is
// pointing relative to the image's horizontal plane. Range [-180,180].
TiltAngle float32
// DetectionConfidence is the detection confidence. The range is [0, 1].
DetectionConfidence float32
// LandmarkingConfidence is the face landmarking confidence. The range is [0, 1].
LandmarkingConfidence float32
// Likelihoods expresses the likelihood of various aspects of the face.
Likelihoods *FaceLikelihoods
}
func faceAnnotationFromProto(pfa *pb.FaceAnnotation) *FaceAnnotation {
fa := &FaceAnnotation{
BoundingPoly: boundingPolyFromProto(pfa.BoundingPoly),
FDBoundingPoly: boundingPolyFromProto(pfa.FdBoundingPoly),
RollAngle: pfa.RollAngle,
PanAngle: pfa.PanAngle,
TiltAngle: pfa.TiltAngle,
DetectionConfidence: pfa.DetectionConfidence,
LandmarkingConfidence: pfa.LandmarkingConfidence,
Likelihoods: &FaceLikelihoods{
Joy: Likelihood(pfa.JoyLikelihood),
Sorrow: Likelihood(pfa.SorrowLikelihood),
Anger: Likelihood(pfa.AngerLikelihood),
Surprise: Likelihood(pfa.SurpriseLikelihood),
UnderExposed: Likelihood(pfa.UnderExposedLikelihood),
Blurred: Likelihood(pfa.BlurredLikelihood),
Headwear: Likelihood(pfa.HeadwearLikelihood),
},
}
populateFaceLandmarks(pfa.Landmarks, &fa.Face)
return fa
}
// An EntityAnnotation describes the results of a landmark, label, logo or text
// detection on an image.
type EntityAnnotation struct {
// ID is an opaque entity ID. Some IDs might be available in Knowledge Graph(KG).
// For more details on KG please see:
// https://developers.google.com/knowledge-graph/
ID string
// Locale is the language code for the locale in which the entity textual
// description (next field) is expressed.
Locale string
// Description is the entity textual description, expressed in the language of Locale.
Description string
// Score is the overall score of the result. Range [0, 1].
Score float32
// Confidence is the accuracy of the entity detection in an image.
// For example, for an image containing the Eiffel Tower, this field represents
// the confidence that there is a tower in the query image. Range [0, 1].
Confidence float32
// Topicality is the relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of 'tower' to an image containing
// 'Eiffel Tower' is likely higher than an image containing a distant towering
// building, though the confidence that there is a tower may be the same.
// Range [0, 1].
Topicality float32
// BoundingPoly is the image region to which this entity belongs. Not filled currently
// for label detection. For text detection, BoundingPolys
// are produced for the entire text detected in an image region, followed by
// BoundingPolys for each word within the detected text.
BoundingPoly []image.Point
// Locations contains the location information for the detected entity.
// Multiple LatLng structs can be present since one location may indicate the
// location of the scene in the query image, and another the location of the
// place where the query image was taken. Location information is usually
// present for landmarks.
Locations []LatLng
// Properties are additional optional Property fields.
// For example a different kind of score or string that qualifies the entity.
Properties []Property
}
func entityAnnotationFromProto(e *pb.EntityAnnotation) *EntityAnnotation {
var locs []LatLng
for _, li := range e.Locations {
locs = append(locs, latLngFromProto(li.LatLng))
}
var props []Property
for _, p := range e.Properties {
props = append(props, propertyFromProto(p))
}
return &EntityAnnotation{
ID: e.Mid,
Locale: e.Locale,
Description: e.Description,
Score: e.Score,
Confidence: e.Confidence,
Topicality: e.Topicality,
BoundingPoly: boundingPolyFromProto(e.BoundingPoly),
Locations: locs,
Properties: props,
}
}
// SafeSearchAnnotation describes the results of a SafeSearch detection on an image.
type SafeSearchAnnotation struct {
// Adult is the likelihood that the image contains adult content.
Adult Likelihood
// Spoof is the likelihood that an obvious modification was made to the
// image's canonical version to make it appear funny or offensive.
Spoof Likelihood
// Medical is the likelihood that this is a medical image.
Medical Likelihood
// Violence is the likelihood that this image represents violence.
Violence Likelihood
}
func safeSearchAnnotationFromProto(s *pb.SafeSearchAnnotation) *SafeSearchAnnotation {
if s == nil {
return nil
}
return &SafeSearchAnnotation{
Adult: Likelihood(s.Adult),
Spoof: Likelihood(s.Spoof),
Medical: Likelihood(s.Medical),
Violence: Likelihood(s.Violence),
}
}
// ImageProps describes properties of the image itself, like the dominant colors.
type ImageProps struct {
// DominantColors describes the dominant colors of the image.
DominantColors []*ColorInfo
}
func imagePropertiesFromProto(ip *pb.ImageProperties) *ImageProps {
if ip == nil || ip.DominantColors == nil {
return nil
}
var cinfos []*ColorInfo
for _, ci := range ip.DominantColors.Colors {
cinfos = append(cinfos, colorInfoFromProto(ci))
}
return &ImageProps{DominantColors: cinfos}
}