vision/annotations.go - gocloud - Git at Google

 // Copyright 2016 Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package vision

 import (
 	"image"

 	pb "google.golang.org/genproto/googleapis/cloud/vision/v1"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 )

 // Annotations contains all the annotations performed by the API on a single image.
 // A nil field indicates either that the corresponding feature was not requested,
 // or that annotation failed for that feature.
 type Annotations struct {
 	// Faces holds the results of face detection.
 	Faces []*FaceAnnotation
 	// Landmarks holds the results of landmark detection.
 	Landmarks []*EntityAnnotation
 	// Logos holds the results of logo detection.
 	Logos []*EntityAnnotation
 	// Labels holds the results of label detection.
 	Labels []*EntityAnnotation
 	// Texts holds the results of text detection.
 	Texts []*EntityAnnotation
 	// SafeSearch holds the results of safe-search detection.
 	SafeSearch *SafeSearchAnnotation
 	// ImageProps contains properties of the annotated image.
 	ImageProps *ImageProps

 	// Error is non-nil if one or more of the attempted annotations failed.
 	// Non-nil annotations are guaranteed to be correct, even if Error is
 	// non-nil.
 	Error error
 }

 // annotationsFromProto converts a single-image API response into an
 // Annotations value. Slice fields stay nil when the response carries no
 // annotations of that kind. res is dereferenced without a nil check, so
 // callers must pass a non-nil response.
 func annotationsFromProto(res *pb.AnnotateImageResponse) *Annotations {
 	// entities converts one list of proto entity annotations; an empty
 	// input yields a nil slice.
 	entities := func(src []*pb.EntityAnnotation) []*EntityAnnotation {
 		var dst []*EntityAnnotation
 		for _, pe := range src {
 			dst = append(dst, entityAnnotationFromProto(pe))
 		}
 		return dst
 	}

 	var faces []*FaceAnnotation
 	for _, pf := range res.FaceAnnotations {
 		faces = append(faces, faceAnnotationFromProto(pf))
 	}

 	result := &Annotations{
 		Faces:      faces,
 		Landmarks:  entities(res.LandmarkAnnotations),
 		Logos:      entities(res.LogoAnnotations),
 		Labels:     entities(res.LabelAnnotations),
 		Texts:      entities(res.TextAnnotations),
 		SafeSearch: safeSearchAnnotationFromProto(res.SafeSearchAnnotation),
 		ImageProps: imagePropertiesFromProto(res.ImagePropertiesAnnotation),
 	}

 	if st := res.Error; st != nil {
 		// st is a google.rpc.Status. It is turned into a gRPC error so
 		// that the status code survives as a separate field.
 		// TODO(jba): preserve the details field.
 		result.Error = grpc.Errorf(codes.Code(st.Code), "%s", st.Message)
 	}
 	return result
 }

 // A FaceAnnotation describes the results of face detection on an image.
 type FaceAnnotation struct {
 	// BoundingPoly is the bounding polygon around the face. The coordinates of
 	// the bounding box are in the original image's scale, as returned in
 	// ImageParams. The bounding box is computed to "frame" the face in
 	// accordance with human expectations. It is based on the landmarker
 	// results. Note that one or more x and/or y coordinates may not be
 	// generated in the BoundingPoly (the polygon will be unbounded) if only a
 	// partial face appears in the image to be annotated.
 	BoundingPoly []image.Point

 	// FDBoundingPoly is tighter than BoundingPoly, and
 	// encloses only the skin part of the face. Typically, it is used to
 	// eliminate the face from any image analysis that detects the "amount of
 	// skin" visible in an image. It is not based on the landmarker results, only
 	// on the initial face detection, hence the fd (face detection) prefix.
 	FDBoundingPoly []image.Point

 	// Face holds the detected face landmarks.
 	Face FaceLandmarks

 	// RollAngle indicates the amount of clockwise/anti-clockwise rotation of
 	// the face relative to the image vertical, about the axis perpendicular to
 	// the face. Range [-180,180].
 	RollAngle float32

 	// PanAngle is the yaw angle: the leftward/rightward angle that the face is
 	// pointing, relative to the vertical plane perpendicular to the image. Range
 	// [-180,180].
 	PanAngle float32

 	// TiltAngle is the pitch angle: the upwards/downwards angle that the face is
 	// pointing relative to the image's horizontal plane. Range [-180,180].
 	TiltAngle float32

 	// DetectionConfidence is the detection confidence. The range is [0, 1].
 	DetectionConfidence float32

 	// LandmarkingConfidence is the face landmarking confidence. The range is [0, 1].
 	LandmarkingConfidence float32

 	// Likelihoods expresses the likelihood of various aspects of the face.
 	Likelihoods *FaceLikelihoods
 }

 // faceAnnotationFromProto converts a proto face annotation into a
 // FaceAnnotation. pfa is dereferenced without a nil check, so callers must
 // pass a non-nil message.
 func faceAnnotationFromProto(pfa *pb.FaceAnnotation) *FaceAnnotation {
 	result := new(FaceAnnotation)

 	// Geometry of the detected face.
 	result.BoundingPoly = boundingPolyFromProto(pfa.BoundingPoly)
 	result.FDBoundingPoly = boundingPolyFromProto(pfa.FdBoundingPoly)
 	result.RollAngle = pfa.RollAngle
 	result.PanAngle = pfa.PanAngle
 	result.TiltAngle = pfa.TiltAngle

 	// Confidence values are copied through unchanged.
 	result.DetectionConfidence = pfa.DetectionConfidence
 	result.LandmarkingConfidence = pfa.LandmarkingConfidence

 	// Likelihoods is always allocated, one entry per proto likelihood field.
 	lk := new(FaceLikelihoods)
 	lk.Joy = Likelihood(pfa.JoyLikelihood)
 	lk.Sorrow = Likelihood(pfa.SorrowLikelihood)
 	lk.Anger = Likelihood(pfa.AngerLikelihood)
 	lk.Surprise = Likelihood(pfa.SurpriseLikelihood)
 	lk.UnderExposed = Likelihood(pfa.UnderExposedLikelihood)
 	lk.Blurred = Likelihood(pfa.BlurredLikelihood)
 	lk.Headwear = Likelihood(pfa.HeadwearLikelihood)
 	result.Likelihoods = lk

 	// The landmarks are written directly into the embedded Face value.
 	populateFaceLandmarks(pfa.Landmarks, &result.Face)
 	return result
 }

 // An EntityAnnotation describes the results of a landmark, label, logo or text
 // detection on an image.
 type EntityAnnotation struct {
 	// ID is an opaque entity ID. Some IDs might be available in Knowledge Graph (KG).
 	// For more details on KG please see:
 	// https://developers.google.com/knowledge-graph/
 	ID string

 	// Locale is the language code for the locale in which the entity textual
 	// description (the Description field) is expressed.
 	Locale string

 	// Description is the entity textual description, expressed in the language of Locale.
 	Description string

 	// Score is the overall score of the result. Range [0, 1].
 	Score float32

 	// Confidence is the accuracy of the entity detection in an image.
 	// For example, for an image containing the Eiffel Tower, this field represents
 	// the confidence that there is a tower in the query image. Range [0, 1].
 	Confidence float32

 	// Topicality is the relevancy of the ICA (Image Content Annotation) label to the
 	// image. For example, the relevancy of 'tower' to an image containing
 	// 'Eiffel Tower' is likely higher than an image containing a distant towering
 	// building, though the confidence that there is a tower may be the same.
 	// Range [0, 1].
 	Topicality float32

 	// BoundingPoly is the image region to which this entity belongs. Not filled currently
 	// for label detection. For text detection, BoundingPolys
 	// are produced for the entire text detected in an image region, followed by
 	// BoundingPolys for each word within the detected text.
 	BoundingPoly []image.Point

 	// Locations contains the location information for the detected entity.
 	// Multiple LatLng structs can be present since one location may indicate the
 	// location of the scene in the query image, and another the location of the
 	// place where the query image was taken. Location information is usually
 	// present for landmarks.
 	Locations []LatLng

 	// Properties holds additional optional properties of the entity,
 	// for example a different kind of score or a string that qualifies it.
 	Properties []Property
 }

 // entityAnnotationFromProto converts a proto entity annotation into an
 // EntityAnnotation. Locations and Properties stay nil when the proto has
 // none. e is dereferenced without a nil check, so callers must pass a
 // non-nil message.
 func entityAnnotationFromProto(e *pb.EntityAnnotation) *EntityAnnotation {
 	ea := &EntityAnnotation{
 		ID:           e.Mid,
 		Locale:       e.Locale,
 		Description:  e.Description,
 		Score:        e.Score,
 		Confidence:   e.Confidence,
 		Topicality:   e.Topicality,
 		BoundingPoly: boundingPolyFromProto(e.BoundingPoly),
 	}
 	// Each location contributes its LatLng to the result.
 	for _, loc := range e.Locations {
 		ea.Locations = append(ea.Locations, latLngFromProto(loc.LatLng))
 	}
 	for _, prop := range e.Properties {
 		ea.Properties = append(ea.Properties, propertyFromProto(prop))
 	}
 	return ea
 }

 // SafeSearchAnnotation describes the results of a SafeSearch detection on an image.
 // Each field reports a Likelihood for one category of content.
 type SafeSearchAnnotation struct {
 	// Adult is the likelihood that the image contains adult content.
 	Adult Likelihood

 	// Spoof is the likelihood that an obvious modification was made to the
 	// image's canonical version to make it appear funny or offensive.
 	Spoof Likelihood

 	// Medical is the likelihood that this is a medical image.
 	Medical Likelihood

 	// Violence is the likelihood that this image represents violence.
 	Violence Likelihood
 }

 // safeSearchAnnotationFromProto converts a proto safe-search annotation
 // into a SafeSearchAnnotation. It returns nil when s is nil.
 func safeSearchAnnotationFromProto(s *pb.SafeSearchAnnotation) *SafeSearchAnnotation {
 	if s == nil {
 		return nil
 	}
 	ssa := new(SafeSearchAnnotation)
 	ssa.Adult = Likelihood(s.Adult)
 	ssa.Spoof = Likelihood(s.Spoof)
 	ssa.Medical = Likelihood(s.Medical)
 	ssa.Violence = Likelihood(s.Violence)
 	return ssa
 }

 // ImageProps describes properties of the image itself, like the dominant colors.
 type ImageProps struct {
 	// DominantColors describes the dominant colors of the image,
 	// one ColorInfo per reported color.
 	DominantColors []*ColorInfo
 }

 // imagePropertiesFromProto converts proto image properties into an
 // ImageProps. It returns nil when ip is nil or carries no dominant-colors
 // annotation.
 func imagePropertiesFromProto(ip *pb.ImageProperties) *ImageProps {
 	if ip == nil {
 		return nil
 	}
 	dominant := ip.DominantColors
 	if dominant == nil {
 		return nil
 	}
 	props := &ImageProps{}
 	for _, c := range dominant.Colors {
 		props.DominantColors = append(props.DominantColors, colorInfoFromProto(c))
 	}
 	return props
 }
	// Copyright 2016 Google Inc. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package vision

	import (
	"image"

	pb "google.golang.org/genproto/googleapis/cloud/vision/v1"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	)

	// Annotations contains all the annotations performed by the API on a single image.
	// A nil field indicates either that the corresponding feature was not requested,
	// or that annotation failed for that feature.
	type Annotations struct {
	// Faces holds the results of face detection.
	Faces []*FaceAnnotation
	// Landmarks holds the results of landmark detection.
	Landmarks []*EntityAnnotation
	// Logos holds the results of logo detection.
	Logos []*EntityAnnotation
	// Labels holds the results of label detection.
	Labels []*EntityAnnotation
	// Texts holds the results of text detection.
	Texts []*EntityAnnotation
	// SafeSearch holds the results of safe-search detection.
	SafeSearch *SafeSearchAnnotation
	// ImageProps contains properties of the annotated image.
	ImageProps *ImageProps

	// Error is non-nil if one or more of the attempted annotations failed.
	// Non-nil annotations are guaranteed to be correct, even if Error is
	// non-nil.
	Error error
	}

	// annotationsFromProto converts a single-image API response into an
	// Annotations value. Slice fields stay nil when the response carries no
	// annotations of that kind. res is dereferenced without a nil check, so
	// callers must pass a non-nil response.
	//
	// The parameter and result are pointers: the body builds and returns
	// &Annotations{}, which does not compile against a value return type.
	func annotationsFromProto(res *pb.AnnotateImageResponse) *Annotations {
		as := &Annotations{}
		for _, a := range res.FaceAnnotations {
			as.Faces = append(as.Faces, faceAnnotationFromProto(a))
		}
		for _, a := range res.LandmarkAnnotations {
			as.Landmarks = append(as.Landmarks, entityAnnotationFromProto(a))
		}
		for _, a := range res.LogoAnnotations {
			as.Logos = append(as.Logos, entityAnnotationFromProto(a))
		}
		for _, a := range res.LabelAnnotations {
			as.Labels = append(as.Labels, entityAnnotationFromProto(a))
		}
		for _, a := range res.TextAnnotations {
			as.Texts = append(as.Texts, entityAnnotationFromProto(a))
		}
		as.SafeSearch = safeSearchAnnotationFromProto(res.SafeSearchAnnotation)
		as.ImageProps = imagePropertiesFromProto(res.ImagePropertiesAnnotation)
		if res.Error != nil {
			// res.Error is a google.rpc.Status. Convert to a Go error. Use a gRPC
			// error because it preserves the code as a separate field.
			// TODO(jba): preserve the details field.
			as.Error = grpc.Errorf(codes.Code(res.Error.Code), "%s", res.Error.Message)
		}
		return as
	}

	// A FaceAnnotation describes the results of face detection on an image.
	type FaceAnnotation struct {
	// BoundingPoly is the bounding polygon around the face. The coordinates of
	// the bounding box are in the original image's scale, as returned in
	// ImageParams. The bounding box is computed to "frame" the face in
	// accordance with human expectations. It is based on the landmarker
	// results. Note that one or more x and/or y coordinates may not be
	// generated in the BoundingPoly (the polygon will be unbounded) if only a
	// partial face appears in the image to be annotated.
	BoundingPoly []image.Point

	// FDBoundingPoly is tighter than BoundingPoly, and
	// encloses only the skin part of the face. Typically, it is used to
	// eliminate the face from any image analysis that detects the "amount of
	// skin" visible in an image. It is not based on the landmarker results, only
	// on the initial face detection, hence the fd (face detection) prefix.
	FDBoundingPoly []image.Point

	// Face holds the detected face landmarks.
	Face FaceLandmarks

	// RollAngle indicates the amount of clockwise/anti-clockwise rotation of
	// the face relative to the image vertical, about the axis perpendicular to
	// the face. Range [-180,180].
	RollAngle float32

	// PanAngle is the yaw angle: the leftward/rightward angle that the face is
	// pointing, relative to the vertical plane perpendicular to the image. Range
	// [-180,180].
	PanAngle float32

	// TiltAngle is the pitch angle: the upwards/downwards angle that the face is
	// pointing relative to the image's horizontal plane. Range [-180,180].
	TiltAngle float32

	// DetectionConfidence is the detection confidence. The range is [0, 1].
	DetectionConfidence float32

	// LandmarkingConfidence is the face landmarking confidence. The range is [0, 1].
	LandmarkingConfidence float32

	// Likelihoods expresses the likelihood of various aspects of the face.
	Likelihoods *FaceLikelihoods
	}

	// faceAnnotationFromProto converts a proto face annotation into a
	// FaceAnnotation. pfa is dereferenced without a nil check, so callers must
	// pass a non-nil message.
	//
	// The parameter and result are pointers: the body returns the address of
	// the FaceAnnotation it builds, which does not compile against a value
	// return type.
	func faceAnnotationFromProto(pfa *pb.FaceAnnotation) *FaceAnnotation {
		fa := &FaceAnnotation{
			BoundingPoly:          boundingPolyFromProto(pfa.BoundingPoly),
			FDBoundingPoly:        boundingPolyFromProto(pfa.FdBoundingPoly),
			RollAngle:             pfa.RollAngle,
			PanAngle:              pfa.PanAngle,
			TiltAngle:             pfa.TiltAngle,
			DetectionConfidence:   pfa.DetectionConfidence,
			LandmarkingConfidence: pfa.LandmarkingConfidence,
			Likelihoods: &FaceLikelihoods{
				Joy:          Likelihood(pfa.JoyLikelihood),
				Sorrow:       Likelihood(pfa.SorrowLikelihood),
				Anger:        Likelihood(pfa.AngerLikelihood),
				Surprise:     Likelihood(pfa.SurpriseLikelihood),
				UnderExposed: Likelihood(pfa.UnderExposedLikelihood),
				Blurred:      Likelihood(pfa.BlurredLikelihood),
				Headwear:     Likelihood(pfa.HeadwearLikelihood),
			},
		}
		// The landmarks are written directly into the embedded Face value.
		populateFaceLandmarks(pfa.Landmarks, &fa.Face)
		return fa
	}

	// An EntityAnnotation describes the results of a landmark, label, logo or text
	// detection on an image.
	type EntityAnnotation struct {
	// ID is an opaque entity ID. Some IDs might be available in Knowledge Graph (KG).
	// For more details on KG please see:
	// https://developers.google.com/knowledge-graph/
	ID string

	// Locale is the language code for the locale in which the entity textual
	// description (the Description field) is expressed.
	Locale string

	// Description is the entity textual description, expressed in the language of Locale.
	Description string

	// Score is the overall score of the result. Range [0, 1].
	Score float32

	// Confidence is the accuracy of the entity detection in an image.
	// For example, for an image containing the Eiffel Tower, this field represents
	// the confidence that there is a tower in the query image. Range [0, 1].
	Confidence float32

	// Topicality is the relevancy of the ICA (Image Content Annotation) label to the
	// image. For example, the relevancy of 'tower' to an image containing
	// 'Eiffel Tower' is likely higher than an image containing a distant towering
	// building, though the confidence that there is a tower may be the same.
	// Range [0, 1].
	Topicality float32

	// BoundingPoly is the image region to which this entity belongs. Not filled currently
	// for label detection. For text detection, BoundingPolys
	// are produced for the entire text detected in an image region, followed by
	// BoundingPolys for each word within the detected text.
	BoundingPoly []image.Point

	// Locations contains the location information for the detected entity.
	// Multiple LatLng structs can be present since one location may indicate the
	// location of the scene in the query image, and another the location of the
	// place where the query image was taken. Location information is usually
	// present for landmarks.
	Locations []LatLng

	// Properties holds additional optional properties of the entity,
	// for example a different kind of score or a string that qualifies it.
	Properties []Property
	}

	// entityAnnotationFromProto converts a proto entity annotation into an
	// EntityAnnotation. Locations and Properties stay nil when the proto has
	// none. e is dereferenced without a nil check, so callers must pass a
	// non-nil message.
	//
	// The parameter and result are pointers: the body returns
	// &EntityAnnotation{...}, which does not compile against a value return
	// type.
	func entityAnnotationFromProto(e *pb.EntityAnnotation) *EntityAnnotation {
		var locs []LatLng
		for _, li := range e.Locations {
			locs = append(locs, latLngFromProto(li.LatLng))
		}
		var props []Property
		for _, p := range e.Properties {
			props = append(props, propertyFromProto(p))
		}
		return &EntityAnnotation{
			ID:           e.Mid,
			Locale:       e.Locale,
			Description:  e.Description,
			Score:        e.Score,
			Confidence:   e.Confidence,
			Topicality:   e.Topicality,
			BoundingPoly: boundingPolyFromProto(e.BoundingPoly),
			Locations:    locs,
			Properties:   props,
		}
	}

	// SafeSearchAnnotation describes the results of a SafeSearch detection on an image.
	// Each field reports a Likelihood for one category of content.
	type SafeSearchAnnotation struct {
	// Adult is the likelihood that the image contains adult content.
	Adult Likelihood

	// Spoof is the likelihood that an obvious modification was made to the
	// image's canonical version to make it appear funny or offensive.
	Spoof Likelihood

	// Medical is the likelihood that this is a medical image.
	Medical Likelihood

	// Violence is the likelihood that this image represents violence.
	Violence Likelihood
	}

	// safeSearchAnnotationFromProto converts a proto safe-search annotation
	// into a SafeSearchAnnotation. It returns nil when s is nil.
	//
	// The parameter and result are pointers: comparing s with nil and
	// returning nil are only valid for pointer types.
	func safeSearchAnnotationFromProto(s *pb.SafeSearchAnnotation) *SafeSearchAnnotation {
		if s == nil {
			return nil
		}
		return &SafeSearchAnnotation{
			Adult:    Likelihood(s.Adult),
			Spoof:    Likelihood(s.Spoof),
			Medical:  Likelihood(s.Medical),
			Violence: Likelihood(s.Violence),
		}
	}

	// ImageProps describes properties of the image itself, like the dominant colors.
	type ImageProps struct {
	// DominantColors describes the dominant colors of the image,
	// one ColorInfo per reported color.
	DominantColors []*ColorInfo
	}

	func imagePropertiesFromProto(ip pb.ImageProperties) ImageProps {
	if ip == nil \|\| ip.DominantColors == nil {
	return nil
	}
	var cinfos []*ColorInfo
	for _, ci := range ip.DominantColors.Colors {
	cinfos = append(cinfos, colorInfoFromProto(ci))
	}
	return &ImageProps{DominantColors: cinfos}
	}