// blob: 468392541ad3ded481ecf287978e8e289c2830e8 [file] [log] [blame]
// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
"context"
"cloud.google.com/go/internal/trace"
bq "google.golang.org/api/bigquery/v2"
)
// ExtractConfig holds the configuration for an extract job.
type ExtractConfig struct {
// Src is the table from which data will be extracted.
// Only one of Src or SrcModel should be specified.
Src *Table
// SrcModel is the ML model from which the data will be extracted.
// Only one of Src or SrcModel should be specified.
SrcModel *Model
// Dst is the destination into which the data will be extracted.
// It must be non-nil by the time the job is built: toBQ dereferences it
// unconditionally.
Dst *GCSReference
// DisableHeader disables the printing of a header row in exported data.
// When it is false, the PrintHeader option is left unset in the request
// rather than being sent explicitly as true.
DisableHeader bool
// Labels holds the labels associated with this job.
Labels map[string]string
// UseAvroLogicalTypes controls, for Avro-based extracts, whether logical
// type annotations are generated.
//
// Example: With this enabled, writing a BigQuery TIMESTAMP column will result in
// an integer column annotated with the appropriate timestamp-micros/millis annotation
// in the resulting Avro files.
UseAvroLogicalTypes bool
}
// toBQ translates the ExtractConfig into the bq.JobConfiguration wire
// representation. The destination URIs are copied so the returned value does
// not alias e.Dst.URIs. e.Dst must be non-nil.
func (e *ExtractConfig) toBQ() *bq.JobConfiguration {
	// Detach the URI list from the caller's slice; the result is always non-nil.
	uris := make([]string, len(e.Dst.URIs))
	copy(uris, e.Dst.URIs)

	extract := &bq.JobConfigurationExtract{
		DestinationUris:     uris,
		Compression:         string(e.Dst.Compression),
		DestinationFormat:   string(e.Dst.DestinationFormat),
		FieldDelimiter:      e.Dst.FieldDelimiter,
		UseAvroLogicalTypes: e.UseAvroLogicalTypes,
	}

	// PrintHeader is a pointer: it stays nil unless the header is explicitly
	// disabled, in which case it points at false.
	if e.DisableHeader {
		extract.PrintHeader = new(bool)
	}

	// Attach whichever sources were configured.
	if e.Src != nil {
		extract.SourceTable = e.Src.toBQ()
	}
	if e.SrcModel != nil {
		extract.SourceModel = e.SrcModel.toBQ()
	}

	return &bq.JobConfiguration{
		Labels:  e.Labels,
		Extract: extract,
	}
}
// bqToExtractConfig rebuilds an ExtractConfig from the extract section of a
// bq.JobConfiguration. q.Extract must be non-nil. The client c is passed to
// bqToTable and bqToModel when converting the source references.
func bqToExtractConfig(q *bq.JobConfiguration, c *Client) *ExtractConfig {
	extract := q.Extract

	dst := &GCSReference{
		URIs:              extract.DestinationUris,
		Compression:       Compression(extract.Compression),
		DestinationFormat: DataFormat(extract.DestinationFormat),
	}
	dst.FileConfig.CSVOptions.FieldDelimiter = extract.FieldDelimiter

	// The header counts as disabled only when PrintHeader is explicitly false;
	// a nil pointer leaves DisableHeader at its zero value.
	headerOff := false
	if extract.PrintHeader != nil {
		headerOff = !*extract.PrintHeader
	}

	return &ExtractConfig{
		Labels:              q.Labels,
		Dst:                 dst,
		DisableHeader:       headerOff,
		Src:                 bqToTable(extract.SourceTable, c),
		SrcModel:            bqToModel(extract.SourceModel, c),
		UseAvroLogicalTypes: extract.UseAvroLogicalTypes,
	}
}
// An Extractor extracts data from a BigQuery table or model into Google Cloud
// Storage. It embeds JobIDConfig and ExtractConfig, so their fields may be set
// directly on the Extractor before Run is called.
type Extractor struct {
JobIDConfig
ExtractConfig
// c is the client used to create the job reference and to insert the job.
c *Client
}
// ExtractorTo returns an Extractor that extracts data from the BigQuery table t
// into the Google Cloud Storage destination dst.
// The returned Extractor shares t's client and may optionally be further
// configured before its Run method is called.
func (t *Table) ExtractorTo(dst *GCSReference) *Extractor {
	cfg := ExtractConfig{
		Src: t,
		Dst: dst,
	}
	return &Extractor{c: t.c, ExtractConfig: cfg}
}
// ExtractorTo returns an Extractor which can be used to persist the BigQuery
// model m into the Google Cloud Storage destination dst.
// The returned Extractor shares m's client and may optionally be further
// configured before its Run method is called.
func (m *Model) ExtractorTo(dst *GCSReference) *Extractor {
	cfg := ExtractConfig{
		SrcModel: m,
		Dst:      dst,
	}
	return &Extractor{c: m.c, ExtractConfig: cfg}
}
// Run initiates an extract job. The call is wrapped in a trace span that is
// ended with whatever error Run returns.
func (e *Extractor) Run(ctx context.Context) (j *Job, err error) {
	ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Extractor.Run")
	// err is the named result, so the deferred closure sees its final value.
	defer func() { trace.EndSpan(ctx, err) }()

	job := e.newJob()
	j, err = e.c.insertJob(ctx, job, nil)
	return j, err
}
// newJob assembles the bq.Job for this Extractor: the job reference comes from
// the embedded JobIDConfig and the configuration from the embedded
// ExtractConfig.
func (e *Extractor) newJob() *bq.Job {
	job := new(bq.Job)
	job.JobReference = e.JobIDConfig.createJobRef(e.c)
	job.Configuration = e.ExtractConfig.toBQ()
	return job
}