blob: 9fa0190685bfcb05027c1c6d02db274e294a8257 [file] [log] [blame]
// Copyright 2016 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
bq ""
// LoadConfig holds the configuration for a load job.
type LoadConfig struct {
// Src is the source from which data will be loaded.
Src LoadSource
// Dst is the table into which the data will be loaded.
Dst *Table
// CreateDisposition specifies the circumstances under which the destination table will be created.
// The default is CreateIfNeeded.
CreateDisposition TableCreateDisposition
// WriteDisposition specifies how existing data in the destination table is treated.
// The default is WriteAppend.
WriteDisposition TableWriteDisposition
// The labels associated with this job.
Labels map[string]string
// If non-nil, the destination table is partitioned by time.
TimePartitioning *TimePartitioning
// If non-nil, the destination table is partitioned by integer range.
RangePartitioning *RangePartitioning
// Clustering specifies the data clustering configuration for the destination table.
Clustering *Clustering
// Custom encryption configuration (e.g., Cloud KMS keys).
DestinationEncryptionConfig *EncryptionConfig
// Allows the schema of the destination table to be updated as a side effect of
// the load job.
SchemaUpdateOptions []string
// For Avro-based loads, controls whether logical type annotations are used.
// See
// for additional information.
UseAvroLogicalTypes bool
// For ingestion from datastore backups, ProjectionFields governs which fields
// are projected from the backup. The default behavior projects all fields.
ProjectionFields []string
func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
config := &bq.JobConfiguration{
Labels: l.Labels,
Load: &bq.JobConfigurationLoad{
CreateDisposition: string(l.CreateDisposition),
WriteDisposition: string(l.WriteDisposition),
DestinationTable: l.Dst.toBQ(),
TimePartitioning: l.TimePartitioning.toBQ(),
RangePartitioning: l.RangePartitioning.toBQ(),
Clustering: l.Clustering.toBQ(),
DestinationEncryptionConfiguration: l.DestinationEncryptionConfig.toBQ(),
SchemaUpdateOptions: l.SchemaUpdateOptions,
UseAvroLogicalTypes: l.UseAvroLogicalTypes,
ProjectionFields: l.ProjectionFields,
media := l.Src.populateLoadConfig(config.Load)
return config, media
func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
lc := &LoadConfig{
Labels: q.Labels,
CreateDisposition: TableCreateDisposition(q.Load.CreateDisposition),
WriteDisposition: TableWriteDisposition(q.Load.WriteDisposition),
Dst: bqToTable(q.Load.DestinationTable, c),
TimePartitioning: bqToTimePartitioning(q.Load.TimePartitioning),
RangePartitioning: bqToRangePartitioning(q.Load.RangePartitioning),
Clustering: bqToClustering(q.Load.Clustering),
DestinationEncryptionConfig: bqToEncryptionConfig(q.Load.DestinationEncryptionConfiguration),
SchemaUpdateOptions: q.Load.SchemaUpdateOptions,
UseAvroLogicalTypes: q.Load.UseAvroLogicalTypes,
ProjectionFields: q.Load.ProjectionFields,
var fc *FileConfig
if len(q.Load.SourceUris) == 0 {
s := NewReaderSource(nil)
fc = &s.FileConfig
lc.Src = s
} else {
s := NewGCSReference(q.Load.SourceUris...)
fc = &s.FileConfig
lc.Src = s
bqPopulateFileConfig(q.Load, fc)
return lc
// A Loader loads data from Google Cloud Storage into a BigQuery table.
type Loader struct {
c *Client
// A LoadSource represents a source of data that can be loaded into
// a BigQuery table.
// This package defines two LoadSources: GCSReference, for Google Cloud Storage
// objects, and ReaderSource, for data read from an io.Reader.
type LoadSource interface {
// populates config, returns media
populateLoadConfig(*bq.JobConfigurationLoad) io.Reader
// LoaderFrom returns a Loader which can be used to load data into a BigQuery table.
// The returned Loader may optionally be further configured before its Run method is called.
// See GCSReference and ReaderSource for additional configuration options that
// affect loading.
func (t *Table) LoaderFrom(src LoadSource) *Loader {
return &Loader{
c: t.c,
LoadConfig: LoadConfig{
Src: src,
Dst: t,
// Run initiates a load job.
func (l *Loader) Run(ctx context.Context) (j *Job, err error) {
ctx = trace.StartSpan(ctx, "")
defer func() { trace.EndSpan(ctx, err) }()
job, media := l.newJob()
return l.c.insertJob(ctx, job, media)
func (l *Loader) newJob() (*bq.Job, io.Reader) {
config, media := l.LoadConfig.toBQ()
return &bq.Job{
JobReference: l.JobIDConfig.createJobRef(l.c),
Configuration: config,
}, media