// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| |
| package bigquery |
| |
| import ( |
| "io" |
| |
| bq "google.golang.org/api/bigquery/v2" |
| ) |
| |
| // A ReaderSource is a source for a load operation that gets |
| // data from an io.Reader. |
| type ReaderSource struct { |
| r io.Reader |
| FileConfig |
| } |
| |
| // NewReaderSource creates a ReaderSource from an io.Reader. You may |
| // optionally configure properties on the ReaderSource that describe the |
| // data being read, before passing it to Table.LoaderFrom. |
| func NewReaderSource(r io.Reader) *ReaderSource { |
| return &ReaderSource{r: r} |
| } |
| |
| func (r *ReaderSource) populateInsertJobConfForLoad(conf *insertJobConf) { |
| conf.media = r.r |
| r.FileConfig.populateLoadConfig(conf.job.Configuration.Load) |
| } |
| |
| // FileConfig contains configuration options that pertain to files, typically |
| // text files that require interpretation to be used as a BigQuery table. A |
| // file may live in Google Cloud Storage (see GCSReference), or it may be |
| // loaded into a table via the Table.LoaderFromReader. |
| type FileConfig struct { |
| // SourceFormat is the format of the GCS data to be read. |
| // Allowed values are: CSV, Avro, JSON, DatastoreBackup. The default is CSV. |
| SourceFormat DataFormat |
| |
| // FieldDelimiter is the separator for fields in a CSV file, used when |
| // reading or exporting data. The default is ",". |
| FieldDelimiter string |
| |
| // The number of rows at the top of a CSV file that BigQuery will skip when |
| // reading data. |
| SkipLeadingRows int64 |
| |
| // AllowJaggedRows causes missing trailing optional columns to be tolerated |
| // when reading CSV data. Missing values are treated as nulls. |
| AllowJaggedRows bool |
| |
| // AllowQuotedNewlines sets whether quoted data sections containing |
| // newlines are allowed when reading CSV data. |
| AllowQuotedNewlines bool |
| |
| // Indicates if we should automatically infer the options and |
| // schema for CSV and JSON sources. |
| AutoDetect bool |
| |
| // Encoding is the character encoding of data to be read. |
| Encoding Encoding |
| |
| // MaxBadRecords is the maximum number of bad records that will be ignored |
| // when reading data. |
| MaxBadRecords int64 |
| |
| // IgnoreUnknownValues causes values not matching the schema to be |
| // tolerated. Unknown values are ignored. For CSV this ignores extra values |
| // at the end of a line. For JSON this ignores named values that do not |
| // match any column name. If this field is not set, records containing |
| // unknown values are treated as bad records. The MaxBadRecords field can |
| // be used to customize how bad records are handled. |
| IgnoreUnknownValues bool |
| |
| // Schema describes the data. It is required when reading CSV or JSON data, |
| // unless the data is being loaded into a table that already exists. |
| Schema Schema |
| |
| // Quote is the value used to quote data sections in a CSV file. The |
| // default quotation character is the double quote ("), which is used if |
| // both Quote and ForceZeroQuote are unset. |
| // To specify that no character should be interpreted as a quotation |
| // character, set ForceZeroQuote to true. |
| // Only used when reading data. |
| Quote string |
| ForceZeroQuote bool |
| } |
| |
| // quote returns the CSV quote character, or nil if unset. |
| func (fc *FileConfig) quote() *string { |
| if fc.ForceZeroQuote { |
| quote := "" |
| return "e |
| } |
| if fc.Quote == "" { |
| return nil |
| } |
| return &fc.Quote |
| } |
| |
| func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { |
| conf.SkipLeadingRows = fc.SkipLeadingRows |
| conf.SourceFormat = string(fc.SourceFormat) |
| conf.Autodetect = fc.AutoDetect |
| conf.AllowJaggedRows = fc.AllowJaggedRows |
| conf.AllowQuotedNewlines = fc.AllowQuotedNewlines |
| conf.Encoding = string(fc.Encoding) |
| conf.FieldDelimiter = fc.FieldDelimiter |
| conf.IgnoreUnknownValues = fc.IgnoreUnknownValues |
| conf.MaxBadRecords = fc.MaxBadRecords |
| if fc.Schema != nil { |
| conf.Schema = fc.Schema.asTableSchema() |
| } |
| conf.Quote = fc.quote() |
| } |
| |
| func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) { |
| format := fc.SourceFormat |
| if format == "" { |
| // Format must be explicitly set for external data sources. |
| format = CSV |
| } |
| // TODO(jba): support AutoDetect. |
| conf.IgnoreUnknownValues = fc.IgnoreUnknownValues |
| conf.MaxBadRecords = fc.MaxBadRecords |
| conf.SourceFormat = string(format) |
| if fc.Schema != nil { |
| conf.Schema = fc.Schema.asTableSchema() |
| } |
| if format == CSV { |
| conf.CsvOptions = &bq.CsvOptions{ |
| AllowJaggedRows: fc.AllowJaggedRows, |
| AllowQuotedNewlines: fc.AllowQuotedNewlines, |
| Encoding: string(fc.Encoding), |
| FieldDelimiter: fc.FieldDelimiter, |
| SkipLeadingRows: fc.SkipLeadingRows, |
| Quote: fc.quote(), |
| } |
| } |
| } |
| |
// DataFormat identifies the format in which BigQuery table data is
// represented. It is a string type; the predefined values are the CSV, Avro,
// JSON and DatastoreBackup constants.
type DataFormat string
| |
| // Constants describing the format of BigQuery table data. |
| const ( |
| CSV DataFormat = "CSV" |
| Avro DataFormat = "AVRO" |
| JSON DataFormat = "NEWLINE_DELIMITED_JSON" |
| DatastoreBackup DataFormat = "DATASTORE_BACKUP" |
| ) |
| |
// Encoding names the character encoding of data that is loaded into
// BigQuery. Details on how the value is used are at
// https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
type Encoding string
| |
| const ( |
| UTF_8 Encoding = "UTF-8" |
| ISO_8859_1 Encoding = "ISO-8859-1" |
| ) |