If maximumTotalMBytesProcessed is set, do a dry run and check the data size.

Ryota Arai
2015-12-16 18:24:32 +09:00
parent d958817b10
commit 81386bcf37


@@ -95,6 +95,10 @@ class BigQuery(BaseQueryRunner):
             'jsonKeyFile': {
                 "type": "string",
                 'title': 'JSON Key File'
             },
+            'maximumTotalMBytesProcessed': {
+                "type": "number",
+                'title': 'Maximum Total MByte Processed'
+            }
         },
         'required': ['jsonKeyFile', 'projectId'],
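For illustration, here is how the new option might sit alongside the existing fields in a data source configuration; the values are hypothetical placeholders, only the keys come from the schema above:

# Hypothetical data source configuration (values are placeholders).
configuration = {
    "projectId": "my-gcp-project",
    "jsonKeyFile": "<base64-encoded service account key>",
    # Abort any query whose dry run reports more than 500 MB scanned.
    "maximumTotalMBytesProcessed": 500,
}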
@@ -121,9 +125,23 @@ class BigQuery(BaseQueryRunner):
         return self.configuration["projectId"]

     def run_query(self, query):
-        bigquery_service = self._get_bigquery_service()
+        logger.debug("BigQuery got query: %s", query)
+
+        bigquery_service = self._get_bigquery_service()
         jobs = bigquery_service.jobs()
+        project_id = self._get_project_id()
+
+        if "maximumTotalMBytesProcessed" in self.configuration:
+            maximumMB = self.configuration["maximumTotalMBytesProcessed"]
+            job_data = {
+                "query": query,
+                "dryRun": True,
+            }
+            response = jobs.query(projectId=project_id, body=job_data).execute()
+            processedMB = int(response["totalBytesProcessed"]) / 1000.0 / 1000.0
+            if maximumMB < processedMB:
+                return None, "Too large data will be processed (%d MBytes; maximum: %d MBytes)" % (processedMB, maximumMB)

         job_data = {
             "configuration": {
                 "query": {
@@ -132,10 +150,6 @@ class BigQuery(BaseQueryRunner):
                 }
             }
         }

-        logger.debug("BigQuery got query: %s", query)
-        project_id = self._get_project_id()
-
         try:
             insert_response = jobs.insert(projectId=project_id, body=job_data).execute()
             current_row = 0
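Two details worth noting about the check as written: run_query returns a (data, error) tuple, so returning None with a message surfaces the size violation as an ordinary query error rather than an exception; and the byte count is converted with decimal divisors (1000 * 1000), so the limit is measured in MB rather than MiB, with the %d format truncating processedMB to a whole number in the message.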