mirror of
https://github.com/getredash/redash.git
synced 2025-12-25 01:03:20 -05:00
Excel & CSV query runner (#2478)
* Excel query runner
* Param handling for read_excel
* CSV query runner
* Fix wrong module name
* Use yaml as query language
* Use yaml as query language for CSV
* Added icon and required modules
* Local address filtering
* Fix syntax error
This commit is contained in:
BIN
client/app/assets/images/db-logos/excel.png
Normal file
BIN
client/app/assets/images/db-logos/excel.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.6 KiB |
100
redash/query_runner/csv.py
Normal file
100
redash/query_runner/csv.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import logging
|
||||
import yaml
|
||||
import requests
|
||||
import io
|
||||
|
||||
from redash import settings
|
||||
from redash.query_runner import *
|
||||
from redash.utils import json_dumps
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# pandas and numpy are optional dependencies: ``enabled`` records whether
# both could be imported.
try:
    import pandas as pd
    import numpy as np
except ImportError:
    enabled = False
else:
    enabled = True
|
||||
|
||||
|
||||
class CSV(BaseQueryRunner):
    """Query runner that downloads a CSV file over HTTP and parses it with pandas.

    The query text is a YAML mapping:

    * ``url`` (required): location of the CSV file.
    * ``user-agent`` (optional): value sent as the ``User-agent`` header.
    * every other key is forwarded to ``pandas.read_csv`` as a keyword
      argument (``sep`` defaults to ``","`` unless ``sep`` or ``delimiter``
      is given).
    """

    should_annotate_query = False

    @classmethod
    def name(cls):
        """Return the display name of this runner."""
        return "CSV"

    @classmethod
    def enabled(cls):
        """Return the module-level ``enabled`` flag set by the optional imports."""
        return enabled

    @classmethod
    def configuration_schema(cls):
        """Return the configuration schema; this runner has no settings."""
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(CSV, self).__init__(configuration)
        self.syntax = "yaml"

    def test_connection(self):
        """Do nothing: the URL is part of each query, so there is nothing to test."""
        pass

    def run_query(self, query, user):
        """Fetch the CSV named in ``query`` and convert it to a result set.

        :param query: YAML text as described in the class docstring.
        :param user: unused by this runner.
        :return: ``(json_data, error)``. On success ``json_data`` is the
            serialized ``{'columns': [...], 'rows': [...]}`` structure and
            ``error`` is ``None``; on failure ``json_data`` is ``None`` and
            ``error`` is a message string.
        """
        path = ""
        try:
            args = yaml.safe_load(query)
            if not isinstance(args, dict):
                raise ValueError("Query must be a YAML mapping with a 'url' key.")

            path = args.pop('url', None) or ""
            if not path:
                raise ValueError("Missing 'url' in query.")
            # The user agent is optional; its absence must not skip the
            # address check below.
            ua = args.pop('user-agent', "")

            # This check lives in the same try block as the download so a
            # rejected (or unresolvable) address aborts the query instead of
            # being swallowed.
            # NOTE(review): requests follows redirects by default, so only the
            # URL written in the query is validated here.
            if settings.ENFORCE_PRIVATE_ADDRESS_BLOCK and is_private_address(path):
                raise Exception("Can't query private addresses.")

            # Default to comma separation without clashing with a separator
            # supplied in the query.
            if 'sep' not in args and 'delimiter' not in args:
                args['sep'] = ","

            response = requests.get(url=path, headers={"User-agent": ua})
            # Report HTTP errors instead of parsing an error page as CSV.
            response.raise_for_status()
            df = pd.read_csv(io.BytesIO(response.content), **args)

            data = {'columns': [], 'rows': []}
            # Built here rather than at class level because numpy may be
            # missing when the runner is disabled. Order matters: the first
            # matching entry wins.
            conversions = [
                {'pandas_type': np.integer, 'redash_type': 'integer'},
                {'pandas_type': np.inexact, 'redash_type': 'float'},
                {
                    'pandas_type': np.datetime64,
                    'redash_type': 'datetime',
                    # NaT has no strftime; emit None for missing timestamps.
                    'to_redash': lambda x: None if pd.isnull(x) else x.strftime('%Y-%m-%d %H:%M:%S'),
                },
                {'pandas_type': np.bool_, 'redash_type': 'boolean'},
                # ``np.object`` was an alias of the builtin ``object`` and has
                # been removed from NumPy; the builtin keeps the same
                # catch-all behaviour.
                {'pandas_type': object, 'redash_type': 'string'},
            ]

            labels = []
            for dtype, label in zip(df.dtypes, df.columns):
                for conversion in conversions:
                    if issubclass(dtype.type, conversion['pandas_type']):
                        data['columns'].append({
                            'name': label,
                            'friendly_name': label,
                            'type': conversion['redash_type'],
                        })
                        labels.append(label)
                        func = conversion.get('to_redash')
                        if func:
                            df[label] = df[label].apply(func)
                        break

            # NaN is replaced with None so missing cells serialize as null.
            data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')

            json_data = json_dumps(data)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error

    def get_schema(self):
        """Schema browsing is not available for this runner."""
        raise NotSupported()
|
||||
|
||||
# Pass the CSV runner class to register(), which is pulled in by the
# `from redash.query_runner import *` import; presumably this makes the
# runner selectable as a data source type (confirm in redash.query_runner).
register(CSV)
|
||||
96
redash/query_runner/excel.py
Normal file
96
redash/query_runner/excel.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import logging
|
||||
import yaml
|
||||
import requests
|
||||
|
||||
from redash import settings
|
||||
from redash.query_runner import *
|
||||
from redash.utils import json_dumps
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# pandas, xlrd, openpyxl and numpy are optional dependencies: ``enabled``
# records whether all of them could be imported.
try:
    import pandas as pd
    import xlrd
    import openpyxl
    import numpy as np
except ImportError:
    enabled = False
else:
    enabled = True
|
||||
|
||||
class Excel(BaseQueryRunner):
    """Query runner that downloads an Excel workbook over HTTP and parses it with pandas.

    The query text is a YAML mapping:

    * ``url`` (required): location of the workbook.
    * ``user-agent`` (optional): value sent as the ``User-agent`` header.
    * every other key is forwarded to ``pandas.read_excel`` as a keyword
      argument.
    """

    should_annotate_query = False

    @classmethod
    def enabled(cls):
        """Return the module-level ``enabled`` flag set by the optional imports."""
        return enabled

    @classmethod
    def configuration_schema(cls):
        """Return the configuration schema; this runner has no settings."""
        return {
            'type': 'object',
            'properties': {},
        }

    def __init__(self, configuration):
        super(Excel, self).__init__(configuration)
        self.syntax = "yaml"

    def test_connection(self):
        """Do nothing: the URL is part of each query, so there is nothing to test."""
        pass

    def run_query(self, query, user):
        """Fetch the workbook named in ``query`` and convert it to a result set.

        :param query: YAML text as described in the class docstring.
        :param user: unused by this runner.
        :return: ``(json_data, error)``. On success ``json_data`` is the
            serialized ``{'columns': [...], 'rows': [...]}`` structure and
            ``error`` is ``None``; on failure ``json_data`` is ``None`` and
            ``error`` is a message string.
        """
        # Imported locally because this module does not import ``io`` at the top.
        from io import BytesIO

        path = ""
        try:
            args = yaml.safe_load(query)
            if not isinstance(args, dict):
                raise ValueError("Query must be a YAML mapping with a 'url' key.")

            path = args.pop('url', None) or ""
            if not path:
                raise ValueError("Missing 'url' in query.")
            # The user agent is optional; its absence must not skip the
            # address check below.
            ua = args.pop('user-agent', "")

            # This check lives in the same try block as the download so a
            # rejected (or unresolvable) address aborts the query instead of
            # being swallowed.
            # NOTE(review): requests follows redirects by default, so only the
            # URL written in the query is validated here.
            if settings.ENFORCE_PRIVATE_ADDRESS_BLOCK and is_private_address(path):
                raise Exception("Can't query private addresses.")

            response = requests.get(url=path, headers={"User-agent": ua})
            # Report HTTP errors instead of parsing an error page as a workbook.
            response.raise_for_status()
            # Wrap the payload in a file-like object; newer pandas releases
            # deprecate passing raw bytes to read_excel.
            df = pd.read_excel(BytesIO(response.content), **args)

            data = {'columns': [], 'rows': []}
            # Built here rather than at class level because numpy may be
            # missing when the runner is disabled. Order matters: the first
            # matching entry wins.
            conversions = [
                {'pandas_type': np.integer, 'redash_type': 'integer'},
                {'pandas_type': np.inexact, 'redash_type': 'float'},
                {
                    'pandas_type': np.datetime64,
                    'redash_type': 'datetime',
                    # NaT has no strftime; emit None for blank date cells.
                    'to_redash': lambda x: None if pd.isnull(x) else x.strftime('%Y-%m-%d %H:%M:%S'),
                },
                {'pandas_type': np.bool_, 'redash_type': 'boolean'},
                # ``np.object`` was an alias of the builtin ``object`` and has
                # been removed from NumPy; the builtin keeps the same
                # catch-all behaviour.
                {'pandas_type': object, 'redash_type': 'string'},
            ]

            labels = []
            for dtype, label in zip(df.dtypes, df.columns):
                for conversion in conversions:
                    if issubclass(dtype.type, conversion['pandas_type']):
                        data['columns'].append({
                            'name': label,
                            'friendly_name': label,
                            'type': conversion['redash_type'],
                        })
                        labels.append(label)
                        func = conversion.get('to_redash')
                        if func:
                            df[label] = df[label].apply(func)
                        break

            # NaN is replaced with None so missing cells serialize as null.
            data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')

            json_data = json_dumps(data)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = "Error reading {0}. {1}".format(path, str(e))
            json_data = None

        return json_data, error

    def get_schema(self):
        """Schema browsing is not available for this runner."""
        raise NotSupported()
|
||||
|
||||
# Pass the Excel runner class to register(), which is pulled in by the
# `from redash.query_runner import *` import; presumably this makes the
# runner selectable as a data source type (confirm in redash.query_runner).
register(Excel)
|
||||
@@ -380,7 +380,9 @@ default_query_runners = [
|
||||
"redash.query_runner.cloudwatch",
|
||||
"redash.query_runner.cloudwatch_insights",
|
||||
"redash.query_runner.corporate_memory",
|
||||
"redash.query_runner.sparql_endpoint"
|
||||
"redash.query_runner.sparql_endpoint",
|
||||
"redash.query_runner.excel",
|
||||
"redash.query_runner.csv"
|
||||
]
|
||||
|
||||
enabled_query_runners = array_from_string(
|
||||
|
||||
@@ -37,3 +37,5 @@ python-rapidjson==0.8.0
|
||||
pyodbc==4.0.28
|
||||
trino~=0.305
|
||||
cmem-cmempy==21.2.3
|
||||
xlrd==2.0.1
|
||||
openpyxl==3.0.7
|
||||
|
||||
Reference in New Issue
Block a user