* secure the jinja environment * format * Update comment * remove extra test * remove lambda * Update * Raise an error on undefined variables * remove unused import * add unit tests to missing context vars and adjust error message --------- Co-authored-by: brianjlai <brian.lai@airbyte.io> Co-authored-by: Brian Lai <51336873+brianjlai@users.noreply.github.com>
104 lines
4.7 KiB
Python
104 lines
4.7 KiB
Python
#
|
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
import ast
|
|
from typing import Optional
|
|
|
|
from airbyte_cdk.sources.declarative.interpolation.filters import filters
|
|
from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
|
|
from airbyte_cdk.sources.declarative.interpolation.macros import macros
|
|
from airbyte_cdk.sources.declarative.types import Config
|
|
from jinja2 import meta
|
|
from jinja2.exceptions import UndefinedError
|
|
from jinja2.sandbox import Environment
|
|
|
|
|
|
class JinjaInterpolation(Interpolation):
|
|
"""
|
|
Interpolation strategy using the Jinja2 template engine.
|
|
|
|
If the input string is a raw string, the interpolated string will be the same.
|
|
`eval("hello world") -> "hello world"`
|
|
|
|
The engine will evaluate the content passed within {{}}, interpolating the keys from the config and context-specific arguments.
|
|
`eval("hello {{ name }}", name="airbyte") -> "hello airbyte")`
|
|
`eval("hello {{ config.name }}", config={"name": "airbyte"}) -> "hello airbyte")`
|
|
|
|
In additional to passing additional values through the kwargs argument, macros can be called from within the string interpolation.
|
|
For example,
|
|
"{{ max(2, 3) }}" will return 3
|
|
|
|
Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
|
|
"""
|
|
|
|
# These aliases are used to deprecate existing keywords without breaking all existing connectors.
|
|
ALIASES = {
|
|
"stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
|
|
"stream_partition": "stream_slice", # Use stream_partition to access partition router's values
|
|
}
|
|
|
|
# These extensions are not installed so they're not currently a problem,
|
|
# but we're still explicitely removing them from the jinja context.
|
|
# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
|
|
RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
|
|
|
|
# By default, these Python builtin functions are available in the Jinja context.
|
|
# We explicitely remove them because of the potential security risk.
|
|
# Please add a unit test to test_jinja.py when adding a restriction.
|
|
RESTRICTED_BUILTIN_FUNCTIONS = ["range"] # The range function can cause very expensive computations
|
|
|
|
def __init__(self):
|
|
self._environment = Environment()
|
|
self._environment.filters.update(**filters)
|
|
self._environment.globals.update(**macros)
|
|
|
|
for extension in self.RESTRICTED_EXTENSIONS:
|
|
self._environment.extensions.pop(extension, None)
|
|
for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
|
|
self._environment.globals.pop(builtin, None)
|
|
|
|
def eval(self, input_str: str, config: Config, default: Optional[str] = None, **additional_parameters):
|
|
context = {"config": config, **additional_parameters}
|
|
|
|
for alias, equivalent in self.ALIASES.items():
|
|
if alias in context:
|
|
# This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
|
|
raise ValueError(
|
|
f"Found reserved keyword {alias} in interpolation context. This is unexpected and indicative of a bug in the CDK."
|
|
)
|
|
elif equivalent in context:
|
|
context[alias] = context[equivalent]
|
|
|
|
try:
|
|
if isinstance(input_str, str):
|
|
result = self._eval(input_str, context)
|
|
if result:
|
|
return self._literal_eval(result)
|
|
else:
|
|
# If input is not a string, return it as is
|
|
raise Exception(f"Expected a string. got {input_str}")
|
|
except UndefinedError:
|
|
pass
|
|
# If result is empty or resulted in an undefined error, evaluate and return the default string
|
|
return self._literal_eval(self._eval(default, context))
|
|
|
|
def _literal_eval(self, result):
|
|
try:
|
|
return ast.literal_eval(result)
|
|
except (ValueError, SyntaxError):
|
|
return result
|
|
|
|
def _eval(self, s: str, context):
|
|
try:
|
|
ast = self._environment.parse(s)
|
|
undeclared = meta.find_undeclared_variables(ast)
|
|
undeclared_not_in_context = {var for var in undeclared if var not in context}
|
|
if undeclared_not_in_context:
|
|
raise ValueError(f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}")
|
|
return self._environment.from_string(s).render(context)
|
|
except TypeError:
|
|
# The string is a static value, not a jinja template
|
|
# It can be returned as is
|
|
return s
|