241 lines
8.5 KiB
Python
241 lines
8.5 KiB
Python
#
|
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
import contextlib
import functools
import os
import re
from typing import Any, Callable, List, Optional, Type, Union
from uuid import uuid4

import sentry_sdk
from sentry_sdk.integrations.atexit import AtexitIntegration
from sentry_sdk.integrations.excepthook import ExcepthookIntegration
from sentry_sdk.integrations.logging import LoggingIntegration
|
|
|
|
|
|
class AirbyteSentry:
    """
    Class for working with sentry sdk. It provides methods to:
        - init sentry sdk based on env variable
        - add breadcrumbs and set context
        - work with transactions and transaction spans
        - set tag and capture message and capture exception
    Also it implements client side sensitive data scrubbing.
    """

    DSN_ENV_NAME = "SENTRY_DSN"
    SECRET_MASK = "***"
    # Maximum number of breadcrumbs to send on fail. Breadcrumbs are a trail of
    # events that occurred before the fail and are sent to the server only
    # if a handled or unhandled exception occurred.
    MAX_BREADCRUMBS = 30
    # Event sending rate. Could be from 0 (0%) to 1.0 (100% of events being sent
    # to the sentry server).
    TRACES_SAMPLE_RATE = 1.0
    # Each pattern captures the prefix in group 1 so that only the secret part
    # following it is replaced with SECRET_MASK.
    SECRET_REGEXP = [
        re.compile(r"(api_key=)[a-zA-Z0-9_]+"),
        re.compile(r"(access_token=)[a-zA-Z0-9_]+"),
        re.compile(r"(refresh_token=)[a-zA-Z0-9_]+"),
        re.compile(r"(token )[a-zA-Z0-9_]+"),
        re.compile(r"(Bearer )[a-zA-Z0-9_]+"),
    ]
    # Values stored under these keys are masked entirely.
    SENSITIVE_KEYS = ["Authorization", "client_secret", "access_token"]

    sentry_enabled = False
    source_tag = ""
    run_id = str(uuid4())
    secret_values: List[str] = []

    @classmethod
    def process_value(cls, key: Any, value: str) -> str:
        """
        Process single value. Used by recursive replace_value method or
        standalone for single value.

        Args:
            key: key (or list index, or None) the value is stored under. When
                it is one of SENSITIVE_KEYS the whole value is masked.
            value: string to scrub.

        Returns:
            The scrubbed string.
        """
        # A sensitive key masks the whole value, so no substitution is needed.
        if key in cls.SENSITIVE_KEYS:
            return cls.SECRET_MASK
        for secret in cls.secret_values:
            # Skip empty secrets: str.replace("", mask) would insert the mask
            # between every character and corrupt the whole value.
            if secret:
                value = value.replace(secret, cls.SECRET_MASK)
        masked_group = f"\\1{cls.SECRET_MASK}"
        for regexp in cls.SECRET_REGEXP:
            value = regexp.sub(masked_group, value)
        return value

    @classmethod
    def replace_value(cls, key: Any, value: Any) -> Any:
        """
        Recursively scan event and replace all sensitive data with SECRET_MASK.
        Perform inplace data replace i.e. its not creating new object.

        Dicts and lists are traversed and updated in place; strings are passed
        through process_value; any other value is returned unchanged.
        """
        if isinstance(value, dict):
            # Only existing keys are reassigned, so the dict size does not
            # change while it is being iterated.
            for k, v in value.items():
                value[k] = cls.replace_value(k, v)
        elif isinstance(value, list):
            for index, v in enumerate(value):
                value[index] = cls.replace_value(index, v)
        elif isinstance(value, str):
            return cls.process_value(key, value)
        return value

    @classmethod
    def filter_event(cls, event, hint):
        """
        Callback for before_send sentry hook.

        Scrubs the "message", "exception" and "contexts" entries of the event
        and returns the same event object.
        """
        if "message" in event:
            # Go through replace_value so a non-string message is passed
            # through untouched instead of raising inside the hook.
            event["message"] = cls.replace_value(None, event["message"])
        cls.replace_value(None, event.get("exception"))
        cls.replace_value(None, event.get("contexts"))
        return event

    @classmethod
    def filter_breadcrumb(cls, event, hint):
        """
        Callback for before_breadcrumb sentry hook.

        Scrubs the breadcrumb in place and returns it.
        """
        cls.replace_value(None, event)
        return event

    @classmethod
    def init(
        cls,
        source_tag: Optional[str] = None,
        transport: Optional[Union[Type[sentry_sdk.transport.Transport], Callable[[Any], None]]] = None,
        secret_values: Optional[List[str]] = None,
    ):
        """
        Read sentry data source name (DSN) from env variable and initialize sentry sdk.
        Does nothing when the env variable is missing or empty.

        Args:
            source_tag: str - Source name to be used in "source" tag for events organizing.
            transport: Transport or Callable - transport object for transferring
                sentry event to remote server. Usually used for testing, by default
                HTTP transport used
            secret_values: List[str] - list of string that have to be filtered
                out before sending event to sentry server.
        """
        sentry_dsn = os.environ.get(cls.DSN_ENV_NAME)
        if not sentry_dsn:
            return
        cls.sentry_enabled = True
        # None sentinel instead of a mutable default list shared between calls.
        cls.secret_values = secret_values if secret_values is not None else []
        sentry_sdk.init(
            sentry_dsn,
            max_breadcrumbs=cls.MAX_BREADCRUMBS,
            traces_sample_rate=cls.TRACES_SAMPLE_RATE,
            before_send=cls.filter_event,
            before_breadcrumb=cls.filter_breadcrumb,
            transport=transport,
            # Use only limited list of integration cause sentry may send
            # transaction events e.g. it could send httplib request with
            # url and authorization info over StdlibIntegration and it
            # would bypass before_send hook.
            integrations=[
                ExcepthookIntegration(always_run=True),
                AtexitIntegration(),
                LoggingIntegration(),
            ],
            # Disable default integrations cause sentry does not allow to
            # filter transactions event that could transfer sensitive data
            default_integrations=False,
        )
        if source_tag:
            sentry_sdk.set_tag("source", source_tag)
            sentry_sdk.set_tag("run_id", cls.run_id)
            cls.source_tag = source_tag

    def if_enabled(f):
        """Decorator: call f only when sentry is enabled, otherwise return None."""

        @functools.wraps(f)
        def wrapper(cls, *args, **kvargs):
            if cls.sentry_enabled:
                return f(cls, *args, **kvargs)

        return wrapper

    def if_enabled_else(return_value):
        """
        Decorator factory: call the wrapped function only when sentry is
        enabled, otherwise return return_value. return_value is evaluated once
        at decoration time, so the same object is returned on every disabled call.
        """

        def if_enabled(f):
            @functools.wraps(f)
            def wrapper(cls, *args, **kvargs):
                if cls.sentry_enabled:
                    return f(cls, *args, **kvargs)
                else:
                    return return_value

            return wrapper

        return if_enabled

    # according to issue CDK: typing errors #9500, mypy raises error on this line
    # 'Argument 1 to "if_enabled" has incompatible type "Callable[[Type[AirbyteSentry], str, Any], Any]"; expected "AirbyteSentry"'
    # there are a few similar opened issues
    # https://github.com/python/mypy/issues/12110
    # https://github.com/python/mypy/issues/11619
    # ignored for now
    @classmethod  # type: ignore
    @if_enabled
    def set_tag(cls, tag_name: str, value: Any):
        """
        Set tag that is handy for events organizing and filtering by sentry UI.
        """
        sentry_sdk.set_tag(tag_name, value)

    # same mypy ignore as on set_tag
    @classmethod  # type: ignore
    @if_enabled
    def add_breadcrumb(cls, message, data=None):
        """
        Add sentry breadcrumb.
        """
        sentry_sdk.add_breadcrumb(message=message, data=data)

    # same mypy ignore as on set_tag
    @classmethod  # type: ignore
    @if_enabled
    def set_context(cls, name, data):
        """
        Scrub data in place and set it as sentry context under the given name.
        """
        # Global context being used by transaction event as well. Since we cant
        # filter sensitive data coming from transaction event using sentry
        # before_send hook, apply filter to context here.
        cls.replace_value(None, data)
        sentry_sdk.set_context(name, data)

    # same mypy ignore as on set_tag
    @classmethod  # type: ignore
    @if_enabled
    def capture_message(cls, message):
        """
        Send message event to sentry.
        """
        sentry_sdk.capture_message(message)

    # same mypy ignore as on set_tag
    @classmethod  # type: ignore
    @if_enabled
    def capture_exception(
        cls,
        error: Optional[BaseException] = None,
        scope: Optional[Any] = None,
        **scope_args,
    ):
        """
        Report handled exception to sentry.
        """
        sentry_sdk.capture_exception(error, scope=scope, **scope_args)

    # same mypy ignore as on set_tag
    @classmethod
    @if_enabled_else(contextlib.nullcontext())  # type: ignore
    def start_transaction(cls, op, name=None):
        """
        Return context manager for starting sentry transaction for performance monitoring.
        The transaction name is prefixed with the source tag.
        """
        return sentry_sdk.start_transaction(op=op, name=f"{cls.source_tag}.{name}")

    # same mypy ignore as on set_tag
    @classmethod
    @if_enabled_else(contextlib.nullcontext())  # type: ignore
    def start_transaction_span(cls, op, description=None):
        """
        Return context manager for starting sentry transaction span inside existing sentry transaction.
        """
        # Apply filter to description since we cannot use before_send sentry
        # hook for transaction event.
        description = cls.replace_value(None, description)
        return sentry_sdk.start_span(op=op, description=description)
|