refresh_queries shouldn't break because of a single query having a bad schedule object (#4163)

* move filtering of invalid schedules to the query

* simplify retrieved_at assignment and wrap in a try/except block to avoid one query blowing up the rest

* refactor refresh_queries to use simpler functions with a single responsibility and add try/except blocks to avoid one query blowing up the rest

* avoid blowing up when job locks point to expired Job objects. Enqueue them again instead

* there's no need to check for the existence of interval - all schedules have intervals

* disable faulty schedules

* reduce FP style in refresh_queries

* report refresh_queries errors to Sentry (if it is configured)

* avoid using exists+fetch and use exceptions instead
This commit is contained in:
Omer Lachish
2020-03-01 11:02:46 +02:00
committed by GitHub
parent b0f1cdd194
commit a9cb87d4b3
6 changed files with 210 additions and 267 deletions

View File

@@ -159,16 +159,26 @@ class ShouldScheduleNextTest(TestCase):
class QueryOutdatedQueriesTest(BaseTestCase):
def schedule(self, **kwargs):
    """Build a schedule dict for test queries.

    Every standard field ("interval", "time", "until", "day_of_week")
    defaults to ``None``; keyword arguments override those defaults and may
    add extra keys (e.g. ``disabled=True``).
    """
    defaults = dict.fromkeys(("interval", "time", "until", "day_of_week"))
    return {**defaults, **kwargs}
def create_scheduled_query(self, **kwargs):
    """Create a query via the test factory, with a schedule built from ``kwargs``.

    The keyword arguments are forwarded unchanged to ``self.schedule``.
    """
    query_schedule = self.schedule(**kwargs)
    return self.factory.create_query(schedule=query_schedule)
def fake_previous_execution(self, query, **kwargs):
    """Make ``query`` look as if it last ran some time in the past.

    ``kwargs`` are passed straight to ``datetime.timedelta`` (e.g. ``hours=2``,
    ``minutes=30``). A query result with the query's text and hash, retrieved
    that long before now, is created through the test factory and set as the
    query's ``latest_query_data``.
    """
    age = datetime.timedelta(**kwargs)
    query.latest_query_data = self.factory.create_query_result(
        query_hash=query.query_hash,
        query_text=query.query_text,
        retrieved_at=utcnow() - age,
    )
# TODO: this test can be refactored to use a mocked version of should_schedule_next to simplify it.
def test_outdated_queries_skips_unscheduled_queries(self):
query = self.factory.create_query(
schedule={
"interval": None,
"time": None,
"until": None,
"day_of_week": None,
}
)
query = self.create_scheduled_query()
query_with_none = self.factory.create_query(schedule=None)
queries = models.Query.outdated_queries()
@@ -177,71 +187,33 @@ class QueryOutdatedQueriesTest(BaseTestCase):
self.assertNotIn(query_with_none, queries)
def test_outdated_queries_works_with_ttl_based_schedule(self):
two_hours_ago = utcnow() - datetime.timedelta(hours=2)
query = self.factory.create_query(
schedule={
"interval": "3600",
"time": None,
"until": None,
"day_of_week": None,
}
)
query_result = self.factory.create_query_result(
query=query.query_text, retrieved_at=two_hours_ago
)
query.latest_query_data = query_result
query = self.create_scheduled_query(interval="3600")
self.fake_previous_execution(query, hours=2)
queries = models.Query.outdated_queries()
self.assertIn(query, queries)
def test_outdated_queries_works_scheduled_queries_tracker(self):
two_hours_ago = utcnow() - datetime.timedelta(hours=2)
query = self.factory.create_query(
schedule={
"interval": "3600",
"time": None,
"until": None,
"day_of_week": None,
}
)
query_result = self.factory.create_query_result(
query=query, retrieved_at=two_hours_ago
)
query.latest_query_data = query_result
query = self.create_scheduled_query(interval="3600")
self.fake_previous_execution(query, hours=2)
models.scheduled_queries_executions.update(query.id)
queries = models.Query.outdated_queries()
self.assertNotIn(query, queries)
def test_skips_fresh_queries(self):
half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
query = self.factory.create_query(
schedule={
"interval": "3600",
"time": None,
"until": None,
"day_of_week": None,
}
)
query_result = self.factory.create_query_result(
query=query.query_text, retrieved_at=half_an_hour_ago
)
query.latest_query_data = query_result
query = self.create_scheduled_query(interval="3600")
self.fake_previous_execution(query, minutes=30)
queries = models.Query.outdated_queries()
self.assertNotIn(query, queries)
def test_outdated_queries_works_with_specific_time_schedule(self):
half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
query = self.factory.create_query(
schedule={
"interval": "86400",
"time": half_an_hour_ago.strftime("%H:%M"),
"until": None,
"day_of_week": None,
}
)
query = self.create_scheduled_query(interval="86400", time=half_an_hour_ago.strftime("%H:%M"))
query_result = self.factory.create_query_result(
query=query.query_text,
retrieved_at=half_an_hour_ago - datetime.timedelta(days=1),
@@ -256,32 +228,14 @@ class QueryOutdatedQueriesTest(BaseTestCase):
Only one query per data source with the same text will be reported by
Query.outdated_queries().
"""
query = self.factory.create_query(
schedule={
"interval": "60",
"until": None,
"time": None,
"day_of_week": None,
}
)
query = self.create_scheduled_query(interval="60")
query2 = self.factory.create_query(
schedule={
"interval": "60",
"until": None,
"time": None,
"day_of_week": None,
},
schedule=self.schedule(interval="60"),
query_text=query.query_text,
query_hash=query.query_hash,
)
retrieved_at = utcnow() - datetime.timedelta(minutes=10)
query_result = self.factory.create_query_result(
retrieved_at=retrieved_at,
query_text=query.query_text,
query_hash=query.query_hash,
)
query.latest_query_data = query_result
query2.latest_query_data = query_result
self.fake_previous_execution(query, minutes=10)
self.fake_previous_execution(query2, minutes=10)
self.assertEqual(list(models.Query.outdated_queries()), [query2])
@@ -291,32 +245,16 @@ class QueryOutdatedQueriesTest(BaseTestCase):
Query.outdated_queries() even if they have the same query text.
"""
query = self.factory.create_query(
schedule={
"interval": "60",
"until": None,
"time": None,
"day_of_week": None,
},
schedule=self.schedule(interval="60"),
data_source=self.factory.create_data_source(),
)
query2 = self.factory.create_query(
schedule={
"interval": "60",
"until": None,
"time": None,
"day_of_week": None,
},
schedule=self.schedule(interval="60"),
query_text=query.query_text,
query_hash=query.query_hash,
)
retrieved_at = utcnow() - datetime.timedelta(minutes=10)
query_result = self.factory.create_query_result(
retrieved_at=retrieved_at,
query_text=query.query_text,
query_hash=query.query_hash,
)
query.latest_query_data = query_result
query2.latest_query_data = query_result
self.fake_previous_execution(query, minutes=10)
self.fake_previous_execution(query2, minutes=10)
outdated_queries = models.Query.outdated_queries()
self.assertEqual(len(outdated_queries), 2)
@@ -328,32 +266,14 @@ class QueryOutdatedQueriesTest(BaseTestCase):
If multiple queries with the same text exist, only ones that are
scheduled to be refreshed are reported by Query.outdated_queries().
"""
query = self.factory.create_query(
schedule={
"interval": "60",
"until": None,
"time": None,
"day_of_week": None,
}
)
query = self.create_scheduled_query(interval="60")
query2 = self.factory.create_query(
schedule={
"interval": "3600",
"until": None,
"time": None,
"day_of_week": None,
},
schedule=self.schedule(interval="3600"),
query_text=query.query_text,
query_hash=query.query_hash,
)
retrieved_at = utcnow() - datetime.timedelta(minutes=10)
query_result = self.factory.create_query_result(
retrieved_at=retrieved_at,
query_text=query.query_text,
query_hash=query.query_hash,
)
query.latest_query_data = query_result
query2.latest_query_data = query_result
self.fake_previous_execution(query, minutes=10)
self.fake_previous_execution(query2, minutes=10)
self.assertEqual(list(models.Query.outdated_queries()), [query])
@@ -363,25 +283,14 @@ class QueryOutdatedQueriesTest(BaseTestCase):
for scheduling future execution.
"""
query = self.factory.create_query(
schedule={
"interval": "60",
"until": None,
"time": None,
"day_of_week": None,
},
schedule=self.schedule(interval="60"),
schedule_failures=4,
)
retrieved_at = utcnow() - datetime.timedelta(minutes=16)
query_result = self.factory.create_query_result(
retrieved_at=retrieved_at,
query_text=query.query_text,
query_hash=query.query_hash,
)
query.latest_query_data = query_result
self.fake_previous_execution(query, minutes=16)
self.assertEqual(list(models.Query.outdated_queries()), [])
query_result.retrieved_at = utcnow() - datetime.timedelta(minutes=17)
self.fake_previous_execution(query, minutes=17)
self.assertEqual(list(models.Query.outdated_queries()), [query])
def test_schedule_until_after(self):
@@ -390,21 +299,11 @@ class QueryOutdatedQueriesTest(BaseTestCase):
Query.outdated_queries() after the given time is past.
"""
one_day_ago = (utcnow() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
two_hours_ago = utcnow() - datetime.timedelta(hours=2)
query = self.factory.create_query(
schedule={
"interval": "3600",
"until": one_day_ago,
"time": None,
"day_of_week": None,
}
)
query_result = self.factory.create_query_result(
query=query.query_text, retrieved_at=two_hours_ago
)
query.latest_query_data = query_result
query = self.create_scheduled_query(interval="3600", until=one_day_ago)
self.fake_previous_execution(query, hours=2)
queries = models.Query.outdated_queries()
self.assertNotIn(query, queries)
def test_schedule_until_before(self):
@@ -413,23 +312,28 @@ class QueryOutdatedQueriesTest(BaseTestCase):
Query.outdated_queries() before the given time is past.
"""
one_day_from_now = (utcnow() + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
two_hours_ago = utcnow() - datetime.timedelta(hours=2)
query = self.factory.create_query(
schedule={
"interval": "3600",
"until": one_day_from_now,
"time": None,
"day_of_week": None,
}
)
query_result = self.factory.create_query_result(
query=query.query_text, retrieved_at=two_hours_ago
)
query.latest_query_data = query_result
query = self.create_scheduled_query(interval="3600", until=one_day_from_now)
self.fake_previous_execution(query, hours=2)
queries = models.Query.outdated_queries()
self.assertIn(query, queries)
def test_skips_and_disables_faulty_queries(self):
    """A query with an unprocessable schedule is skipped and gets disabled.

    The valid query must still be reported, so one bad schedule does not
    prevent the remaining queries from being picked up.
    """
    # The other tests in this class pass `until` as a "%Y-%m-%d" date string;
    # "pigs fly" is deliberately not one.
    faulty_query = self.create_scheduled_query(until="pigs fly")
    valid_query = self.create_scheduled_query(interval="60")
    self.fake_previous_execution(valid_query, minutes=10)

    # Evaluate once and assert on that result instead of binding an unused
    # local and then calling outdated_queries() a second time.
    outdated = list(models.Query.outdated_queries())

    self.assertEqual(outdated, [valid_query])
    self.assertTrue(faulty_query.schedule.get("disabled"))
def test_skips_disabled_schedules(self):
    """A query whose schedule has ``disabled=True`` is not reported as outdated."""
    disabled_query = self.create_scheduled_query(disabled=True)

    self.assertNotIn(disabled_query, models.Query.outdated_queries())
class QueryArchiveTest(BaseTestCase):
def test_archive_query_sets_flag(self):