Files
impala/tests/custom_cluster/test_web_pages.py
Andrew Sherman b96439f680 IMPALA-11078 Add simple CSP header to webui.
Content Security Policy (CSP) is a computer security standard designed
to prevent cross-site scripting, clickjacking and other code injection
attacks. CSP provides a method for websites to declare approved origins
of content that browsers should be allowed to load on that website.
A good resource is https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP
If a page breaks the rules then the included script or css will
typically not be run by the browser.

In the Impala webui we use a CSP header to declare that all web content
comes from the impalad, with some 'unsafe' inline code.

A new server flag "--disable_content_security_policy_header=true" can be
set to disable the emission of this header in case of any compatibility
issues.

A few small changes were needed to make this CSP header work. Chart.js
was previously included via http, this was changed to being bundled
like other javascript and css we use. Some dodgy array code that
handles connection metrics was also fixed.

TESTING:
  The main webui tests all now validate the CSP header is present.
  A test for the new flag is also added.

Change-Id: Idc335d65b117661da0b420ddb7c9ccd80d8d76ab
Reviewed-on: http://gerrit.cloudera.org:8080/18168
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-01-25 22:52:50 +00:00

256 lines
9.6 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
import random
import re
import requests
import psutil
import pytest
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.shell.util import run_impala_shell_cmd
class TestWebPage(CustomClusterTestSuite):
@classmethod
def get_workload(cls):
return 'functional-query'
@classmethod
def setup_class(cls):
if cls.exploration_strategy() != 'exhaustive':
pytest.skip('runs only in exhaustive')
super(TestWebPage, cls).setup_class()
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--enable_extended_memory_metrics=true"
)
def test_varz_hidden_variables(self):
"""Tests that modified hidden variables show up in /varz"""
response = requests.get("http://localhost:25000/varz?json")
assert response.status_code == requests.codes.ok
varz_json = json.loads(response.text)
flag = [e for e in varz_json["flags"]
if e["name"] == "enable_extended_memory_metrics"]
assert len(flag) == 1
assert flag[0]["default"] == "false"
assert flag[0]["current"] == "true"
assert flag[0]["experimental"]
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--webserver_max_post_length_bytes=100"
)
def test_max_post_length(self):
"""Tests that the maximum length of a POST request that will be accepted"""
too_big_post_content = "c" * 10000
# POST that exceeds the limit
response = requests.post("http://localhost:25000/", too_big_post_content)
assert response.status_code == requests.codes.request_entity_too_large
# POST within the limit
# This is on a URI that does not understand POST and treats it like a GET.
ok_post_content = "c" * 100
response = requests.post("http://localhost:25000/", ok_post_content)
assert response.status_code == requests.codes.ok
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args()
def test_webserver_interface(self):
addrs = psutil.net_if_addrs()
print("net_if_addrs returned: %s" % addrs)
ip_matcher = re.compile("\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
ip_addrs = []
for addr in addrs:
for snic in addrs[addr]:
if ip_matcher.match(snic.address):
ip_addrs.append(snic.address)
# There must be at least one available interface on the machine.
assert len(ip_addrs) > 0, addrs
ports = ["25000", "25010", "25020"]
# With default args, the webserver should be accessible over all interfaces for all
# daemons.
for ip in ip_addrs:
for port in ports:
response = requests.get("http://%s:%s/" % (ip, port))
assert response.status_code == requests.codes.ok, ip
# Pick a random interface and restart with the webserver on that interface.
interface = random.choice(ip_addrs)
self._start_impala_cluster(["--impalad_args=--webserver_interface=%s" % interface])
# Now the webserver should only be accessible over the choosen interface.
for ip in ip_addrs:
try:
response = requests.get("http://%s:25000/" % ip)
assert ip == interface
assert response.status_code == requests.codes.ok, ip
except requests.exceptions.ConnectionError:
assert ip != interface
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--query_stmt_size=0"
)
def test_query_stmt_without_truncate(self):
"""Check if the full query string is displayed in the query list on the WebUI."""
# The input query is a select + 450 'x ' long.
query_select = "x " * 450
query = 'select "{0}"'.format(query_select)
# In the site there is an extra \ before the " so we need that in the expected
# response too.
expected = 'select \\"{0}\\"'.format(query_select)
self.execute_query(query)
response = requests.get("http://localhost:25000/queries?json")
response_json = response.text
assert expected in response_json, "No matching statement found in the queries site."
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--query_stmt_size=10"
)
def test_query_stmt_with_custom_length(self):
"""Check if the partial query with the correct length is displayed in the query list
on the WebUI."""
# The input query is a select + 450 'x ' long.
query = 'select "{0}"'.format("x " * 450)
# Searching for the custom, 10 chars long response. In the site there is an extra \
# before the " so we need that in the expected response too.
expected = 'select \\"x ...'
self.execute_query(query)
response = requests.get("http://localhost:25000/queries?json")
response_json = response.text
assert expected in response_json, "No matching statement found in the queries site."
# Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of
# 'should_exist'
def _validate_shell_messages(self, result_stderr, messages, should_exist=True):
for msg in messages:
if should_exist:
assert msg in result_stderr, result_stderr
else:
assert msg not in result_stderr, result_stderr
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--ping_expose_webserver_url=false"
)
def test_webserver_url_not_exposed(self, vector):
if vector.get_value('table_format').file_format != 'text':
pytest.skip('runs only for text table_format')
# If webserver url is not exposed, debug web urls shouldn't be printed out.
shell_messages = ["Query submitted at: ", "(Coordinator: ",
"Query progress can be monitored at: "]
query_shell_arg = '--query=select * from functional.alltypes'
# hs2
results = run_impala_shell_cmd(vector, [query_shell_arg])
self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
# beeswax
results = run_impala_shell_cmd(vector, ['--protocol=beeswax', query_shell_arg])
self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
# Even though webserver url is not exposed, it is still accessible.
page = requests.get('http://localhost:25000')
assert page.status_code == requests.codes.ok
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--logtostderr=true --redirect_stdout_stderr=false",
statestored_args="--logtostderr=true --redirect_stdout_stderr=false",
catalogd_args="--logtostderr=true --redirect_stdout_stderr=false"
)
def test_webserver_hide_logs_link(self, vector):
"""Validate that there is no /logs link when we use --logtostderr=true """
ports = ["25000", "25010", "25020"]
for port in ports:
# Get the webui home page as json.
response = requests.get("http://localhost:%s?json" % port)
assert response.status_code == requests.codes.ok
home = json.loads(response.text)
# Get the items in the navbar.
navbar = home["__common__"]['navbar']
found_links = [link_item['link'] for link_item in navbar]
# The links that we expect to see in the navbar.
expected_coordinator_links = [
"/",
"/admission",
"/backends",
"/catalog",
"/hadoop-varz",
"/jmx",
"/log_level",
"/memz",
"/metrics",
"/profile_docs",
"/queries",
"/rpcz",
"/sessions",
"/threadz",
"/varz",
]
expected_statestore_links = [
"/",
"/log_level",
"/memz",
"/metrics",
"/profile_docs",
"/rpcz",
"/subscribers",
"/threadz",
"/topics",
"/varz",
]
expected_catalog_links = [
"/",
"/catalog",
"/jmx",
"/log_level",
"/memz",
"/metrics",
"/operations",
"/profile_docs",
"/rpcz",
"/threadz",
"/varz",
]
msg = "bad links from webui port %s" % port
if port == "25000":
assert found_links == expected_coordinator_links, msg
elif port == "25010":
assert found_links == expected_statestore_links, msg
elif port == "25020":
assert found_links == expected_catalog_links, msg
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--disable_content_security_policy_header=true",
statestored_args="--disable_content_security_policy_header=true",
catalogd_args="--disable_content_security_policy_header=true"
)
def test_cdp_header_disabled(self):
"""Test that if servers are started with the flag
--disable_content_security_policy_header=true then the emission of the CDP header is
disabled."""
ports = ["25000", "25010", "25020"] # Respectively the impalad, statestore, catalog.
for port in ports:
response = requests.get("http://localhost:%s" % port)
assert 'Content-Security-Policy' not in response.headers, \
"CSP header present despite being disabled (port %s)" % port