impala/common/thrift/generate_metrics.py

#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
import sys
import os
import re
import collections
from optparse import OptionParser
try:
  import json
except ImportError:
  import simplejson as json # For Python 2.4

# Directory of the thrift definitions inside the Impala source tree. The default input
# and output paths below are derived from it. os.path.join() fails if IMPALA_HOME is not
# set in the environment.
THRIFT_DIR = os.path.join(os.getenv('IMPALA_HOME'), 'common/thrift')

# Command line options of the generator.
parser = OptionParser()
parser.add_option("-i", dest="input_schema_path",
                  default=os.path.join(THRIFT_DIR, "metrics.json"),
                  help="The path of the input metrics json file. Default: %default")
# NOTE: this is a store_true flag that already defaults to True, so passing it has no
# effect and thrift generation is always enabled.
parser.add_option("--generate_thrift", dest="generate_thrift",
                  action="store_true", default=True,
                  help="Generates the metric thrift definitions. Default: %default")
parser.add_option("--generate_mdl", dest="generate_mdl",
                  action="store_true", default=False,
                  help="Generates a CM-compatible mdl file. Default: %default")
parser.add_option("-o", dest="output_thrift_path",
                  default=os.path.join(THRIFT_DIR, "MetricDefs.thrift"),
                  help="The path of the output MetricDefs thrift file. Default: %default")
parser.add_option("--output_mdl_path", dest="output_mdl_path",
                  default="/tmp/impala_schema.mdl",
                  help="The path of the output mdl file. Default: %default")
# TODO: get default version value from bin/save-version.sh
parser.add_option("--output_mdl_version", dest="output_mdl_version",
                  metavar="IMPALA_VERSION", default="2.8.0-SNAPSHOT",
                  help="The Impala version that is written in the output mdl.")

# The command line is parsed when the module is loaded; the generator functions below
# read their settings from the module-level 'options'.
options, args = parser.parse_args()

def load_metrics(source_file):
  """Reads the json file of metric definitions and returns a map of metric names to
     metric definitions.

     'source_file' is the path of a json file that contains a list of metric definition
     objects, each with at least the 'key', 'kind' and 'units' fields. In the returned
     definitions 'kind' and 'units' are rewritten to the qualified thrift enum names
     "Metrics.TMetricKind.<kind>" and "Metrics.TUnit.<units>".

     Raises AssertionError if two definitions use the same key."""
  # The context manager closes the file even if reading or parsing fails.
  with open(source_file) as source:
    raw_metrics = json.load(source)

  metrics = {}
  for m in raw_metrics:
    if m['key'] in metrics:
      # Raised explicitly instead of through an 'assert' statement so that the check is
      # still performed when python runs with -O.
      raise AssertionError(
          "Metric key %s already used, check definition of %s" % (m['key'], m))
    m['kind'] = "Metrics.TMetricKind.%s" % m['kind']
    m['units'] = "Metrics.TUnit.%s" % m['units']
    metrics[m['key']] = m
  return metrics

# Text that generate_thrift() writes at the top of the generated thrift file: the license
# header, the auto-generated warning, the namespaces, the include of Metrics.thrift and
# the TMetricDef struct. generate_thrift() appends the TMetricDefs constant map after it.
# NOTE(review): the py namespace below is 'impala_thrift_gen.Metrics' while the default
# output file is MetricDefs.thrift, which itself includes Metrics.thrift - confirm that
# this namespace is the intended one.
THRIFT_PREAMBLE = """
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//
//
// THIS FILE IS AUTO GENERATED BY generate_metrics.py DO NOT MODIFY IT BY HAND.
//

namespace py impala_thrift_gen.Metrics
namespace cpp impala
namespace java org.apache.impala.thrift

include "Metrics.thrift"

// All metadata associated with a metric. Used to instantiate metrics.
struct TMetricDef {
  1: optional string key
  2: optional Metrics.TMetricKind kind
  3: optional Metrics.TUnit units
  4: optional list<string> contexts
  5: optional string label
  6: optional string description
}

"""

def generate_thrift():
  """Writes the thrift file with the metric definitions used by Impala.

     The definitions are loaded from options.input_schema_path and written to
     options.output_thrift_path as the 'TMetricDefs' constant map, preceded by
     THRIFT_PREAMBLE."""
  serialized = json.dumps(
      load_metrics(options.input_schema_path), sort_keys=True, indent=2)

  # json.dumps() emits the TMetricKind and TUnit values as quoted strings, which the
  # thrift compiler does not interpret as enum values, so the surrounding quotes are
  # stripped from them.
  for quoted_enum in (r'"(Metrics.TMetricKind.\S+)"', r'"(Metrics.TUnit.\S+)"'):
    serialized = re.sub(quoted_enum, r'\1', serialized)

  output_path = options.output_thrift_path
  with open(output_path, "w") as out:
    out.write(THRIFT_PREAMBLE)
    out.write("const map<string,TMetricDef> TMetricDefs =\n")
    out.write(serialized)
  print("%s created." % output_path)

def metric_to_mdl(m):
  """Converts the metric definition 'm' to its mdl representation.

     Returns a dict with the 'context', 'name', 'counter', 'numeratorUnit',
     'description' and 'label' entries, or None (after printing a note to stderr) if the
     metric isn't supported."""
  key = m['key']
  # TODO: Stamp out metrics with arguments, e.g. output each rpc call_duration metric.
  if '$0' in key:
    print("Skipping metrics with unbound argument, key=%s" % key, file=sys.stderr)
    return None

  kind = m['kind']
  # TODO: Stamp out individual metrics for other metric types.
  if kind not in ('COUNTER', 'GAUGE'):
    print("Skipping %s metric %s" % (kind, key), file=sys.stderr)
    return None

  # The mdl name is the lower-cased key with an 'impala_' prefix and with '-' and '.'
  # replaced by '_'.
  mdl_name = 'impala_' + key.lower().replace('-', '_').replace('.', '_')
  return {
    'context': key,
    'name': mdl_name,
    'counter': kind == 'COUNTER',
    'numeratorUnit': m['units'].lower(),
    'description': m['description'],
    'label': m['label'],
  }


# Base MDL for the Impala Service. Does not contain metrics.
# generate_mdl() parses this text with json.loads(), replaces the "version" placeholder
# with options.output_mdl_version and adds a "metricDefinitions" list to every entry of
# "roles".
MDL_BASE = """
{
  "name" : "IMPALA",
  "version" : "$PROJECT_VERSION",
  "nameForCrossEntityAggregateMetrics" : "impalas",
  "roles" : [
    {
      "name" : "IMPALAD",
      "nameForCrossEntityAggregateMetrics" : "impalads"
    },
    {
      "name" : "STATESTORE",
      "nameForCrossEntityAggregateMetrics" : "statestores"
    },
    {
      "name" : "CATALOGSERVER",
      "nameForCrossEntityAggregateMetrics" : "catalogservers"
    }
  ],
  "metricEntityTypeDefinitions" : [
      {
        "name" : "IMPALA_POOL",
        "nameForCrossEntityAggregateMetrics" : "impala_pools",
        "entityNameFormat" :  [
           "serviceName",
           "poolName"
        ],
        "label" : "Impala Pool",
        "labelPlural" : "Impala Pools",
        "description" : "A resource pool within which Impala schedules queries.",
        "immutableAttributeNames" : [
           "poolName",
           "serviceName"
        ]
      },
      {
        "name" : "IMPALA_DAEMON_POOL",
        "nameForCrossEntityAggregateMetrics" : "impala_daemon_pools",
        "entityNameFormat" : [
           "roleName",
           "poolName"
        ],
        "label" : "Impala Daemon Pool",
        "labelPlural" : "Impala Daemon Pools",
        "description" : "An Impala Daemon's view of a specific Impala resource pool.",
        "immutableAttributeNames" : [
           "poolName",
           "roleName",
           "serviceName"
        ],
        "parentMetricEntityTypeNames" : [
            "IMPALA_POOL",
            "IMPALA-IMPALAD"
        ]
      }
    ]
}
"""

def generate_mdl():
  """Writes the CM compatible metric definition (MDL) file.

     The metric definitions are read from options.input_schema_path, converted with
     metric_to_mdl() and attached to the roles of MDL_BASE whose name matches one of the
     metric's contexts. The result is written to options.output_mdl_path."""
  with open(options.input_schema_path) as schema_file:
    metric_defs = json.load(schema_file)

  # Group the converted metrics by every context they are declared for. Metrics that
  # cannot be represented in the mdl are left out.
  by_context = collections.defaultdict(list)
  for metric_def in metric_defs:
    converted = metric_to_mdl(metric_def)
    if converted is not None:
      for context in metric_def['contexts']:
        by_context[context].append(converted)

  mdl = json.loads(MDL_BASE)
  mdl['version'] = options.output_mdl_version
  # Roles without any metric get an empty list of definitions.
  for role in mdl['roles']:
    role['metricDefinitions'] = by_context.get(role['name'], [])

  output_path = options.output_mdl_path
  with open(output_path, "w") as out:
    out.write(json.dumps(mdl, indent=4))
  print("%s created." % output_path)

# Script entry point: runs each generator whose command line option is enabled, the
# thrift generator first.
if __name__ == "__main__":
  for enabled, generate in ((options.generate_thrift, generate_thrift),
                            (options.generate_mdl, generate_mdl)):
    if enabled:
      generate()