#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Script which uses a Python "template" to generate a Hadoop-style XML
configuration file.

The "template" is a Python module which should export a global variable
called 'CONFIG'. This variable should be a dictionary of keys/values. The
values may use the special syntax '${FOO}' to substitute an environment
variable (as a convenience over manually implementing the same).

If you have an existing XML configuration and want to see it in convenient
python form, you can use a snippet like the following from within the Python
REPL:

  import xml.etree.ElementTree as ET
  import pprint
  def convert(path):
    e = ET.parse(path)
    c = dict([(property.findtext('name'), property.findtext('value'))
              for property in e.getroot()])
    with open(path + ".py", "w") as f:
      pprint.pprint(c, stream=f)
"""

from __future__ import absolute_import, division, print_function
import os
import re
import sys
from textwrap import dedent
from xml.sax.saxutils import escape as xmlescape

# Matches '${NAME}' and captures NAME (non-greedy, so several references
# in one string are matched individually).
ENV_VAR_RE = re.compile(r'\${(.+?)\}')


def _substitute_env_vars(s):
    """
    Substitute ${FOO} with the $FOO environment variable in 's'.

    Raises KeyError if a referenced environment variable is not set.
    """
    def lookup_func(match):
        return os.environ[match.group(1)]
    return ENV_VAR_RE.sub(lookup_func, s)


def dump_config(d, source_path, out):
    """
    Dump a Hadoop-style XML configuration file.

    'd': a dictionary of name/value pairs. Both names and values may contain
         '${FOO}' references to environment variables. Integer and float
         values are converted to strings before being written.
    'source_path': the path where 'd' was parsed from. Its absolute form is
         recorded in a comment at the top of the generated file.
    'out': stream to write to

    Properties are written sorted by name. Names and values are XML-escaped
    after environment variable substitution.

    Raises Exception if a referenced environment variable is not set.
    """
    header = """\
        <?xml version="1.0"?>
        <!--
        NOTE: THIS FILE IS AUTO-GENERATED FROM:
          {source_path}

        EDITS BY HAND WILL BE LOST!
        -->
        <configuration>""".format(source_path=os.path.abspath(source_path))
    # dedent() strips the common indentation so that the XML declaration
    # starts at column 0 of the output, as XML requires.
    print(dedent(header), file=out)

    property_template = (
        "  <property>\n"
        "    <name>{name}</name>\n"
        "    <value>{value}</value>\n"
        "  </property>")

    for k, v in sorted(d.items()):
        try:
            k_new = _substitute_env_vars(k)
            # Numeric values must be stringified before the regex
            # substitution, which only accepts strings.
            if isinstance(v, (int, float)):
                v = str(v)
            v_new = _substitute_env_vars(v)
        except KeyError as e:
            raise Exception(
                "failed environment variable substitution for value {k}: {e}"
                .format(k=k, e=e))
        print(property_template.format(name=xmlescape(k_new),
                                       value=xmlescape(v_new)),
              file=out)
    print("</configuration>", file=out)


def load_source_with_importlib(modname, filename):
    """Emulate imp.load_source() of Python2 for Python3 using importlib

    Code taken from published Python documentation, see
    https://docs.python.org/3/whatsnew/3.12.html#imp"""
    import importlib.util
    import importlib.machinery
    loader = importlib.machinery.SourceFileLoader(modname, filename)
    spec = importlib.util.spec_from_file_location(modname, filename, loader=loader)
    module = importlib.util.module_from_spec(spec)
    # The module is always executed and not cached in sys.modules.
# Uncomment the following line to cache the module. # sys.modules[module.__name__] = module loader.exec_module(module) return module def import_template(name, module_path): """Handle module import differences between Python2 and Python3""" mod = None if sys.version_info.major < 3: import imp mod = imp.load_source('template', module_path) else: mod = load_source_with_importlib(name, module_path) return mod def main(): if len(sys.argv) != 3: print("usage: {prog}