impala/tests/util/parse_util.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import re
from datetime import datetime

# Matches the first line of a glog entry: a severity letter (I, W, E or F) immediately
# followed by an "mmdd hh:mm:ss.uuuuuu" timestamp, which is captured in the 'Time' group.
NEW_GLOG_ENTRY_PATTERN = re.compile(r"[IWEF](?P<Time>\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*")

def parse_glog(text, start_time=None, year=None):
  '''Parses the log 'text' and returns a list of log entries (strings).

     Everything up to and including the "Log line format: ..." header line is skipped;
     if 'text' has no such header the result is empty. An entry starts at a line that
     matches NEW_GLOG_ENTRY_PATTERN and also contains every following line that does
     not match (e.g. stack traces), joined with newlines.

     If a 'start_time' (datetime) is provided, only log entries whose timestamp is at
     or after that time will be returned.

     glog timestamps do not contain a year. 'year' is the year the entries are assumed
     to have been written in; it defaults to the current year. It is only used when
     'start_time' is provided.

     Raises ValueError if 'start_time' is provided and an entry's timestamp is not a
     valid date in 'year'.
  '''
  if year is None:
    year = datetime.now().year
  found_start = False
  log = []
  entry = None
  for line in text.splitlines():
    if not found_start:
      found_start = line.startswith("Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu")
      continue
    match = NEW_GLOG_ENTRY_PATTERN.match(line)
    if match:
      # A new entry begins, so the previous one (if it was kept) is complete.
      if entry:
        log.append("\n".join(entry))
      # The year is parsed together with the month and day. Parsing "%m%d" alone
      # validates the date against the default year 1900, which rejects Feb 29 even
      # when 'year' is a leap year.
      if not start_time or start_time <= datetime.strptime(
          "%04d %s" % (year, match.group("Time")), "%Y %m%d %H:%M:%S.%f"):
        entry = [line]
      else:
        # Too old: drop this entry together with its continuation lines.
        entry = None
    elif entry:
      entry.append(line)
  if entry:
    log.append("\n".join(entry))
  return log


def parse_mem_to_mb(mem, units):
  """Converts the amount 'mem', expressed in 'units', to a whole number of megabytes.

     'mem' is anything float() accepts. 'units' is matched case-insensitively with
     surrounding whitespace ignored and one optional trailing "B" removed; the
     recognized prefixes are K, M, G, T and P. An empty or missing unit means bytes.

     Returns None if 'mem' is not positive, otherwise the converted amount truncated
     to an int. Raises Exception if the unit is not recognized.
  """
  amount = float(mem)
  if amount <= 0:
    return None

  suffix = units.strip().upper() if units else ""
  if suffix.endswith("B"):
    suffix = suffix[:-1]

  # Number of 1024x steps separating each unit from a megabyte.
  steps_from_mb = {"": -2, "K": -1, "M": 0, "G": 1, "T": 2, "P": 3}
  if suffix not in steps_from_mb:
    raise Exception('Unexpected memory unit "%s"' % suffix)

  steps = steps_from_mb[suffix]
  if steps < 0:
    amount /= 2 ** (-10 * steps)
  else:
    amount *= 2 ** (10 * steps)
  return int(amount)

def parse_duration_string_ms(duration):
  """Parses a duration string of the form 1h2m3s4.5ms6.7us8.9ns into milliseconds.

     Each component is a number immediately followed by its unit; any subset of the
     units h, m, s, ms, us and ns may be present. Whitespace after a unit is ignored.
     If a unit appears more than once the last value wins, and components with an
     unrecognized unit do not contribute to the result.

     Returns the duration in milliseconds; microsecond and nanosecond components
     contribute fractional milliseconds. Raises AssertionError if 'duration' contains
     no "<number><unit>" component at all.
  """
  pattern = r'(?P<value>[0-9]+\.?[0-9]*?)(?P<units>\D+)'
  matches = list(re.finditer(pattern, duration))
  assert matches, 'Failed to parse duration string %s' % duration

  times = {'h': 0, 'm': 0, 's': 0, 'ms': 0, 'us': 0, 'ns': 0}
  for match in matches:
    # '\D+' also swallows whitespace that follows the unit (e.g. "1h 2m"), so strip it
    # to keep the component from being filed under an unknown key and dropped.
    times[match.group('units').strip()] = float(match.group('value'))

  whole_seconds = times['h'] * 60 * 60 + times['m'] * 60 + times['s']
  return (whole_seconds * 1000 + times['ms'] + times['us'] / 1000.0 +
          times['ns'] / 1000000.0)