#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Generates TPC-H data with the toolchain's dbgen binary and moves every generated
# table file into a subdirectory of its own.
#
# Usage: <this-script> [SCALE_FACTOR]
#   SCALE_FACTOR  Optional integer. A value greater than 1 is passed to dbgen and the
#                 data is written to ${IMPALA_HOME}/testdata/impala-data/tpch<N>.
#                 Otherwise scale factor 1 is used and the data is written to
#                 ${IMPALA_HOME}/testdata/impala-data/tpch.
#
# Required environment variables (must be non-empty):
#   IMPALA_HOME, IMPALA_TOOLCHAIN, IMPALA_TPC_H_VERSION
#
# WARNING: the target data directory is deleted and recreated on every run.

set -euo pipefail

# Directory the script was started from. The error handler needs it to resolve a
# relative $0, because the script changes directory further down.
readonly INVOCATION_DIR=${PWD}

# Prints the script name, the number of the failing line and that line's text to stderr.
# Arguments: $1 - line number of the command that failed.
report_error() {
  local lineno=$1
  local source_line
  # Best effort: if the script text cannot be read, still report the line number.
  source_line=$(cd "${INVOCATION_DIR}" && awk -v n="${lineno}" 'NR == n' "$0") || true
  echo "Error in $0 at line ${lineno}: ${source_line}" >&2
}
# LINENO is expanded when the trap fires and handed over as an argument, since inside
# the function it would refer to the function's own lines.
trap 'report_error "${LINENO}"' ERR

# 'set -u' only catches unset variables. An empty IMPALA_HOME would make the script
# delete and recreate a directory under the filesystem root, so reject empty values too.
: "${IMPALA_HOME:?IMPALA_HOME must be set and non-empty}"
: "${IMPALA_TOOLCHAIN:?IMPALA_TOOLCHAIN must be set and non-empty}"
: "${IMPALA_TPC_H_VERSION:?IMPALA_TPC_H_VERSION must be set and non-empty}"

IMPALA_DATA=${IMPALA_HOME}/testdata/impala-data
TPC_H_DATA=${IMPALA_DATA}/tpch

SCALE_FACTOR=1
if [[ $# == 1 ]]; then
  # The operands of -gt inside [[ ]] are evaluated as arithmetic expressions, so make
  # sure the argument is a plain integer before it gets there.
  if [[ $1 =~ ^-?[0-9]+$ ]]; then
    if [[ $1 -gt 1 ]]; then
      SCALE_FACTOR=$1
      TPC_H_DATA=${TPC_H_DATA}${SCALE_FACTOR}
    fi
  elif [[ -n $1 ]]; then
    echo "Ignoring non-integer scale factor '$1', using ${SCALE_FACTOR}" >&2
  fi
fi

TPC_H_HOME=${IMPALA_TOOLCHAIN}/tpc-h-${IMPALA_TPC_H_VERSION}
TPC_H_DBGEN=${TPC_H_HOME}/bin/dbgen
# -x alone is also true for a searchable directory, hence the additional -f.
if [[ ! -f ${TPC_H_DBGEN} || ! -x ${TPC_H_DBGEN} ]]; then
  echo "Could not find TPC-H data generator executable: ${TPC_H_DBGEN}" >&2
  exit 1
fi

echo "Generating TPC-H data into ${TPC_H_DATA}"

# Delete any preexisting data or symlinks.
# Need to change permissions to work around an old TPC-H data tarball that had a
# non-writable top-level directory when extracted. The chmod is best effort and is
# skipped when there is nothing to change.
if [[ -e ${TPC_H_DATA} ]]; then
  chmod +w -- "${TPC_H_DATA}" || true
fi
rm -rf -- "${TPC_H_DATA}"
mkdir -p -- "${TPC_H_DATA}"
cd -- "${TPC_H_DATA}"

# The argument list is kept in an array that is never empty, so the optional flag
# needs no unquoted expansion and the expansion is safe under 'set -u'.
DBGEN_ARGS=(-f -s "${SCALE_FACTOR}")
if [[ -t 1 ]]; then
  # Output is a terminal, show progress verbosely.
  DBGEN_ARGS=(-v "${DBGEN_ARGS[@]}")
fi

"${TPC_H_DBGEN}" "${DBGEN_ARGS[@]}"

# Impala expects each table to be in its own subdirectory.
for FILE in *.tbl; do
  # An unmatched glob stays literal; report that instead of creating a directory
  # named '*' and failing in mv.
  if [[ ! -e ${FILE} ]]; then
    echo "No .tbl files were generated in ${TPC_H_DATA}" >&2
    exit 1
  fi
  FILE_DIR=${FILE%.tbl}
  mkdir -p -- "${FILE_DIR}"
  mv -- "${FILE}" "${FILE_DIR}"
done