Added TPCH functional query tests and schema generation

This adds most of the Hive TPCH queries into the functional Impala tests. This
code review doesn't actually include the TPCH data. The data set is relatively
large. Instead I updated scripts to copy the data from a data host.

This change has a few parts:
1) Update the benchmark schema generation/test vector generation to be more
generic. This way we can use the same schema creation/data loading steps for
TPCH as we do for benchmark tests.

2) Add in schema template for the TPCH workload along with test vectors and
dimensions which are used for schema generation.

3) Add in a new test file for each TPC-H query. The Hive TPCH work broke down
the queries to generate some "temp" tables, then execute using joins/selects
from these temp tables. Since creating the temp tables does some real work
it is good to execute these via Impala. Each test (a) runs all the INSERT
statements that generate the temp tables, then (b) runs the remaining TPCH queries.

4) Updated all the TPCH insert statements and queries to be parameterized on
$TABLE name. This way we can run the tests across all combinations of file
format/compression/etc.

5) Updated data loading

Change-Id: I6891acc4c7464eaf1dc7dbbb532ddbeb6c259bab
This commit is contained in:
Lenni Kuff
2012-07-11 18:09:57 -07:00
committed by Henry Robinson
parent e5c42edad2
commit e293164b37
44 changed files with 3033 additions and 113 deletions

4
.gitignore vendored
View File

@@ -8,8 +8,8 @@ org.eclipse.jdt.core.prefs
benchmark_results.csv
reference_benchmark_results.csv
testdata/data/test-warehouse
testdata/bin/create-benchmark-*-generated.sql
testdata/bin/load-benchmark-*-generated.sql
testdata/bin/create-*-generated.sql
testdata/bin/load-*-generated.sql
pprof.out

View File

@@ -1,17 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#
# Copies the benchmark data files from the filer into the local Impala tree
# using scp. Sources impala-config.sh from this script's own directory
# (presumably that is what defines IMPALA_HOME -- confirm).
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)
. "$bin"/impala-config.sh
set -e

# TODO: this should be moved somewhere more reasonable.
DATASRC="haus01.sf.cloudera.com:/home/nong/impala-data"
DATADST="$IMPALA_HOME/testdata/hive_benchmark"

# Print the directory that is really used below; the previous hard-coded path
# in this message did not match DATADST.
# NOTE(review): nothing in this script checks whether files already exist
# before invoking scp -- confirm the "will not be copied" wording is accurate.
echo "Copying data files from the filer. If the file already exists locally, the files will not be copied. It's not check summing the files or anything like that, if you need to force a copy, delete the local directory: $DATADST"

mkdir -p "$DATADST"
# The '*' is quoted so it is passed through to scp for the remote side rather
# than being subject to local pathname expansion.
scp -r "$DATASRC/*" "$DATADST/"

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Copies the Impala test data files from the data share into the local tree
# using scp and the SSH key stored under ssh_keys/ next to this script.
# Sources impala-config.sh from this script's own directory (presumably that
# is what defines IMPALA_HOME -- confirm).
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)
. "$bin"/impala-config.sh
set -e

# TODO: Waiting on helpdesk ticket (HD-2861) to move this to a better location
DATASRC="c1326.hal.cloudera.com:/data/1/workspace/impala-data"
DATADST="$IMPALA_HOME/testdata/impala-data"

# Each continuation keeps whitespace before the backslash so the pieces stay
# separate echo arguments, and the real destination path is printed instead of
# the literal text "IMPALA_HOME/...".
# NOTE(review): nothing in this script checks whether files already exist
# before invoking scp -- confirm the "will not be copied" wording is accurate.
echo "Copying data files from the share. If the file already exists locally, the files" \
  "will not be copied. It's not check summing the files or anything like that, if" \
  "you need to force a copy, delete the local directory:" \
  "$DATADST"

mkdir -p "$DATADST"
# The '*' is quoted so it is passed through to scp for the remote side rather
# than being subject to local pathname expansion.
scp -i "$bin/ssh_keys/id_rsa_impala" -o "StrictHostKeyChecking=no" -r \
  "$DATASRC/*" "$DATADST"

View File

@@ -1,47 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Script that creates schema and loads data into hive for running benchmarks.
# By default the script will load the base data for the "core" scenario.
# If 'pairwise' is specified as a parameter the pairwise combinations of workload
# + file format + compression will be loaded.
# If 'exhaustive' is passed as an argument the exhaustive set of combinations will
# be executed.
#
# Usage: <this script> [core|pairwise|exhaustive]
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)
. "$bin"/impala-config.sh
set -e

# Fall back to "core" when no (or an empty) first argument is supplied.
exploration_strategy=${1:-core}

# Reject unknown strategies up front, before any statements are generated.
case "$exploration_strategy" in
  core|pairwise|exhaustive)
    ;;
  *)
    echo "Invalid exploration strategy: $exploration_strategy"
    exit 1
    ;;
esac

BENCHMARK_SCRIPT_DIR=$IMPALA_HOME/testdata/bin

# Runs the hive CLI on the SQL file given as $1.
function execute_hive_query_from_file {
  local hive_args=(-hiveconf hive.root.logger=WARN,console -v -f)
  "$HIVE_HOME/bin/hive" "${hive_args[@]}" "$1"
}

pushd "$IMPALA_HOME/testdata/bin"
./generate_benchmark_statements.py --exploration_strategy "$exploration_strategy"
popd

# The generated file names differ only by strategy, so build them from the
# validated value. This also avoids the previous "-generated-sql" typo in the
# pairwise load path.
execute_hive_query_from_file \
  "$BENCHMARK_SCRIPT_DIR/create-benchmark-${exploration_strategy}-generated.sql"
execute_hive_query_from_file \
  "$BENCHMARK_SCRIPT_DIR/load-benchmark-${exploration_strategy}-generated.sql"

"$IMPALA_HOME"/testdata/bin/generate-block-ids.sh

61
bin/load-impala-data.sh Executable file
View File

@@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# This script creates schema and loads data into hive for running benchmarks and
# other tests. Using this script requires passing in two parameters:
# The first is the data set type (hive-benchmark, tpch, all). This will load the
# appropriate collection of data sets for the run type.
# The second is the exploration strategy. This determines the different combinations
# of file format, compression, etc that will be created and loaded. 'core' defines
# a basic set of combinations. If 'pairwise' is specified the pairwise combinations
# of workload + file format + compression will be loaded. If 'exhaustive' is
# passed as an argument the exhaustive set of combinations will be loaded.

# Map the run type to the data set names used in the generated file names.
# "$1" is quoted so a missing argument reaches the error branch cleanly.
case "$1" in
  hive-benchmark)
    data_sets=(benchmark)
    ;;
  tpch)
    data_sets=(tpch)
    ;;
  all)
    data_sets=(benchmark tpch)
    ;;
  *)
    echo "Invalid run type: $1. Valid values are 'all, tpch, hive-benchmark'"
    exit 1
    ;;
esac

case "$2" in
  core|pairwise|exhaustive)
    exploration_strategy=$2
    ;;
  *)
    echo "Invalid exploration strategy: $2. Valid values are 'core, pairwise, exhaustive'"
    exit 1
    ;;
esac

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)
. "$bin"/impala-config.sh
set -e

SCRIPT_DIR=$IMPALA_HOME/testdata/bin

# Runs the hive CLI on the SQL file given as $1.
function execute_hive_query_from_file {
  local hive_args=(-hiveconf hive.root.logger=WARN,console -v -f)
  "$HIVE_HOME/bin/hive" "${hive_args[@]}" "$1"
}

pushd "$IMPALA_HOME/testdata/bin"
for ds in "${data_sets[@]}"; do
  # Whitespace before each backslash keeps the options as separate arguments.
  ./generate_schema_statements.py --exploration_strategy "${exploration_strategy}" \
    --base_output_file_name="${ds}" \
    --schema_template="${ds}_schema_template.sql"
  execute_hive_query_from_file \
    "$SCRIPT_DIR/create-${ds}-${exploration_strategy}-generated.sql"
  execute_hive_query_from_file \
    "$SCRIPT_DIR/load-${ds}-${exploration_strategy}-generated.sql"
done
popd

"$IMPALA_HOME"/testdata/bin/generate-block-ids.sh

View File

@@ -65,7 +65,6 @@ prime_cache_cmd = os.environ['IMPALA_HOME'] + "/testdata/bin/cache_tables.py -q
result_single_regex = 'returned (\d*) rows? in (\d*).(\d*) s'
result_multiple_regex = 'returned (\d*) rows? in (\d*).(\d*) s with stddev (\d*).(\d*)'
hive_result_regex = 'Time taken: (\d*).(\d*) seconds'
set_hive_input_cmd = 'set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;'
# Console color format strings
GREEN = '\033[92m'
@@ -141,12 +140,7 @@ def run_query_using_hive(query, prime_buffer_cache, iterations):
else:
prime_buffer_cache_local(query)
query_string = ''
# This works around a problem with Hive where Hive cannot execute queries that result
# in multiple mapreduce steps on a mini-dfs cluster.
if not options.remote:
query_string = set_hive_input_cmd
query_string += (query + ';') * iterations
query_string = (query + ';') * iterations
query_output = tempfile.TemporaryFile("w+")
subprocess.call(options.hive_cmd + "\"%s\"" % query_string, shell=True,

3
bin/ssh_keys/README Normal file
View File

@@ -0,0 +1,3 @@
The [redacted] SSH keys included here are used to help with automation such as copying test data or
impala deployment to test clusters.

View File

View File

@@ -468,7 +468,8 @@ public abstract class BaseQueryTest {
// they can be very expensive.
if (expectedResult.getSetup().size() > 0) {
try {
runSetupSection(expectedResult.getSetup());
runSetupSection(testCase.getSectionContents(Section.SETUP, false,
config.getTableSuffix()));
} catch (Exception e) {
fail(e.getMessage());
}
@@ -511,8 +512,18 @@ public abstract class BaseQueryTest {
boolean abortOnError, int maxErrors) {
switch (executionMode) {
case REDUCED:
// TODO: Consider running with the fastest format to cut down on execution time.
runQueryUncompressedTextOnly(testFile, abortOnError, maxErrors);
// TODO: TPCH currently has a bug when LLVM is enabled (IMP-129). This is a
// temporary workaround for this problem. Once that is resolved this can be
// removed.
if (testFile.trim().startsWith("tpch")) {
List<TestConfiguration> testConfigs = generateAllConfigurationPermutations(
TEXT_FORMAT_ONLY, UNCOMPRESSED_ONLY, ImmutableList.of(16),
SMALL_CLUSTER_SIZES, ImmutableList.of(true));
runQueryWithTestConfigs(testConfigs, testFile, abortOnError, maxErrors);
} else {
// TODO: Consider running with the fastest format to cut down on execution time
runQueryUncompressedTextOnly(testFile, abortOnError, maxErrors);
}
break;
case EXHAUSTIVE:
runQueryWithAllConfigurationPermutations(testFile, abortOnError, maxErrors);

View File

@@ -0,0 +1,119 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
package com.cloudera.impala.service;
import org.junit.Test;
/**
 * Functional tests for the TPC-H queries. Every test method runs one
 * "tpch-qN" test file via runTestInExecutionMode using EXECUTION_MODE
 * (both presumably inherited from BaseQueryTest).
 */
public class TpchQueryTest extends BaseQueryTest {
  // Last argument handed to runTestInExecutionMode for every TPC-H query.
  private static final int TPCH_MAX_ERRORS = 1000;

  /** Runs the test file named "tpch-q" followed by the given query number. */
  private void runTpchQuery(int queryNumber) {
    runTestInExecutionMode(EXECUTION_MODE, "tpch-q" + queryNumber, false,
        TPCH_MAX_ERRORS);
  }

  @Test
  public void TestTpchQ1() {
    runTpchQuery(1);
  }

  @Test
  public void TestTpchQ2() {
    runTpchQuery(2);
  }

  @Test
  public void TestTpchQ3() {
    runTpchQuery(3);
  }

  @Test
  public void TestTpchQ4() {
    runTpchQuery(4);
  }

  @Test
  public void TestTpchQ5() {
    runTpchQuery(5);
  }

  @Test
  public void TestTpchQ6() {
    runTpchQuery(6);
  }

  // TODO: We don't currently support some of the features in Query 7.
  // Please see IMP-128
  //@Test
  //public void TestTpchQ7() {
  //  runTpchQuery(7);
  //}

  @Test
  public void TestTpchQ8() {
    runTpchQuery(8);
  }

  @Test
  public void TestTpchQ9() {
    runTpchQuery(9);
  }

  @Test
  public void TestTpchQ10() {
    runTpchQuery(10);
  }

  @Test
  public void TestTpchQ11() {
    runTpchQuery(11);
  }

  @Test
  public void TestTpchQ12() {
    runTpchQuery(12);
  }

  @Test
  public void TestTpchQ13() {
    runTpchQuery(13);
  }

  @Test
  public void TestTpchQ14() {
    runTpchQuery(14);
  }

  @Test
  public void TestTpchQ15() {
    runTpchQuery(15);
  }

  @Test
  public void TestTpchQ16() {
    runTpchQuery(16);
  }

  @Test
  public void TestTpchQ17() {
    runTpchQuery(17);
  }

  @Test
  public void TestTpchQ18() {
    runTpchQuery(18);
  }

  @Test
  public void TestTpchQ19() {
    runTpchQuery(19);
  }

  @Test
  public void TestTpchQ20() {
    runTpchQuery(20);
  }

  @Test
  public void TestTpchQ21() {
    runTpchQuery(21);
  }

  @Test
  public void TestTpchQ22() {
    runTpchQuery(22);
  }
}

View File

@@ -125,7 +125,8 @@ public class TestFileParser {
ArrayList<String> retList = Lists.newArrayList();
for (String s : ret) {
if (!(s.startsWith("#") || s.startsWith("//"))) {
if (tableSuffix != null && section == Section.QUERY) {
if (tableSuffix != null &&
(section == Section.QUERY || section == Section.SETUP)) {
retList.add(s.replaceAll("\\$TABLE", tableSuffix));
} else {
retList.add(s);
@@ -301,4 +302,4 @@ public class TestFileParser {
}
}
}
}
}

View File

@@ -0,0 +1,19 @@
# Q1 - Pricing Summary Report Query
# Modifications: Remove ORDER BY, added ROUND() calls
select
l_returnflag, l_linestatus, round(sum(l_quantity), 2), round(sum(l_extendedprice), 2),
round(sum(l_extendedprice*(1 - l_discount)), 1),
round(sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), 1),
round(avg(l_quantity), 2), round(avg(l_extendedprice), 2),
round(avg(l_discount), 2), count(1)
from lineitem$TABLE
where l_shipdate<='1998-09-02'
group by l_returnflag, l_linestatus
---- TYPES
string, string, double, double, double, double, double, double, double, bigint
---- RESULTS
'A','F',37734075,56586511539.29,53758218131,55909025048.8,25.52,38273.13,0.05,1478492
'N','F',991417,1487504710.38,1413082168.1,1469649223.2,25.52,38284.47,0.05,38854
'N','O',74476040,111701729697.74,106118230307.6,110367043872.5,25.5,38249.12,0.05,2920374
'R','F',37719753,56568041380.9,53741292684.6,55889619119.8,25.51,38250.85,0.05,1478870
====

View File

@@ -0,0 +1,41 @@
# Q10 - Returned Item Reporting Query
# Modifications: Cast c_acctbal to bigint due to float/double values not allowed in
# GROUP BY clauses
select c_custkey, c_name, round(sum(l_extendedprice * (1 - l_discount)), 5) as revenue,
cast(c_acctbal as bigint) cast_c_acctbal, n_name, c_address, c_phone, c_comment
from customer$TABLE c
join orders$TABLE o
on c.c_custkey = o.o_custkey and
o.o_orderdate >= '1993-10-01' and
o.o_orderdate < '1994-01-01'
join nation$TABLE n
on c.c_nationkey = n.n_nationkey
join lineitem$TABLE l
on l.l_orderkey = o.o_orderkey and l.l_returnflag = 'R'
group by c_custkey, c_name, cast_c_acctbal, c_phone, n_name, c_address, c_comment
order by revenue desc
limit 20
---- TYPES
int, string, double, bigint, string, string, string, string
---- RESULTS
57040,'Customer#000057040',734235.2455,632,'JAPAN','Eioyzjf4pp','22-895-641-3466','sits. slyly regular requests sleep alongside of the regular inst'
143347,'Customer#000143347',721002.6948000001,2557,'EGYPT','1aReFYv,Kw4','14-742-935-3718','ggle carefully enticing requests. final deposits use bold, bold pinto beans. ironic, idle re'
60838,'Customer#000060838',679127.3077,2454,'BRAZIL','64EaJ5vMAHWJlBOxJklpNc2RJiWE','12-913-494-9813',' need to boost against the slyly regular account'
101998,'Customer#000101998',637029.5667,3790,'UNITED KINGDOM','01c9CILnNtfOQYmZj','33-593-865-6378','ress foxes wake slyly after the bold excuses. ironic platelets are furiously carefully bold theodolites'
125341,'Customer#000125341',633508.086,4983,'GERMANY','S29ODD6bceU8QSuuEJznkNaK','17-582-695-5962','arefully even depths. blithely even excuses sleep furiously. foxes use except the dependencies. ca'
25501,'Customer#000025501',620269.7849,7725,'ETHIOPIA',' W556MXuoiaYCCZamJI,Rn0B4ACUGdkQ8DZ','15-874-808-6793','he pending instructions wake carefully at the pinto beans. regular, final instructions along the slyly fina'
115831,'Customer#000115831',596423.8672,5098,'FRANCE','rFeBbEEyk dl ne7zV5fDrmiq1oK09wV7pxqCgIc','16-715-386-3788','l somas sleep. furiously final deposits wake blithely regular pinto b'
84223,'Customer#000084223',594998.0239,528,'UNITED KINGDOM','nAVZCs6BaWap rrM27N 2qBnzc5WBauxbA','33-442-824-8191',' slyly final deposits haggle regular, pending dependencies. pending escapades wake '
54289,'Customer#000054289',585603.3918,5583,'IRAN','vXCxoCsU0Bad5JQI ,oobkZ','20-834-292-4707','ely special foxes are quickly finally ironic p'
39922,'Customer#000039922',584878.1134,7321,'GERMANY','Zgy4s50l2GKN4pLDPBU8m342gIw6R','17-147-757-8036','y final requests. furiously final foxes cajole blithely special platelets. f'
6226,'Customer#000006226',576783.7606,2230,'UNITED KINGDOM','8gPu8,NPGkfyQQ0hcIYUGPIBWc,ybP5g,','33-657-701-3391','ending platelets along the express deposits cajole carefully final '
922,'Customer#000000922',576767.5333,3869,'GERMANY','Az9RFaut7NkPnc5zSD2PwHgVwr4jRzq','17-945-916-9648','luffily fluffy deposits. packages c'
147946,'Customer#000147946',576455.132,2030,'ALGERIA','iANyZHjqhyy7Ajah0pTrYyhJ','10-886-956-3143','ithely ironic deposits haggle blithely ironic requests. quickly regu'
115640,'Customer#000115640',569341.1933,6436,'ARGENTINA','Vtgfia9qI 7EpHgecU1X','11-411-543-4901','ost slyly along the patterns; pinto be'
73606,'Customer#000073606',568656.8578,1785,'JAPAN','xuR0Tro5yChDfOCrjkd2ol','22-437-653-6966','he furiously regular ideas. slowly'
110246,'Customer#000110246',566842.9815,7763,'VIETNAM','7KzflgX MDOq7sOkI','31-943-426-9837','egular deposits serve blithely above the fl'
142549,'Customer#000142549',563537.2368,5085,'INDONESIA','ChqEoK43OysjdHbtKCp6dKqjNyvvi9','19-955-562-2398','sleep pending courts. ironic deposits against the carefully unusual platelets cajole carefully express accounts.'
146149,'Customer#000146149',557254.9865,1791,'ROMANIA','s87fvzFQpU','29-744-164-6487',' of the slyly silent accounts. quickly final accounts across the '
52528,'Customer#000052528',556397.3509,551,'ARGENTINA','NFztyTOR10UOJ','11-208-192-3205',' deposits hinder. blithely pending asymptotes breach slyly regular re'
23431,'Customer#000023431',554269.536,3381,'ROMANIA','HgiV0phqhaIa9aydNoIlb','29-915-458-2654','nusual, even instructions: furiously stealthy n'
====

View File

@@ -0,0 +1,28 @@
# Q11 - Important Stock Identification
# NOTE: Alan is re-writing part of this query set
# so it is missing for now
# The SETUP section resets the table this statement inserts into.
insert overwrite table q11_part_tmp$TABLE
select ps_partkey, sum(ps_supplycost * ps_availqty) as part_value
from nation$TABLE n
join supplier$TABLE s
on s.s_nationkey = n.n_nationkey and n.n_name = 'GERMANY'
join partsupp$TABLE ps
on ps.ps_suppkey = s.s_suppkey
group by ps_partkey
---- SETUP
RESET q11_part_tmp$TABLE
RELOAD q11_part_tmp$TABLE
---- RESULTS
---- NUMROWS
29818
====
# Reads q11_part_tmp populated by the statement above, so only the insert
# target q11_sum_tmp is reset here.
insert overwrite table q11_sum_tmp$TABLE
select sum(part_value) as total_value
from q11_part_tmp$TABLE
---- SETUP
RESET q11_sum_tmp$TABLE
RELOAD q11_sum_tmp$TABLE
---- RESULTS
---- NUMROWS
1
====

View File

@@ -0,0 +1,33 @@
# Q12 - Shipping Mode and Order Priority Query
select l_shipmode,
sum(case
when o_orderpriority ='1-URGENT' or
o_orderpriority ='2-HIGH'
then 1
else 0
end
) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT' and
o_orderpriority <> '2-HIGH'
then 1
else 0
end
) as low_line_count
from orders$TABLE o
join lineitem$TABLE l
on o.o_orderkey = l.l_orderkey and
l.l_commitdate < l.l_receiptdate and
l.l_shipdate < l.l_commitdate and
l.l_receiptdate >= '1994-01-01' and
l.l_receiptdate < '1995-01-01'
where l.l_shipmode = 'MAIL' or l.l_shipmode = 'SHIP'
group by l_shipmode
order by l_shipmode
limit 100
---- TYPES
string, bigint, bigint
---- RESULTS
'MAIL',6202,9324
'SHIP',6200,9262
====

View File

@@ -0,0 +1,59 @@
# Q13 - Customer Distribution Query
select c_count, count(1) as custdist
from
( select c_custkey, count(o_orderkey) as c_count
from customer$TABLE c
left outer join orders$TABLE o
on c.c_custkey = o.o_custkey and
not o.o_comment like '%special%requests%'
group by c_custkey
) c_orders
group by c_count
order by custdist desc, c_count desc
limit 100
---- TYPES
bigint, bigint
---- RESULTS
0,50005
1,17
10,6532
11,6014
12,5639
13,5024
14,4446
15,4505
16,4273
17,4587
18,4529
19,4793
2,134
20,4516
21,4190
22,3623
23,3225
24,2742
25,2086
26,1612
27,1179
28,893
29,593
3,415
30,376
31,226
32,148
33,75
34,50
35,37
36,14
37,5
38,5
39,1
4,1007
40,4
41,2
5,1948
6,3265
7,4687
8,5937
9,6641
====

View File

@@ -0,0 +1,16 @@
# Q14 - Promotion Effect
select
round(100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice*(1-l_discount)
else 0.0
end
) / sum(l_extendedprice * (1 - l_discount)), 5) as promo_revenue
from lineitem$TABLE l
join part$TABLE p
on l.l_partkey = p.p_partkey and
l.l_shipdate >= '1995-09-01' and
l.l_shipdate < '1995-10-01'
---- TYPES
double
---- RESULTS
16.38078
====

View File

@@ -0,0 +1,38 @@
# Q15 - Top Supplier Query
insert overwrite table revenue$TABLE
select l_suppkey as supplier_no,
sum(l_extendedprice * (1 - l_discount)) as total_revenue
from lineitem$TABLE
where l_shipdate >= '1996-01-01' and l_shipdate < '1996-04-01'
group by l_suppkey
---- SETUP
RESET revenue$TABLE
RELOAD revenue$TABLE
---- RESULTS
---- NUMROWS
10000
====
insert overwrite table max_revenue$TABLE
select max(total_revenue)
from revenue$TABLE
---- SETUP
RESET max_revenue$TABLE
RELOAD max_revenue$TABLE
---- RESULTS
---- NUMROWS
1
====
# Modifications - Added limit
select s_suppkey, s_name, s_address, s_phone, total_revenue
from supplier$TABLE s
join revenue$TABLE r
on s.s_suppkey = r.supplier_no
join max_revenue$TABLE m
on r.total_revenue = m.max_revenue
order by s_suppkey
limit 100
---- TYPES
int, string, string, string, double
---- RESULTS
8449,'Supplier#000008449','Wp34zim9qYFbVctdW','20-469-856-8873',1772630
====

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,33 @@
# Q17 - Small-Quantity-Order Revenue Query
insert overwrite table lineitem_tmp$TABLE
select l_partkey as t_partkey, 0.2 * avg(l_quantity) as t_avg_quantity
from lineitem$TABLE
group by l_partkey
---- SETUP
RESET lineitem_tmp$TABLE
RELOAD lineitem_tmp$TABLE
---- RESULTS
---- NUMROWS
200000
====
# Modifications: Added round() call
select round(sum(l_extendedprice) / 7.0, 5) as avg_yearly
from
( select l_quantity, l_extendedprice, t_avg_quantity
from lineitem_tmp$TABLE t
join
( select l_quantity, l_partkey, l_extendedprice
from part$TABLE p
join lineitem$TABLE l
on p.p_partkey = l.l_partkey and
p.p_brand = 'Brand#23' and
p.p_container = 'MED BOX'
) l1
on l1.l_partkey = t.t_partkey
) a
where l_quantity < t_avg_quantity
---- TYPES
double
---- RESULTS
348406.05429
====

View File

@@ -0,0 +1,87 @@
# Q18 - Large Value Customer Query
insert overwrite table q18_tmp$TABLE
select l_orderkey, sum(l_quantity) as t_sum_quantity
from lineitem$TABLE
group by l_orderkey
---- SETUP
RESET q18_tmp$TABLE
RELOAD q18_tmp$TABLE
---- RESULTS
---- NUMROWS
1500000
====
# Modifications: Removed o_totalprice column due to float/double values not
# allowed in GROUP BY. TODO: Update to cast o_totalprice instead of remove it.
select c_name, c_custkey, o_orderkey, o_orderdate, round(sum(l_quantity), 5)
from customer$TABLE c
join orders$TABLE o
on c.c_custkey = o.o_custkey
join q18_tmp$TABLE t
on o.o_orderkey = t.l_orderkey and
t.t_sum_quantity > 300
join lineitem$TABLE l
on o.o_orderkey = l.l_orderkey
group by c_name,c_custkey,o_orderkey,o_orderdate
order by o_orderdate
limit 100
---- TYPES
string, int, int, string, double
---- RESULTS
'Customer#000003566',3566,2329187,'1998-01-04',304
'Customer#000003680',3680,3861123,'1998-07-03',301
'Customer#000010129',10129,5849444,'1994-03-21',309
'Customer#000012251',12251,735366,'1993-11-24',309
'Customer#000012599',12599,4259524,'1998-02-12',304
'Customer#000013072',13072,1481925,'1998-03-15',301
'Customer#000013940',13940,2232932,'1997-04-13',304
'Customer#000015272',15272,3883783,'1993-07-28',302
'Customer#000015619',15619,3767271,'1996-08-07',318
'Customer#000015631',15631,1845057,'1994-05-12',302
'Customer#000016384',16384,502886,'1994-04-12',312
'Customer#000017746',17746,6882,'1997-04-09',303
'Customer#000018188',18188,3037414,'1995-01-25',308
'Customer#000024341',24341,1474818,'1992-11-15',302
'Customer#000036619',36619,4806726,'1995-01-17',328
'Customer#000037729',37729,4134341,'1995-06-29',309
'Customer#000045538',45538,4527553,'1994-05-22',305
'Customer#000046435',46435,4745607,'1997-07-03',309
'Customer#000050008',50008,2366755,'1996-12-09',302
'Customer#000053029',53029,2662214,'1993-08-13',302
'Customer#000054602',54602,5832321,'1997-02-09',307
'Customer#000064483',64483,2745894,'1996-07-04',304
'Customer#000066098',66098,5007490,'1992-08-07',304
'Customer#000066533',66533,29158,'1995-10-21',305
'Customer#000066790',66790,2199712,'1996-09-30',327
'Customer#000069904',69904,1742403,'1996-10-19',305
'Customer#000077260',77260,1436544,'1992-09-12',307
'Customer#000081581',81581,4739650,'1995-11-04',305
'Customer#000082441',82441,857959,'1994-02-07',305
'Customer#000088703',88703,2995076,'1994-01-30',302
'Customer#000088876',88876,983201,'1993-12-30',304
'Customer#000093392',93392,5200102,'1997-01-22',304
'Customer#000096103',96103,5984582,'1992-03-16',312
'Customer#000105260',105260,5296167,'1996-09-06',303
'Customer#000105410',105410,4478371,'1996-03-05',302
'Customer#000105995',105995,2096705,'1994-07-03',307
'Customer#000107590',107590,4267751,'1994-11-04',301
'Customer#000109379',109379,5746311,'1996-10-10',302
'Customer#000112987',112987,4439686,'1996-09-17',305
'Customer#000113131',113131,967334,'1995-12-15',301
'Customer#000114586',114586,551136,'1993-05-19',308
'Customer#000117076',117076,4290656,'1997-02-05',301
'Customer#000117919',117919,2869152,'1996-06-20',317
'Customer#000119989',119989,1544643,'1997-09-20',320
'Customer#000120098',120098,1971680,'1995-06-14',308
'Customer#000126865',126865,4702759,'1994-11-07',320
'Customer#000128120',128120,4722021,'1994-04-07',323
'Customer#000129379',129379,4720454,'1997-06-07',303
'Customer#000136573',136573,2761378,'1996-05-31',301
'Customer#000137446',137446,5489475,'1997-05-23',311
'Customer#000141098',141098,565574,'1995-09-24',301
'Customer#000141823',141823,2806245,'1996-12-29',310
'Customer#000144617',144617,3043270,'1997-02-12',317
'Customer#000146608',146608,3342468,'1994-06-12',303
'Customer#000147197',147197,1263015,'1997-02-02',320
'Customer#000148885',148885,2942469,'1992-05-31',313
'Customer#000149842',149842,5156581,'1994-05-30',302
====

View File

@@ -0,0 +1,38 @@
# Q19 - Discounted Revenue Query
# Modifications: Added round() calls
# Sums discounted revenue over lineitem joined to part for three alternative
# brand / container / quantity / size / ship-mode groups.
# NOTE(review): the REGEXP patterns below separate alternatives with '||'. In
# common regex alternation syntax that introduces an empty alternative between
# each pair of names -- confirm whether a single '|' was intended before
# changing anything, since the expected result is pinned to the query as written.
select round(sum(l_extendedprice * (1 - l_discount) ), 5) as revenue
from lineitem$TABLE l
join part$TABLE p
on p.p_partkey = l.l_partkey
where
(
p_brand = 'Brand#12'
and p_container REGEXP 'SM CASE||SM BOX||SM PACK||SM PKG'
and l_quantity >= 1 and l_quantity <= 11
and p_size >= 1 and p_size <= 5
and l_shipmode REGEXP 'AIR||AIR REG'
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_brand = 'Brand#23'
and p_container REGEXP 'MED BAG||MED BOX||MED PKG||MED PACK'
and l_quantity >= 10 and l_quantity <= 20
and p_size >= 1 and p_size <= 10
and l_shipmode REGEXP 'AIR||AIR REG'
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_brand = 'Brand#34'
and p_container REGEXP 'LG CASE||LG BOX||LG PACK||LG PKG'
and l_quantity >= 20 and l_quantity <= 30
and p_size >= 1 and p_size <= 15
and l_shipmode REGEXP 'AIR||AIR REG'
and l_shipinstruct = 'DELIVER IN PERSON'
)
---- TYPES
double
---- RESULTS
3083843.0578
====

View File

@@ -0,0 +1,146 @@
# Q2 - Minimum Cost Supplier Query
insert overwrite table q2_minimum_cost_supplier_tmp1$TABLE
select s.s_acctbal, s.s_name, n.n_name, p.p_partkey, ps.ps_supplycost, p.p_mfgr,
s.s_address, s.s_phone, s.s_comment
from nation$TABLE n
join region$TABLE r
on n.n_regionkey = r.r_regionkey and r.r_name = 'EUROPE'
join supplier$TABLE s
on s.s_nationkey = n.n_nationkey
join partsupp$TABLE ps
on s.s_suppkey = ps.ps_suppkey
join part$TABLE p
on p.p_partkey = ps.ps_partkey and p.p_size = 15 and p.p_type like '%BRASS'
---- SETUP
RESET q2_minimum_cost_supplier_tmp1$TABLE
RELOAD q2_minimum_cost_supplier_tmp1$TABLE
---- RESULTS
---- NUMROWS
642
====
# Q2 - Minimum Cost Supplier Query
insert overwrite table q2_minimum_cost_supplier_tmp2$TABLE
select p_partkey, min(ps_supplycost)
from q2_minimum_cost_supplier_tmp1$TABLE
group by p_partkey
---- SETUP
RESET q2_minimum_cost_supplier_tmp2$TABLE
RELOAD q2_minimum_cost_supplier_tmp2$TABLE
---- RESULTS
---- NUMROWS
460
====
# Q2 - Minimum Cost Supplier Query
# Modifications: Fully qualified name of p_partkey column in the ORDER BY clause
# Step 3 of 3: keeps only the tmp1 rows whose supply cost equals the per-part minimum
# recorded in tmp2 (column ps_min_supplycost; the temp table schema is defined outside
# this file) and returns the first 100 rows of the requested ordering.
select
t1.s_acctbal, t1.s_name, t1.n_name, t1.p_partkey, t1.p_mfgr, t1.s_address, t1.s_phone,
t1.s_comment
from q2_minimum_cost_supplier_tmp1$TABLE t1
join q2_minimum_cost_supplier_tmp2$TABLE t2
on t1.p_partkey = t2.p_partkey and t1.ps_supplycost=t2.ps_min_supplycost
order by s_acctbal desc, n_name, s_name, t1.p_partkey
limit 100
---- TYPES
double, string, string, int, string, string, string, string
---- RESULTS
7843.52,'Supplier#000006683','FRANCE',11680,'Manufacturer#4','2Z0JGkiv01Y00oCFwUGfviIbhzCdy','16-464-517-8943',' express, final pinto beans x-ray slyly asymptotes. unusual, unusual'
7850.66,'Supplier#000001518','UNITED KINGDOM',86501,'Manufacturer#1','ONda3YJiHKJOC','33-730-383-3892','ifts haggle fluffily pending pai'
7852.45,'Supplier#000005864','RUSSIA',8363,'Manufacturer#4','WCNfBPZeSXh3h,c','32-454-883-3821','usly unusual pinto beans. brave ideas sleep carefully quickly ironi'
7871.5,'Supplier#000007206','RUSSIA',104695,'Manufacturer#1','3w fNCnrVmvJjE95sgWZzvW','32-432-452-7731','ironic requests. furiously final theodolites cajole. final, express packages sleep. quickly reg'
7887.08,'Supplier#000009792','GERMANY',164759,'Manufacturer#3','Y28ITVeYriT3kIGdV2K8fSZ V2UqT5H1Otz','17-988-938-4296','ckly around the carefully fluffy theodolites. slyly ironic pack'
7894.56,'Supplier#000007981','GERMANY',85472,'Manufacturer#4','NSJ96vMROAbeXP','17-963-404-3760','ic platelets affix after the furiously'
7912.91,'Supplier#000004211','GERMANY',159180,'Manufacturer#5','2wQRVovHrm3,v03IKzfTd,1PYsFXQFFOG','17-266-947-7315','ay furiously regular platelets. cou'
7912.91,'Supplier#000004211','GERMANY',184210,'Manufacturer#4','2wQRVovHrm3,v03IKzfTd,1PYsFXQFFOG','17-266-947-7315','ay furiously regular platelets. cou'
7914.45,'Supplier#000001013','RUSSIA',125988,'Manufacturer#2','riRcntps4KEDtYScjpMIWeYF6mNnR','32-194-698-3365',' busily bold packages are dolphi'
7937.93,'Supplier#000009012','ROMANIA',83995,'Manufacturer#2','iUiTziH,Ek3i4lwSgunXMgrcTzwdb','29-250-925-9690','to the blithely ironic deposits nag sly'
7950.37,'Supplier#000008101','GERMANY',33094,'Manufacturer#5','kkYvL6IuvojJgTNG IKkaXQDYgx8ILohj','17-627-663-8014','arefully unusual requests x-ray above the quickly final deposits. '
7980.65,'Supplier#000001288','FRANCE',13784,'Manufacturer#4','zE,7HgVPrCn','16-646-464-8247','ully bold courts. escapades nag slyly. furiously fluffy theodo'
7992.4,'Supplier#000006108','FRANCE',118574,'Manufacturer#1','8tBydnTDwUqfBfFV4l3','16-974-998-8937',' ironic ideas? fluffily even instructions wake. blithel'
8042.09,'Supplier#000003245','RUSSIA',135705,'Manufacturer#4','Dh8Ikg39onrbOL4DyTfGw8a9oKUX3d9Y','32-836-132-8872','osits. packages cajole slyly. furiously regular deposits cajole slyly. q'
8042.09,'Supplier#000003245','RUSSIA',150729,'Manufacturer#1','Dh8Ikg39onrbOL4DyTfGw8a9oKUX3d9Y','32-836-132-8872','osits. packages cajole slyly. furiously regular deposits cajole slyly. q'
8046.07,'Supplier#000008780','FRANCE',191222,'Manufacturer#3','AczzuE0UK9osj ,Lx0Jmh','16-473-215-6395','onic platelets cajole after the regular instructions. permanently bold excuses'
8102.62,'Supplier#000003347','UNITED KINGDOM',18344,'Manufacturer#5','m CtXS2S16i','33-454-274-8532','egrate with the slyly bold instructions. special foxes haggle silently among the'
8109.09,'Supplier#000009186','FRANCE',99185,'Manufacturer#1','wgfosrVPexl9pEXWywaqlBMDYYf','16-668-570-1402','tions haggle slyly about the sil'
8152.61,'Supplier#000002731','ROMANIA',15227,'Manufacturer#4',' nluXJCuY1tu','29-805-463-2030',' special requests. even, regular warhorses affix among the final gr'
8231.610000000001,'Supplier#000009558','RUSSIA',192000,'Manufacturer#2','mcdgen,yT1iJDHDS5fV','32-762-137-5858',' foxes according to the furi'
8307.93,'Supplier#000003142','GERMANY',18139,'Manufacturer#1','dqblvV8dCNAorGlJ','17-595-447-6026','olites wake furiously regular decoys. final requests nod '
8328.459999999999,'Supplier#000001744','ROMANIA',69237,'Manufacturer#5','oLo3fV64q2,FKHa3p,qHnS7Yzv,ps8','29-330-728-5873','ep carefully-- even, careful packages are slyly along t'
8338.58,'Supplier#000007269','FRANCE',17268,'Manufacturer#4','ZwhJSwABUoiB04,3','16-267-277-4365','iously final accounts. even pinto beans cajole slyly regular'
8348.74,'Supplier#000008851','FRANCE',66344,'Manufacturer#4','nWxi7GwEbjhw1','16-796-240-2472',' boldly final deposits. regular, even instructions detect slyly. fluffily unusual pinto bea'
8376.52,'Supplier#000005306','UNITED KINGDOM',190267,'Manufacturer#5','9t8Y8 QqSIsoADPt6NLdk,TP5zyRx41oBUlgoGc9','33-632-514-7931','ly final accounts sleep special, regular requests. furiously regular'
8386.08,'Supplier#000008518','FRANCE',36014,'Manufacturer#3','2jqzqqAVe9crMVGP,n9nTsQXulNLTUYoJjEDcqWV','16-618-780-7481','blithely bold pains are carefully platelets. finally regular pinto beans sleep carefully special'
8407.040000000001,'Supplier#000005406','RUSSIA',162889,'Manufacturer#4','j7 gYF5RW8DC5UrjKC','32-626-152-4621','r the blithely regular packages. slyly ironic theodoli'
8431.4,'Supplier#000002675','ROMANIA',5174,'Manufacturer#1','HJFStOu9R5NGPOegKhgbzBdyvrG2yh8w','29-474-643-1443','ithely express pinto beans. blithely even foxes haggle. furiously regular theodol'
8432.889999999999,'Supplier#000003990','RUSSIA',191470,'Manufacturer#1','wehBBp1RQbfxAYDASS75MsywmsKHRVdkrvNe6m','32-839-509-9301','ep furiously. packages should have to haggle slyly across the deposits. furiously regu'
8441.4,'Supplier#000003817','FRANCE',141302,'Manufacturer#2','hU3fz3xL78','16-339-356-5115','ely even ideas. ideas wake slyly furiously unusual instructions. pinto beans sleep ag'
8457.09,'Supplier#000009456','UNITED KINGDOM',19455,'Manufacturer#1','7SBhZs8gP1cJjT0Qf433YBk','33-858-440-4349','cing requests along the furiously unusual deposits promise among the furiously unus'
8503.700000000001,'Supplier#000006830','RUSSIA',44325,'Manufacturer#4','BC4WFCYRUZyaIgchU 4S','32-147-878-5069','pades cajole. furious packages among the carefully express excuses boost furiously across th'
8517.23,'Supplier#000009529','RUSSIA',37025,'Manufacturer#5','e44R8o7JAIS9iMcr','32-565-297-8775','ove the even courts. furiously special platelets '
8517.23,'Supplier#000009529','RUSSIA',59528,'Manufacturer#2','e44R8o7JAIS9iMcr','32-565-297-8775','ove the even courts. furiously special platelets '
8553.82,'Supplier#000003979','ROMANIA',143978,'Manufacturer#4','BfmVhCAnCMY3jzpjUMy4CNWs9 HzpdQR7INJU','29-124-646-4897','ic requests wake against the blithely unusual accounts. fluffily r'
8564.120000000001,'Supplier#000000033','GERMANY',110032,'Manufacturer#1','gfeKpYw3400L0SDywXA6Ya1Qmq1w6YB9f3R','17-138-897-9374','n sauternes along the regular asymptotes are regularly along the '
8569.52,'Supplier#000005936','RUSSIA',5935,'Manufacturer#5','jXaNZ6vwnEWJ2ksLZJpjtgt0bY2a3AU','32-644-251-7916','. regular foxes nag carefully atop the regular, silent deposits. quickly regular packages '
8607.690000000001,'Supplier#000006003','UNITED KINGDOM',76002,'Manufacturer#2','EH9wADcEiuenM0NR08zDwMidw,52Y2RyILEiA','33-416-807-5206','ar, pending accounts. pending depende'
8638.360000000001,'Supplier#000002920','RUSSIA',170402,'Manufacturer#3','Je2a8bszf3L','32-122-621-7549','ly quickly ironic requests. even requests whithout t'
8638.360000000001,'Supplier#000002920','RUSSIA',75398,'Manufacturer#1','Je2a8bszf3L','32-122-621-7549','ly quickly ironic requests. even requests whithout t'
8655.99,'Supplier#000006330','RUSSIA',193810,'Manufacturer#2','UozlaENr0ytKe2w6CeIEWFWn iO3S8Rae7Ou','32-561-198-3705','symptotes use about the express dolphins. requests use after the express platelets. final, ex'
8691.059999999999,'Supplier#000004429','UNITED KINGDOM',126892,'Manufacturer#2','k,BQms5UhoAF1B2Asi,fLib','33-964-337-5038','efully express deposits kindle after the deposits. final '
8754.24,'Supplier#000009407','UNITED KINGDOM',179406,'Manufacturer#4','CHRCbkaWcf5B','33-903-970-9604','e ironic requests. carefully even foxes above the furious'
8781.709999999999,'Supplier#000003121','ROMANIA',13120,'Manufacturer#5','wNqTogx238ZYCamFb,50v,bj 4IbNFW9Bvw1xP','29-707-291-5144','s wake quickly ironic ideas'
8841.59,'Supplier#000005750','ROMANIA',100729,'Manufacturer#5','Erx3lAgu0g62iaHF9x50uMH4EgeN9hEG','29-344-502-5481','gainst the pinto beans. fluffily unusual dependencies affix slyly even deposits.'
8862.24,'Supplier#000003323','ROMANIA',73322,'Manufacturer#3','W9 lYcsC9FwBqk3ItL','29-736-951-3710','ly pending ideas sleep about the furiously unu'
8877.82,'Supplier#000007967','FRANCE',167966,'Manufacturer#5','A3pi1BARM4nx6R,qrwFoRPU','16-442-147-9345','ously foxes. express, ironic requests im'
8913.959999999999,'Supplier#000004603','UNITED KINGDOM',137063,'Manufacturer#2','OUzlvMUr7n,utLxmPNeYKSf3T24OXskxB5','33-789-255-7342',' haggle slyly above the furiously regular pinto beans. even '
8920.59,'Supplier#000003967','ROMANIA',173966,'Manufacturer#2','eHoAXe62SY9','29-194-731-3944','aters. express, pending instructions sleep. brave, r'
8920.59,'Supplier#000003967','ROMANIA',26460,'Manufacturer#1','eHoAXe62SY9','29-194-731-3944','aters. express, pending instructions sleep. brave, r'
8929.42,'Supplier#000008770','FRANCE',173735,'Manufacturer#4','R7cG26TtXrHAP9 HckhfRi','16-242-746-9248','cajole furiously unusual requests. quickly stealthy requests are. '
8936.82,'Supplier#000007043','UNITED KINGDOM',109512,'Manufacturer#1','FVajceZInZdbJE6Z9XsRUxrUEpiwHDrOXi,1Rz','33-784-177-8208','efully regular courts. furiousl'
8968.42,'Supplier#000010000','ROMANIA',119999,'Manufacturer#5','aTGLEusCiL4F PDBdv665XBJhPyCOB0i','29-578-432-2146','ly regular foxes boost slyly. quickly special waters boost carefully ironi'
8996.139999999999,'Supplier#000009814','ROMANIA',139813,'Manufacturer#2','af0O5pg83lPU4IDVmEylXZVqYZQzSDlYLAmR','29-995-571-8781',' dependencies boost quickly across the furiously pending requests! unusual dolphins play sl'
8996.870000000001,'Supplier#000004702','FRANCE',102191,'Manufacturer#5','8XVcQK23akp','16-811-269-8946','ickly final packages along the express plat'
9094.57,'Supplier#000004582','RUSSIA',39575,'Manufacturer#1','WB0XkCSG3r,mnQ n,h9VIxjjr9ARHFvKgMDf','32-587-577-1351','jole. regular accounts sleep blithely frets. final pinto beans play furiously past the '
9101,'Supplier#000005791','ROMANIA',128254,'Manufacturer#5','zub2zCV,jhHPPQqi,P2INAjE1zI n66cOEoXFG','29-549-251-5384','ts. notornis detect blithely above the carefully bold requests. blithely even package'
9104.83,'Supplier#000008520','GERMANY',150974,'Manufacturer#4','RqRVDgD0ER J9 b41vR2,3','17-728-804-1793','ly about the blithely ironic depths. slyly final theodolites among the fluffily bold ideas print'
9128.969999999999,'Supplier#000004311','RUSSIA',146768,'Manufacturer#5','I8IjnXd7NSJRs594RxsRR0','32-155-440-7120','refully. blithely unusual asymptotes haggle '
9189.98,'Supplier#000001226','GERMANY',21225,'Manufacturer#4','qsLCqSvLyZfuXIpjz','17-725-903-1381',' deposits. blithely bold excuses about the slyly bold forges wake '
9192.1,'Supplier#000000115','UNITED KINGDOM',85098,'Manufacturer#3','nJ 2t0f7Ve,wL1,6WzGBJLNBUCKlsV','33-597-248-1220','es across the carefully express accounts boost caref'
9201.469999999999,'Supplier#000009690','UNITED KINGDOM',67183,'Manufacturer#5','CB BnUTlmi5zdeEl7R7','33-121-267-9529','e even, even foxes. blithely ironic packages cajole regular packages. slyly final ide'
9208.700000000001,'Supplier#000007769','ROMANIA',40256,'Manufacturer#5','rsimdze 5o9P Ht7xS','29-964-424-9649','lites was quickly above the furiously ironic requests. slyly even foxes against the blithely bold '
9249.35,'Supplier#000003973','FRANCE',26466,'Manufacturer#1','d18GiDsL6Wm2IsGXM,RZf1jCsgZAOjNYVThTRP4','16-722-866-1658','uests are furiously. regular tithes through the regular, final accounts cajole furiously above the q'
9249.35,'Supplier#000003973','FRANCE',33972,'Manufacturer#1','d18GiDsL6Wm2IsGXM,RZf1jCsgZAOjNYVThTRP4','16-722-866-1658','uests are furiously. regular tithes through the regular, final accounts cajole furiously above the q'
9274.799999999999,'Supplier#000008854','RUSSIA',76346,'Manufacturer#3','1xhLoOUM7I3mZ1mKnerw OSqdbb4QbGa','32-524-148-5221','y. courts do wake slyly. carefully ironic platelets haggle above the slyly regular the'
9280.27,'Supplier#000007194','ROMANIA',47193,'Manufacturer#3','zhRUQkBSrFYxIAXTfInj vyGRQjeK','29-318-454-2133','o beans haggle after the furiously unusual deposits. carefully silent dolphins cajole carefully'
9312.969999999999,'Supplier#000007807','RUSSIA',100276,'Manufacturer#5','oGYMPCk9XHGB2PBfKRnHA','32-673-872-5854','ecial packages among the pending, even requests use regula'
9312.969999999999,'Supplier#000007807','RUSSIA',90279,'Manufacturer#5','oGYMPCk9XHGB2PBfKRnHA','32-673-872-5854','ecial packages among the pending, even requests use regula'
9352.040000000001,'Supplier#000003439','GERMANY',170921,'Manufacturer#4','qYPDgoiBGhCYxjgC','17-128-996-4650',' according to the carefully bold ideas'
9357.450000000001,'Supplier#000006188','UNITED KINGDOM',138648,'Manufacturer#1','g801,ssP8wpTk4Hm','33-583-607-1633','ously always regular packages. fluffily even accounts beneath the furiously final pack'
9359.610000000001,'Supplier#000004856','ROMANIA',62349,'Manufacturer#5','HYogcF3Jb yh1','29-334-870-9731','y ironic theodolites. blithely sile'
9408.65,'Supplier#000007772','UNITED KINGDOM',117771,'Manufacturer#4','AiC5YAH,gdu0i7','33-152-491-1126','nag against the final requests. furiously unusual packages cajole blit'
9453.01,'Supplier#000000802','ROMANIA',175767,'Manufacturer#1',',6HYXb4uaHITmtMBj4Ak57Pd','29-342-882-6463','gular frets. permanently special multipliers believe blithely alongs'
9461.049999999999,'Supplier#000002536','UNITED KINGDOM',20033,'Manufacturer#1','8mmGbyzaU 7ZS2wJumTibypncu9pNkDc4FYA','33-556-973-5522','. slyly regular deposits wake slyly. furiously regular warthogs are.'
9492.790000000001,'Supplier#000005975','GERMANY',25974,'Manufacturer#5','S6mIiCTx82z7lV','17-992-579-4839','arefully pending accounts. blithely regular excuses boost carefully carefully ironic p'
9558.1,'Supplier#000003532','UNITED KINGDOM',88515,'Manufacturer#4','EOeuiiOn21OVpTlGguufFDFsbN1p0lhpxHp','33-152-301-2164',' foxes. quickly even excuses use. slyly special foxes nag bl'
9571.83,'Supplier#000004305','ROMANIA',179270,'Manufacturer#2','qNHZ7WmCzygwMPRDO9Ps','29-973-481-1831','kly carefully express asymptotes. furiou'
9612.940000000001,'Supplier#000003228','ROMANIA',120715,'Manufacturer#2','KDdpNKN3cWu7ZSrbdqp7AfSLxx,qWB','29-325-784-8187','warhorses. quickly even deposits sublate daringly ironic instructions. slyly blithe t'
9612.940000000001,'Supplier#000003228','ROMANIA',198189,'Manufacturer#4','KDdpNKN3cWu7ZSrbdqp7AfSLxx,qWB','29-325-784-8187','warhorses. quickly even deposits sublate daringly ironic instructions. slyly blithe t'
9624.780000000001,'Supplier#000009658','ROMANIA',189657,'Manufacturer#1','oE9uBgEfSS4opIcepXyAYM,x','29-748-876-2014','ronic asymptotes wake bravely final'
9624.82,'Supplier#000001816','FRANCE',34306,'Manufacturer#3','e7vab91vLJPWxxZnewmnDBpDmxYHrb','16-392-237-6726','e packages are around the special ideas. special, pending foxes us'
9643.549999999999,'Supplier#000005148','ROMANIA',107617,'Manufacturer#1','kT4ciVFslx9z4s79p Js825','29-252-617-4850','final excuses. final ideas boost quickly furiously speci'
9681.33,'Supplier#000008406','RUSSIA',78405,'Manufacturer#1',',qUuXcftUl','32-139-873-8571','haggle slyly regular excuses. quic'
9721.950000000001,'Supplier#000008757','UNITED KINGDOM',156241,'Manufacturer#3','Atg6GnM4dT2','33-821-407-2995','eep furiously sauternes; quickl'
9739.860000000001,'Supplier#000003384','FRANCE',138357,'Manufacturer#2','o,Z3v4POifevE k9U1b 6J1ucX,I','16-494-913-5925','s after the furiously bold packages sleep fluffily idly final requests: quickly final'
9817.1,'Supplier#000002352','RUSSIA',124815,'Manufacturer#2','4LfoHUZjgjEbAKw TgdKcgOc4D4uCYw','32-551-831-1437','wake carefully alongside of the carefully final ex'
9817.1,'Supplier#000002352','RUSSIA',152351,'Manufacturer#3','4LfoHUZjgjEbAKw TgdKcgOc4D4uCYw','32-551-831-1437','wake carefully alongside of the carefully final ex'
9836.93,'Supplier#000007342','RUSSIA',4841,'Manufacturer#4','JOlK7C1,7xrEZSSOw','32-399-414-5385','blithely carefully bold theodolites. fur'
9847.57,'Supplier#000006345','FRANCE',173827,'Manufacturer#2','VSt3rzk3qG698u6ld8HhOByvrTcSTSvQlDQDag','16-886-766-7945','ges. slyly regular requests are. ruthless, express excuses cajole blithely across the unu'
9847.57,'Supplier#000006345','FRANCE',86344,'Manufacturer#1','VSt3rzk3qG698u6ld8HhOByvrTcSTSvQlDQDag','16-886-766-7945','ges. slyly regular requests are. ruthless, express excuses cajole blithely across the unu'
9847.83,'Supplier#000008097','RUSSIA',130557,'Manufacturer#2','xMe97bpE69NzdwLoX','32-375-640-3593',' the special excuses. silent sentiments serve carefully final ac'
9852.52,'Supplier#000008973','RUSSIA',18972,'Manufacturer#2','t5L67YdBYYH6o,Vz24jpDyQ9','32-188-594-7038','rns wake final foxes. carefully unusual depende'
9870.780000000001,'Supplier#000001286','GERMANY',181285,'Manufacturer#4','YKA,E2fjiVd7eUrzp2Ef8j1QxGo2DFnosaTEH','17-516-924-4574',' regular accounts. furiously unusual courts above the fi'
9870.780000000001,'Supplier#000001286','GERMANY',81285,'Manufacturer#2','YKA,E2fjiVd7eUrzp2Ef8j1QxGo2DFnosaTEH','17-516-924-4574',' regular accounts. furiously unusual courts above the fi'
9871.219999999999,'Supplier#000006373','GERMANY',43868,'Manufacturer#5','J8fcXWsTqM','17-813-485-8637','etect blithely bold asymptotes. fluffily ironic platelets wake furiously; blit'
9923.77,'Supplier#000002324','GERMANY',29821,'Manufacturer#4','y3OD9UywSTOk','17-779-299-1839','ackages boost blithely. blithely regular deposits c'
9936.219999999999,'Supplier#000005250','UNITED KINGDOM',249,'Manufacturer#4','B3rqp0xbSEim4Mpy2RH J','33-320-228-2957','etect about the furiously final accounts. slyly ironic pinto beans sleep inside the furiously'
9937.84,'Supplier#000005969','ROMANIA',108438,'Manufacturer#1','ANDENSOSmk,miq23Xfb5RWt6dvUcvt6Qa','29-520-692-3537','efully express instructions. regular requests against the slyly fin'
9938.530000000001,'Supplier#000005359','UNITED KINGDOM',185358,'Manufacturer#4','QKuHYh,vZGiwu2FWEJoLDx04','33-429-790-6131','uriously regular requests hag'
====

View File

@@ -0,0 +1,163 @@
# Q20 - Potential Part Promotion Query
# Step 1: collects the distinct keys of all parts whose name starts with 'forest'.
insert overwrite table q20_tmp1$TABLE
select distinct p_partkey
from part$TABLE
where p_name like 'forest%'
---- SETUP
RESET q20_tmp1$TABLE
RELOAD q20_tmp1$TABLE
---- RESULTS
---- NUMROWS
2127
====
# Q20 - Potential Part Promotion Query
# Step 2: for every (part, supplier) pair, computes half of the total quantity of line
# items with a ship date in [1994-01-01, 1995-01-01).
insert overwrite table q20_tmp2$TABLE
select l_partkey, l_suppkey, 0.5 * sum(l_quantity)
from lineitem$TABLE
where l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01'
group by l_partkey, l_suppkey
---- SETUP
RESET q20_tmp2$TABLE
RELOAD q20_tmp2$TABLE
---- RESULTS
---- NUMROWS
543210
====
# Q20 - Potential Part Promotion Query
# Step 3: restricts partsupp to the parts in q20_tmp1 and pairs each remaining
# (part, supplier) row with its matching q20_tmp2 row.
# NOTE(review): sum_quantity is presumably the column of q20_tmp2 holding the
# 0.5 * sum(l_quantity) value; the temp table schema is defined outside this file.
insert overwrite table q20_tmp3$TABLE
select ps_suppkey, ps_availqty, sum_quantity
from partsupp$TABLE ps join q20_tmp1$TABLE t1
on
ps.ps_partkey = t1.p_partkey
join q20_tmp2$TABLE t2
on
ps.ps_partkey = t2.l_partkey and ps.ps_suppkey = t2.l_suppkey
---- SETUP
RESET q20_tmp3$TABLE
RELOAD q20_tmp3$TABLE
---- RESULTS
---- NUMROWS
5843
====
# Q20 - Potential Part Promotion Query
# Modified to use subquery to work around IMP-127
# Step 4: collects the suppliers having at least one q20_tmp3 row whose available
# quantity exceeds sum_quantity. The group by yields one row per supplier; the count(1)
# column only exists to form an aggregate and is dropped by the outer select.
insert overwrite table q20_tmp4$TABLE
select a.ps_suppkey
from (select ps_suppkey, count(1) from q20_tmp3$TABLE
where ps_availqty > sum_quantity
group by ps_suppkey) a
---- SETUP
RESET q20_tmp4$TABLE
RELOAD q20_tmp4$TABLE
---- RESULTS
====
# Q20 - Potential Part Promotion Query
# Modifications: Added limit
# Final step: lists name and address of the suppliers located in 'CANADA' that appear
# in q20_tmp4.
select s_name, s_address
from supplier$TABLE s
join nation$TABLE n
on s.s_nationkey = n.n_nationkey and n.n_name = 'CANADA'
join q20_tmp4$TABLE t4
on s.s_suppkey = t4.ps_suppkey
order by s_name
limit 100
---- TYPES
string, string
---- RESULTS
'Supplier#000000020','iybAE,RmTymrZVYaFZva2SH,j'
'Supplier#000000091','YV45D7TkfdQanOOZ7q9QxkyGUapU1oOWU6q3'
'Supplier#000000205','rF uV8d0JNEk'
'Supplier#000000285','Br7e1nnt1yxrw6ImgpJ7YdhFDjuBf'
'Supplier#000000287','7a9SP7qW5Yku5PvSg'
'Supplier#000000354','w8fOo5W,aS'
'Supplier#000000378','FfbhyCxWvcPrO8ltp9'
'Supplier#000000402','i9Sw4DoyMhzhKXCH9By,AYSgmD'
'Supplier#000000530','0qwCMwobKY OcmLyfRXlagA8ukENJv,'
'Supplier#000000555','TfB,a5bfl3Ah 3Z 74GqnNs6zKVGM'
'Supplier#000000640','mvvtlQKsTOsJj5Ihk7,cq'
'Supplier#000000729','pqck2ppy758TQpZCUAjPvlU55K3QjfL7Bi'
'Supplier#000000736','l6i2nMwVuovfKnuVgaSGK2rDy65DlAFLegiL7'
'Supplier#000000761','zlSLelQUj2XrvTTFnv7WAcYZGvvMTx882d4'
'Supplier#000000887','urEaTejH5POADP2ARrf'
'Supplier#000000935','ij98czM 2KzWe7dDTOxB8sq0UfCdvrX'
'Supplier#000000975',',AC e,tBpNwKb5xMUzeohxlRn, hdZJo73gFQF8y'
'Supplier#000001263','rQWr6nf8ZhB2TAiIDIvo5Io'
'Supplier#000001367','42YSkFcAXMMcucsqeEefOE4HeCC'
'Supplier#000001426','bPOCc086oFm8sLtS,fGrH'
'Supplier#000001446','lch9HMNU1R7a0LIybsUodVknk6'
'Supplier#000001500','wDmF5xLxtQch9ctVu,'
'Supplier#000001602','uKNWIeafaM644'
'Supplier#000001626','UhxNRzUu1dtFmp0'
'Supplier#000001682','pXTkGxrTQVyH1Rr'
'Supplier#000001700','7hMlCof1Y5zLFg'
'Supplier#000001726','TeRY7TtTH24sEword7yAaSkjx8'
'Supplier#000001730','Rc8e,1Pybn r6zo0VJIEiD0UD vhk'
'Supplier#000001746','qWsendlOekQG1aW4uq06uQaCm51se8lirv7 hBRd'
'Supplier#000001806','M934fuZSnLW'
'Supplier#000001855','MWk6EAeozXb'
'Supplier#000001931','FpJbMU2h6ZR2eBv8I9NIxF'
'Supplier#000002022',' dwebGX7Id2pc25YvY33'
'Supplier#000002036','20ytTtVObjKUUI2WCB0A'
'Supplier#000002096','kuxseyLtq QPLXxm9ZUrnB6Kkh92JtK5cQzzXNU '
'Supplier#000002117','MRtkgKolHJ9Wh X9J,urANHKDzvjr'
'Supplier#000002204','uYmlr46C06udCqanj0KiRsoTQakZsEyssL'
'Supplier#000002218','nODZw5q4dx kp0K5'
'Supplier#000002243','nSOEV3JeOU79'
'Supplier#000002245','hz2qWXWVjOyKhqPYMoEwz6zFkrTaDM'
'Supplier#000002282','ES21K9dxoW1I1TzWCj7ekdlNwSWnv1Z 6mQ,BKn'
'Supplier#000002303','nCoWfpB6YOymbgOht7ltfklpkHl'
'Supplier#000002331','WRh2w5WFvRg7Z0S1AvSvHCL'
'Supplier#000002373','RzHSxOTQmElCjxIBiVA52Z JB58rJhPRylR'
'Supplier#000002419','qydBQd14I5l5mVXa4fYY'
'Supplier#000002571','JZUugz04c iJFLrlGsz9O N,W 1rVHNIReyq'
'Supplier#000002585','CsPoKpw2QuTY4AV1NkWuttneIa4SN'
'Supplier#000002629','0Bw,q5Zp8su9XrzoCngZ3cAEXZwZ'
'Supplier#000002721','HVdFAN2JHMQSpKm'
'Supplier#000002730','lIFxR4fzm31C6,muzJwl84z'
'Supplier#000002775','yDclaDaBD4ihH'
'Supplier#000002799','lwr, 6L3gdfc79PQut,4XO6nQsTJY63cAyYO'
'Supplier#000002934','m,trBENywSArwg3DhB'
'Supplier#000002941','Naddba 8YTEKekZyP0'
'Supplier#000003028','jouzgX0WZjhNMWLaH4fy'
'Supplier#000003095','HxON3jJhUi3zjt,r mTD'
'Supplier#000003143','hdolgh608uTkHh7t6qfSqkifKaiFjnCH'
'Supplier#000003185','hMa535Cbf2mj1Nw4OWOKWVrsK0VdDkJURrdjSIJe'
'Supplier#000003189','DWdPxt7 RnkZv6VOByR0em'
'Supplier#000003201','E87yws6I,t0qNs4QW7UzExKiJnJDZWue'
'Supplier#000003213','pxrRP4irQ1VoyfQ,dTf3'
'Supplier#000003275','9xO4nyJ2QJcX6vGf'
'Supplier#000003288','EDdfNt7E5Uc,xLTupoIgYL4yY7ujh,'
'Supplier#000003314','jnisU8MzqO4iUB3zsPcrysMw3DDUojS4q7LD'
'Supplier#000003373','iy8VM48ynpc3N2OsBwAvhYakO2us9R1bi'
'Supplier#000003421','Sh3dt9W5oeofFWovnFhrg,'
'Supplier#000003422','DJoCEapUeBXoV1iYiCcPFQvzsTv2ZI960'
'Supplier#000003441','zvFJIzS,oUuShHjpcX'
'Supplier#000003590','sy79CMLxqb,Cbo'
'Supplier#000003607','lNqFHQYjwSAkf'
'Supplier#000003625','qY588W0Yk5iaUy1RXTgNrEKrMAjBYHcKs'
'Supplier#000003723','jZEp0OEythCLcS OmJSrFtxJ66bMlzSp'
'Supplier#000003849','KgbZEaRk,6Q3mWvwh6uptrs1KRUHg 0'
'Supplier#000003894','vvGC rameLOk'
'Supplier#000003941','Pmb05mQfBMS618O7WKqZJ 9vyv'
'Supplier#000004059','umEYZSq9RJ2WEzdsv9meU8rmqwzVLRgiZwC'
'Supplier#000004207','tF64pwiOM4IkWjN3mS,e06WuAjLx'
'Supplier#000004236','dl,HPtJmGipxYsSqn9wmqkuWjst,mCeJ8O6T'
'Supplier#000004278','bBddbpBxIVp Di9'
'Supplier#000004281','1OwPHh Pgiyeus,iZS5eA23JDOipwk'
'Supplier#000004304','hQCAz59k,HLlp2CKUrcBIL'
'Supplier#000004346','S3076LEOwo'
'Supplier#000004406','Ah0ZaLu6VwufPWUz,7kbXgYZhauEaHqGIg'
'Supplier#000004430','yvSsKNSTL5HLXBET4luOsPNLxKzAMk'
'Supplier#000004527','p pVXCnxgcklWF6A1o3OHY3qW6'
'Supplier#000004655','67NqBc4 t3PG3F8aO IsqWNq4kGaPowYL'
'Supplier#000004851','Rj,x6IgLT7kBL99nqp'
'Supplier#000004871',',phpt6AWEnUS8t4Avb50rFfdg7O9c6nU8xxv8eC5'
'Supplier#000004884','42Z1uLye9nsn6aTGBNd dI8 x'
'Supplier#000004975','GPq5PMKY6Wy'
'Supplier#000005076','Xl7h9ifgvIHmqxFLgWfHK4Gjav BkP'
'Supplier#000005195','Woi3b2ZaicPh ZSfu1EfXhE'
'Supplier#000005256','Onc3t57VAMchm,pmoVLaU8bONni9NsuaM PzMMFz'
'Supplier#000005257','f9g8SEHB7obMj3QXAjXS2vfYY22'
'Supplier#000005300','gXG28YqpxU'
'Supplier#000005323','tMCkdqbDoyNo8vMIkzjBqYexoRAuv,T6 qzcu'
'Supplier#000005386','Ub6AAfHpWLWP'
'Supplier#000005426','9Dz2OVT1q sb4BK71ljQ1XjPBYRPvO'
'Supplier#000005465','63cYZenZBRZ613Q1FaoG0,smnC5zl9'
'Supplier#000005484','saFdOR qW7AFY,3asPqiiAa11Mo22pCoN0BtPrKo'
====

View File

@@ -0,0 +1,168 @@
# Q21 - Suppliers Who Kept Orders Waiting Query
# Step 1: for every order, records the number of distinct suppliers and the largest
# supplier key across all of its line items.
insert overwrite table q21_tmp1$TABLE
select l_orderkey, count(distinct l_suppkey), max(l_suppkey) as max_suppkey
from lineitem$TABLE
group by l_orderkey
---- SETUP
RESET q21_tmp1$TABLE
RELOAD q21_tmp1$TABLE
---- RESULTS
---- NUMROWS
1500000
====
# Q21 - Suppliers Who Kept Orders Waiting Query
# Step 2: same aggregation as q21_tmp1, but restricted to line items that were received
# after their commit date.
insert overwrite table q21_tmp2$TABLE
select l_orderkey, count(distinct l_suppkey), max(l_suppkey) as max_suppkey
from lineitem$TABLE
where l_receiptdate > l_commitdate
group by l_orderkey
---- SETUP
RESET q21_tmp2$TABLE
RELOAD q21_tmp2$TABLE
---- RESULTS
---- NUMROWS
1375365
====
# Q21 - Suppliers Who Kept Orders Waiting Query
# Final step: counts qualifying late line items per supplier name. Reading the nesting
# from the inside out:
#   l1 - line items received after their commit date, from suppliers in 'SAUDI ARABIA'
#   l2 - l1 restricted to orders with status 'F'
#   a/l3 - l2 joined with q21_tmp1, keeping rows whose order involves a supplier other
#          than this one (more than one distinct supplier, or a single one that differs)
#   b/c - q21_tmp2 right outer joined with l3, keeping rows where no q21_tmp2 row matched
#         or this supplier is the only one with a late line item on the order
# count_suppkey and max_suppkey are columns of the q21 temp tables, whose schema is
# defined outside this file.
select s_name, count(1) as numwait
from
( select s_name
from
( select s_name, t2.l_orderkey, l_suppkey, count_suppkey, max_suppkey
from q21_tmp2$TABLE t2
right outer join
( select s_name, l_orderkey, l_suppkey
from
( select s_name, t1.l_orderkey, l_suppkey, count_suppkey, max_suppkey
from q21_tmp1$TABLE t1
join
( select s_name, l_orderkey, l_suppkey
from orders$TABLE o
join
( select s_name, l_orderkey, l_suppkey
from nation$TABLE n
join supplier$TABLE s
on s.s_nationkey = n.n_nationkey and
n.n_name = 'SAUDI ARABIA'
join lineitem$TABLE l
on s.s_suppkey = l.l_suppkey
where l.l_receiptdate > l.l_commitdate
) l1
on o.o_orderkey = l1.l_orderkey and
o.o_orderstatus = 'F'
) l2
on l2.l_orderkey = t1.l_orderkey
) a
where (count_suppkey > 1) or
((count_suppkey=1) and (l_suppkey <> max_suppkey))
) l3
on l3.l_orderkey = t2.l_orderkey
) b
where (count_suppkey is null) or
((count_suppkey=1) and (l_suppkey = max_suppkey))
)c
group by s_name
order by numwait desc, s_name
limit 100
---- TYPES
string, bigint
---- RESULTS
'Supplier#000000262',17
'Supplier#000000357',13
'Supplier#000000379',12
'Supplier#000000436',13
'Supplier#000000486',15
'Supplier#000000496',17
'Supplier#000000565',15
'Supplier#000000610',13
'Supplier#000000633',14
'Supplier#000000673',12
'Supplier#000000762',12
'Supplier#000000788',13
'Supplier#000000811',12
'Supplier#000000821',12
'Supplier#000000889',13
'Supplier#000001046',15
'Supplier#000001047',15
'Supplier#000001062',13
'Supplier#000001161',15
'Supplier#000001336',15
'Supplier#000001337',12
'Supplier#000001435',15
'Supplier#000001498',13
'Supplier#000001916',12
'Supplier#000001925',12
'Supplier#000001960',14
'Supplier#000002005',16
'Supplier#000002039',12
'Supplier#000002056',13
'Supplier#000002095',16
'Supplier#000002160',17
'Supplier#000002301',17
'Supplier#000002312',13
'Supplier#000002323',14
'Supplier#000002344',13
'Supplier#000002357',12
'Supplier#000002483',12
'Supplier#000002490',14
'Supplier#000002540',17
'Supplier#000002596',13
'Supplier#000002615',13
'Supplier#000002829',20
'Supplier#000002978',13
'Supplier#000002993',14
'Supplier#000003048',13
'Supplier#000003063',17
'Supplier#000003075',15
'Supplier#000003101',14
'Supplier#000003234',13
'Supplier#000003335',15
'Supplier#000003727',13
'Supplier#000003806',13
'Supplier#000004472',13
'Supplier#000004489',14
'Supplier#000005178',17
'Supplier#000005236',13
'Supplier#000005435',14
'Supplier#000005583',14
'Supplier#000005649',15
'Supplier#000005774',14
'Supplier#000005799',16
'Supplier#000005808',18
'Supplier#000005842',16
'Supplier#000005906',13
'Supplier#000006027',15
'Supplier#000006241',13
'Supplier#000006326',13
'Supplier#000006384',13
'Supplier#000006394',13
'Supplier#000006450',16
'Supplier#000006624',13
'Supplier#000006629',13
'Supplier#000006682',13
'Supplier#000006737',13
'Supplier#000006795',15
'Supplier#000006800',15
'Supplier#000006824',15
'Supplier#000006825',13
'Supplier#000006939',16
'Supplier#000007021',13
'Supplier#000007131',15
'Supplier#000007382',15
'Supplier#000007417',13
'Supplier#000007497',13
'Supplier#000007579',14
'Supplier#000007602',13
'Supplier#000008134',13
'Supplier#000008180',14
'Supplier#000008234',13
'Supplier#000008331',17
'Supplier#000008695',14
'Supplier#000008913',15
'Supplier#000009200',16
'Supplier#000009224',14
'Supplier#000009435',13
'Supplier#000009436',13
'Supplier#000009564',13
'Supplier#000009727',16
'Supplier#000009787',15
'Supplier#000009896',13
====

View File

@@ -0,0 +1,44 @@
# Q22 - Global Sales Opportunity Query
# TODO: Missing a query. Need to rewrite it.
# Step 1: stores account balance, customer key and the first two characters of the phone
# number (cntrycode) for customers whose phone prefix is one of
# 13, 31, 23, 29, 30, 18 or 17.
insert overwrite table q22_customer_tmp$TABLE
select c_acctbal, c_custkey, substr(c_phone, 1, 2) as cntrycode
from customer$TABLE
where
substr(c_phone, 1, 2) = '13' or
substr(c_phone, 1, 2) = '31' or
substr(c_phone, 1, 2) = '23' or
substr(c_phone, 1, 2) = '29' or
substr(c_phone, 1, 2) = '30' or
substr(c_phone, 1, 2) = '18' or
substr(c_phone, 1, 2) = '17'
---- SETUP
RESET q22_customer_tmp$TABLE
RELOAD q22_customer_tmp$TABLE
---- RESULTS
---- NUMROWS
42015
====
# Q22 - Global Sales Opportunity Query
# Step 2: stores the average account balance over the q22_customer_tmp rows that have a
# positive balance (a single-row result).
insert overwrite table q22_customer_tmp1$TABLE
select avg(c_acctbal)
from q22_customer_tmp$TABLE
where c_acctbal > 0.00
---- SETUP
RESET q22_customer_tmp1$TABLE
RELOAD q22_customer_tmp1$TABLE
---- RESULTS
---- NUMROWS
1
====
# Q22 - Global Sales Opportunity Query
# Modified to use subquery to work around IMP-127
# Step 3: stores one row per customer key that occurs in orders. The count(1) column only
# exists to form an aggregate and is dropped by the outer select.
insert overwrite table q22_orders_tmp$TABLE
select a.o_custkey
from
( select o_custkey, count(1)
from orders$TABLE
group by o_custkey
) a
---- SETUP
RESET q22_orders_tmp$TABLE
RELOAD q22_orders_tmp$TABLE
---- RESULTS
====

View File

@@ -0,0 +1,27 @@
# Q3 - Shipping Priority Query
# Modifications: Added round() calls
# Computes the discounted revenue per order for customers in the 'BUILDING' market
# segment, considering orders placed before 1995-03-15 and only their line items shipped
# after 1995-03-15; returns the 10 orders with the highest revenue.
select l_orderkey, round(sum(l_extendedprice * (1 - l_discount)), 5) as revenue,
o_orderdate, o_shippriority
from customer$TABLE c
join orders$TABLE o
on c.c_mktsegment = 'BUILDING' and c.c_custkey = o.o_custkey
join lineitem$TABLE l
on l.l_orderkey = o.o_orderkey
where o_orderdate < '1995-03-15' and l_shipdate > '1995-03-15'
group by l_orderkey, o_orderdate, o_shippriority
order by revenue desc, o_orderdate
limit 10
---- TYPES
int, double, string, int
---- RESULTS
1188320,384537.9359,'1995-03-09',0
2300070,367371.1452,'1995-03-13',0
2435712,378673.0558,'1995-02-26',0
2456423,406181.0111,'1995-03-05',0
2628192,373133.3094,'1995-02-22',0
3459808,405838.6989,'1995-03-04',0
4878020,378376.7952,'1995-03-12',0
492164,390324.061,'1995-02-19',0
5521732,375153.9215,'1995-03-13',0
993600,371407.4595,'1995-03-05',0
====

View File

@@ -0,0 +1,31 @@
# Q4 - Order Priority Checking Query
# Step 1: stores the distinct keys of all orders that have at least one line item
# received after its commit date.
# The target table carries the $TABLE suffix so that the insert populates the same table
# that this test's SETUP section resets and that the follow-up query reads.
insert overwrite table q4_order_priority_tmp$TABLE
select distinct l_orderkey
from lineitem$TABLE
where l_commitdate < l_receiptdate
---- SETUP
RESET q4_order_priority_tmp$TABLE
RELOAD q4_order_priority_tmp$TABLE
---- RESULTS
---- NUMROWS
1375365
====
# Q4 - Order Priority Checking Query
# Modifications: Added limit clause
# Final step: counts, per order priority, the orders with an order date in
# [1993-07-01, 1993-10-01) whose key appears in q4_order_priority_tmp.
select o_orderpriority, count(1) as order_count
from orders$TABLE o
join q4_order_priority_tmp$TABLE t
on o.o_orderkey = t.o_orderkey and
o.o_orderdate >= '1993-07-01' and
o.o_orderdate < '1993-10-01'
group by o_orderpriority
order by o_orderpriority
limit 10
---- TYPES
string, bigint
---- RESULTS
'1-URGENT',10594
'2-HIGH',10476
'3-MEDIUM',10410
'4-NOT SPECIFIED',10556
'5-LOW',10487
====

View File

@@ -0,0 +1,36 @@
# Q5 - Local Supplier Volume Query
# Modifications: Removed ORDER BY clause, added round() calls
# Computes the discounted revenue per nation of the 'ASIA' region from line items whose
# order has an order date in [1994-01-01, 1995-01-01) and whose supplier and ordering
# customer belong to the same nation. Reading the nesting from the inside out:
#   n1 - nations of the 'ASIA' region
#   s1 - suppliers in those nations
#   l1 - line items supplied by s1
#   o1 - l1 joined with the orders in the date range
select n_name, round(sum(l_extendedprice * (1 - l_discount)), 5) as revenue
from customer$TABLE c
join
( select n_name, l_extendedprice, l_discount, s_nationkey, o_custkey
from orders$TABLE o
join
( select n_name, l_extendedprice, l_discount, l_orderkey, s_nationkey
from lineitem$TABLE l
join
( select n_name, s_suppkey, s_nationkey
from supplier$TABLE s join
( select n_name, n_nationkey
from nation$TABLE n
join region$TABLE r
on n.n_regionkey = r.r_regionkey and r.r_name = 'ASIA'
) n1
on s.s_nationkey = n1.n_nationkey
) s1
on l.l_suppkey = s1.s_suppkey
) l1
on l1.l_orderkey = o.o_orderkey and o.o_orderdate >= '1994-01-01'
and o.o_orderdate < '1995-01-01'
) o1
on c.c_nationkey = o1.s_nationkey and c.c_custkey = o1.o_custkey
group by n_name
---- TYPES
string, double
---- RESULTS
'CHINA',53724494.2566
'INDIA',52035512.0002
'INDONESIA',55502041.1697
'JAPAN',45410175.6954
'VIETNAM',55295086.9967
====

View File

@@ -0,0 +1,14 @@
# Q6 - Forecasting Revenue Change Query
# Sums l_extendedprice * l_discount over line items shipped in
# [1994-01-01, 1995-01-01) with a discount in [0.05, 0.07] and a quantity below 24.
# Modifications: Added round() call
SELECT round(sum(l_extendedprice * l_discount), 5) AS revenue
FROM lineitem$TABLE
WHERE l_shipdate >= '1994-01-01'
  AND l_shipdate < '1995-01-01'
  AND l_discount >= 0.05
  AND l_discount <= 0.07
  AND l_quantity < 24
---- TYPES
double
---- RESULTS
123141078.2283
====

View File

@@ -0,0 +1,51 @@
# Q8 - National Market Share Query
# Per order year, the fraction of revenue (volume) that comes from suppliers located in
# BRAZIL, over 'ECONOMY ANODIZED STEEL' parts ordered by customers of the AMERICA region
# with an order date in ['1995-01-01', '1996-12-31').
# Every table reference, including region, carries the $TABLE suffix so the whole query
# runs against the table variant under test.
# Modifications: Added limit clause, added round() call
select o_year,
  round(sum(case when nation = 'BRAZIL' then volume else 0.0 end) / sum(volume), 5) as mkt_share
from
  ( select year(o_orderdate) as o_year, l_extendedprice * (1-l_discount) as volume,
      n2.n_name as nation
    from nation$TABLE n2
    join
      ( select o_orderdate, l_discount, l_extendedprice, s_nationkey
        from supplier$TABLE s
        join
          ( select o_orderdate, l_discount, l_extendedprice, l_suppkey
            from part$TABLE p
            join
              ( select o_orderdate, l_partkey, l_discount, l_extendedprice, l_suppkey
                from lineitem$TABLE l
                join
                  ( select o_orderdate, o_orderkey
                    from orders$TABLE o
                    join
                      ( select c.c_custkey
                        from customer$TABLE c
                        join
                          ( select n1.n_nationkey
                            from nation$TABLE n1
                            join region$TABLE r
                            on n1.n_regionkey = r.r_regionkey and r.r_name = 'AMERICA'
                          ) n11
                        on c.c_nationkey = n11.n_nationkey
                      ) c1
                    on c1.c_custkey = o.o_custkey
                  ) o1
                on l.l_orderkey = o1.o_orderkey and o1.o_orderdate >= '1995-01-01'
                  and o1.o_orderdate < '1996-12-31'
              ) l1
            on p.p_partkey = l1.l_partkey and p.p_type = 'ECONOMY ANODIZED STEEL'
          ) p1
        on s.s_suppkey = p1.l_suppkey
      ) s1
    on s1.s_nationkey = n2.n_nationkey
  ) all_nation
group by o_year
order by o_year
limit 100
---- TYPES
int, double
---- RESULTS
1995,0.03444
1996,0.04158
====

View File

@@ -0,0 +1,142 @@
# Q9 - Product Type Measure Query
# Profit per supplier nation and order year for parts whose name contains 'green',
# where profit is l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity.
# Modifications: Added limit, added round() call
SELECT nation, o_year, round(sum(amount), 5) AS sum_profit
FROM (
  SELECT n_name AS nation, year(o_orderdate) AS o_year,
    l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
  FROM orders$TABLE o
    JOIN (
      SELECT l_extendedprice, l_discount, l_quantity, l_orderkey, n_name, ps_supplycost
      FROM part$TABLE p
        JOIN (
          SELECT l_extendedprice, l_discount, l_quantity, l_partkey, l_orderkey,
            n_name, ps_supplycost
          FROM partsupp$TABLE ps
            JOIN (
              SELECT l_suppkey, l_extendedprice, l_discount, l_quantity, l_partkey,
                l_orderkey, n_name
              FROM (
                SELECT s_suppkey, n_name
                FROM nation$TABLE n
                  JOIN supplier$TABLE s
                    ON n.n_nationkey = s.s_nationkey
              ) s1
                JOIN lineitem$TABLE l
                  ON s1.s_suppkey = l.l_suppkey
            ) l1
              ON ps.ps_suppkey = l1.l_suppkey
             AND ps.ps_partkey = l1.l_partkey
        ) l2
          ON p.p_name LIKE '%green%'
         AND p.p_partkey = l2.l_partkey
    ) l3
      ON o.o_orderkey = l3.l_orderkey
) profit
GROUP BY nation, o_year
ORDER BY nation, o_year DESC
LIMIT 101
---- TYPES
string, int, double
---- RESULTS
'ALGERIA',1992,45636849.4881
'ALGERIA',1993,46044207.7838
'ALGERIA',1994,48694008.0668
'ALGERIA',1995,44402273.5999
'ALGERIA',1996,48285482.6782
'ALGERIA',1997,48611833.4962
'ALGERIA',1998,27136900.1803
'ARGENTINA',1992,46654240.7487
'ARGENTINA',1993,48605593.6162
'ARGENTINA',1994,48268856.3547
'ARGENTINA',1995,45631769.2054
'ARGENTINA',1996,45255278.6021
'ARGENTINA',1997,47143964.1176
'ARGENTINA',1998,28341663.7848
'BRAZIL',1992,45280216.8027
'BRAZIL',1993,45766603.7379
'BRAZIL',1994,44854218.8932
'BRAZIL',1995,44015888.5132
'BRAZIL',1996,45090647.163
'BRAZIL',1997,45640660.7677
'BRAZIL',1998,26527736.396
'CANADA',1992,45873849.6882
'CANADA',1993,46634791.1121
'CANADA',1994,46691491.9596
'CANADA',1995,47311993.0441
'CANADA',1996,46307936.1108
'CANADA',1997,44849954.3186
'CANADA',1998,26828985.3944
'CHINA',1992,46949457.6426
'CHINA',1993,49634673.9463
'CHINA',1994,46397896.6097
'CHINA',1995,46734651.4838
'CHINA',1996,49532807.0601
'CHINA',1997,46123865.4097
'CHINA',1998,27510180.1657
'EGYPT',1992,47000574.5027
'EGYPT',1993,49133627.6471
'EGYPT',1994,47194895.228
'EGYPT',1995,45897160.6783
'EGYPT',1996,47745727.545
'EGYPT',1997,47674857.6783
'EGYPT',1998,28401491.7968
'ETHIOPIA',1992,44385735.6813
'ETHIOPIA',1993,42622804.1616
'ETHIOPIA',1994,41597208.5283
'ETHIOPIA',1995,43575757.3343
'ETHIOPIA',1996,43636287.1922
'ETHIOPIA',1997,43010596.0838
'ETHIOPIA',1998,25135046.1377
'FRANCE',1992,44052308.429
'FRANCE',1993,43729961.0639
'FRANCE',1994,43447352.9922
'FRANCE',1995,46377408.4328
'FRANCE',1996,43306317.9749
'FRANCE',1997,42392969.4731
'FRANCE',1998,26210392.2804
'GERMANY',1992,44361141.2107
'GERMANY',1993,45126645.9113
'GERMANY',1994,44616995.4369
'GERMANY',1995,43314338.3077
'GERMANY',1996,45882074.8049
'GERMANY',1997,43968355.8079
'GERMANY',1998,25991257.1071
'INDIA',1992,47914303.1234
'INDIA',1993,48112766.6987
'INDIA',1994,50106952.4261
'INDIA',1995,49344062.2829
'INDIA',1996,47571018.5122
'INDIA',1997,51386111.3448
'INDIA',1998,29626417.2379
'INDONESIA',1992,45185777.0688
'INDONESIA',1993,46147963.7895
'INDONESIA',1994,45988483.8772
'INDONESIA',1995,45593622.6993
'INDONESIA',1996,44746729.8078
'INDONESIA',1997,44593812.9863
'INDONESIA',1998,27734909.6763
'IRAN',1992,43052338.4143
'IRAN',1993,45362775.8094
'IRAN',1994,43696360.4795
'IRAN',1995,44414285.2348
'IRAN',1996,45891397.0992
'IRAN',1997,45019114.1696
'IRAN',1998,26661608.9301
'IRAQ',1992,47562355.6571
'IRAQ',1993,47435691.5082
'IRAQ',1994,48795847.231
'IRAQ',1995,48774801.7275
'IRAQ',1996,50036593.8404
'IRAQ',1997,48585307.5222
'IRAQ',1998,31188498.1914
'JAPAN',1992,39113493.9052
'JAPAN',1993,39589074.2771
'JAPAN',1994,41159518.3058
'JAPAN',1995,40925317.465
'JAPAN',1996,40267778.9094
'JAPAN',1997,42377052.3454
'JAPAN',1998,24694102.172
'JORDAN',1996,41860855.4684
'JORDAN',1997,41615962.6619
'JORDAN',1998,23489867.7893
====

View File

@@ -51,5 +51,8 @@
<value>true</value>
<description>controls whether to connect to remote metastore server or open a new metastore server in Hive Client JVM</description>
</property>
<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
</configuration>

View File

@@ -86,5 +86,8 @@
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
</configuration>

4
testdata/.gitignore vendored
View File

@@ -19,8 +19,8 @@ target
HBaseAllTypesError
HBaseAllTypesErrorNoNulls
# Hive benchmark data (not in repo, manually copied)
hive_benchmark
# Impala test data (not in repo, manually copied)
impala-data
# Generated block ids (hdfs ids) for each table
block-ids

View File

@@ -7,7 +7,7 @@
# by performing the defined INSERT / SELECT INTO statement. Each new table using the
# file format/compression combination needs to have a unique name, so all the
# statements are parameterized on table name.
# This file is read in by the 'generate_benchmark_statements.py' script to
# This file is read in by the 'generate_schema_statements.py' script to
# generate all the schema for the Impala benchmark tests.
#
# Each table is defined as a new section in this file with the following format:
@@ -41,12 +41,12 @@ ALTER TABLE %(table_name)s ADD PARTITION (chunk=5);
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s PARTITION(chunk) SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep1GB/part-00000' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=0);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep1GB/part-00001' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=1);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep1GB/part-00002' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=2);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep1GB/part-00003' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=3);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep1GB/part-00004' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=4);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep1GB/part-00005' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=5);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep1GB/part-00000' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=0);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep1GB/part-00001' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=1);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep1GB/part-00002' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=2);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep1GB/part-00003' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=3);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep1GB/part-00004' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=4);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep1GB/part-00005' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=5);
===
grep10gb
---
@@ -65,12 +65,12 @@ ALTER TABLE %(table_name)s ADD PARTITION (chunk=5);
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s PARTITION(chunk) SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part-00000' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=0);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part-00001' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=1);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part-00002' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=2);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part-00003' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=3);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part-00004' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=4);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part-00005' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=5);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep10GB/part-00000' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=0);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep10GB/part-00001' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=1);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep10GB/part-00002' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=2);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep10GB/part-00003' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=3);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep10GB/part-00004' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=4);
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/grep10GB/part-00005' OVERWRITE INTO TABLE %(table_name)s PARTITION(chunk=5);
===
web
---
@@ -87,7 +87,7 @@ LOCATION '${hiveconf:hive.metastore.warehouse.dir}/%(table_name)s/Rankings.dat';
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/html1GB/Rankings.dat' OVERWRITE INTO TABLE %(table_name)s;
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/html1GB/Rankings.dat' OVERWRITE INTO TABLE %(table_name)s;
===
web
---
@@ -110,5 +110,5 @@ LOCATION '${hiveconf:hive.metastore.warehouse.dir}/%(table_name)s/UserVisits.dat
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/html1GB/UserVisits.dat' OVERWRITE INTO TABLE %(table_name)s;
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/html1GB/UserVisits.dat' OVERWRITE INTO TABLE %(table_name)s;
===

View File

@@ -6,6 +6,10 @@ if [ x${JAVA_HOME} == x ]; then
exit -1
fi
# Load the TPCH data set. load-impala-data.sh is invoked by relative path, so the
# working directory must be ${IMPALA_HOME}/bin; abort if that directory cannot be
# entered rather than running the loader from the wrong location.
pushd "${IMPALA_HOME}/bin" || exit 1
./load-impala-data.sh tpch core
${HIVE_HOME}/bin/hive -hiveconf hive.root.logger=WARN,console -v \
-f ${IMPALA_HOME}/testdata/bin/create.sql
if [ $? != 0 ]; then

View File

@@ -2,18 +2,18 @@
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# This script generates the "CREATE TABLE", "INSERT", and "LOAD" statements for loading
# benchmark data and writes them to create-benchmark*-generated.sql and
# load-benchmark*-generated.sql.
# test data and writes them to create-*-generated.sql and
# load-*-generated.sql.
#
# The statements that are generated are based on an input test vector
# (read from a file) that describes the coverage desired. For example, currently
# we want to run benchmarks with different data sets, across different file types, and
# with different compression algorithms set. To improve data loading performance this
# script will generate an INSERT INTO statement to generate the data if the file doesn't
# already exist in HDFS. If the file does already exist in HDFS then we simply issue a
# script will generate an INSERT INTO statement to generate the data if the file does
# not already exist in HDFS. If the file does already exist in HDFS then we simply issue a
# LOAD statement which is much faster.
#
# The input test vectors are generated via the 'generate_test_vectors.py' so
# The input test vectors are generated via the generate_test_vectors.py so
# ensure that script has been run (or the test vector files already exist) before
# running this script.
#
@@ -39,6 +39,17 @@ parser.add_option("--exploration_strategy", dest="exploration_strategy", default
parser.add_option("--hive_warehouse_dir", dest="hive_warehouse_dir",
default="/test-warehouse",
help="The HDFS path to the base Hive test warehouse directory")
parser.add_option("--schema_template", dest="schema_template",
default="benchmark_schema_template.sql",
help="The schema template to use for statement generation")
parser.add_option("--base_output_file_name", dest="base_output_file_name",
default="benchmark",
help="The base file name to use for generated create/load scripts")
parser.add_option("--force_reload", dest="force_reload", action="store_true",
default= False, help='Skips HDFS exists check and reloads all tables')
parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
default = False, help="If set, outputs additional logging.")
(options, args) = parser.parse_args()
COMPRESSION_TYPE = "SET mapred.output.compression.type=%s;"
@@ -151,20 +162,25 @@ def write_statements_to_file_based_on_input_vector(output_name, input_file_name,
# and skip loading the data. Otherwise, the data is generated using either an
# INSERT INTO statement or a LOAD statement.
data_path = os.path.join(options.hive_warehouse_dir, table_name)
if does_dir_exist_in_hdfs(data_path):
if not options.force_reload and does_dir_exist_in_hdfs(data_path):
print 'Path:', data_path, 'already exists in HDFS. Data loading can be skipped.'
else:
print 'Path:', data_path, 'does not exists in HDFS. Data file will be generated.'
if table_name == s.base_table_name:
output_load_base.append(build_load_statement(load_local, table_name))
else:
if load_local:
output_load_base.append(build_load_statement(load_local, table_name))
else:
print 'Empty base table load for %s. Skipping load generation' % table_name
elif insert:
output_load.append(build_insert(insert, table_name, s.base_table_name,
codec, compression_type))
else:
print 'Empty insert for table %s. Skipping insert generation' % table_name
# Make sure we create the base tables before the remaining tables
output_load = output_load_base + output_load
write_array_to_file('create-benchmark-' + output_name + '-generated.sql', output_create)
write_array_to_file('load-benchmark-' + output_name + '-generated.sql', output_load)
write_array_to_file('create-' + output_name + '-generated.sql', output_create)
write_array_to_file('load-' + output_name + '-generated.sql', output_load)
def parse_benchmark_file(file_name):
template = open(file_name, 'rb')
@@ -176,7 +192,7 @@ def parse_benchmark_file(file_name):
data_set = sub_section[0]
gen_statement = SqlGenerationStatement(*sub_section[1:5])
statements[data_set.strip()].append(gen_statement)
else:
elif options.verbose:
print 'Skipping invalid subsection:', sub_section
return statements
@@ -186,9 +202,9 @@ if (options.exploration_strategy != 'core' and
print 'Invalid exploration strategy:', options.exploration_strategy
sys.exit(1)
statements = parse_benchmark_file('benchmark_schema_template.sql')
statements = parse_benchmark_file(options.schema_template)
write_statements_to_file_based_on_input_vector(
options.exploration_strategy,
'benchmark_%s.csv' % options.exploration_strategy,
'%s-%s' % (options.base_output_file_name, options.exploration_strategy),
'%s_%s.csv' % (options.base_output_file_name, options.exploration_strategy),
statements)

View File

@@ -37,6 +37,9 @@ parser = OptionParser()
parser.add_option("--dimension_file", dest="dimension_file",
default = "benchmark_dimensions.csv",
help="The file containing the list of dimensions.")
parser.add_option("--base_output_file_name", dest="base_output_file_name",
default = "benchmark",
help="The base file name for test vector output")
(options, args) = parser.parse_args()
FILE_FORMAT_IDX = 0
@@ -80,8 +83,8 @@ def write_vectors_to_csv(output_csv_file, matrix):
vectors = read_csv_vector_file(options.dimension_file)
vg = VectorGenerator(vectors)
write_vectors_to_csv('benchmark_pairwise.csv',
write_vectors_to_csv('%s_pairwise.csv' % options.base_output_file_name,
vg.generate_pairwise_matrix(is_valid_combination))
write_vectors_to_csv('benchmark_exhaustive.csv',
write_vectors_to_csv('%s_exhaustive.csv' % options.base_output_file_name,
vg.generate_exhaustive_matrix(is_valid_combination))

1
testdata/bin/tpch_core.csv vendored Normal file
View File

@@ -0,0 +1 @@
text,tpch,none,none
1 text tpch none none

4
testdata/bin/tpch_dimensions.csv vendored Normal file
View File

@@ -0,0 +1,4 @@
text,seq,rc
tpch
none,def,gzip,bzip,snap
none,block,record
1 text,seq,rc
2 tpch
3 none,def,gzip,bzip,snap
4 none,block,record

23
testdata/bin/tpch_exhaustive.csv vendored Normal file
View File

@@ -0,0 +1,23 @@
text,tpch,none,none
seq,tpch,none,none
seq,tpch,def,none
seq,tpch,def,block
seq,tpch,def,record
seq,tpch,gzip,none
seq,tpch,gzip,block
seq,tpch,gzip,record
seq,tpch,bzip,none
seq,tpch,bzip,block
seq,tpch,bzip,record
seq,tpch,snap,none
seq,tpch,snap,block
seq,tpch,snap,record
rc,tpch,none,none
rc,tpch,def,none
rc,tpch,def,block
rc,tpch,gzip,none
rc,tpch,gzip,block
rc,tpch,bzip,none
rc,tpch,bzip,block
rc,tpch,snap,none
rc,tpch,snap,block
1 text tpch none none
2 seq tpch none none
3 seq tpch def none
4 seq tpch def block
5 seq tpch def record
6 seq tpch gzip none
7 seq tpch gzip block
8 seq tpch gzip record
9 seq tpch bzip none
10 seq tpch bzip block
11 seq tpch bzip record
12 seq tpch snap none
13 seq tpch snap block
14 seq tpch snap record
15 rc tpch none none
16 rc tpch def none
17 rc tpch def block
18 rc tpch gzip none
19 rc tpch gzip block
20 rc tpch bzip none
21 rc tpch bzip block
22 rc tpch snap none
23 rc tpch snap block

5
testdata/bin/tpch_pairwise.csv vendored Normal file
View File

@@ -0,0 +1,5 @@
text,tpch,none,none
seq,tpch,def,block
rc,tpch,gzip,block
rc,tpch,bzip,none
seq,tpch,snap,record
1 text tpch none none
2 seq tpch def block
3 rc tpch gzip block
4 rc tpch bzip none
5 seq tpch snap record

455
testdata/bin/tpch_schema_template.sql vendored Normal file
View File

@@ -0,0 +1,455 @@
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# For details on this file format please see benchmark_schema_template.sql
===
tpch
---
lineitem
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
L_ORDERKEY INT,
L_PARTKEY INT,
L_SUPPKEY INT,
L_LINENUMBER INT,
L_QUANTITY DOUBLE,
L_EXTENDEDPRICE DOUBLE,
L_DISCOUNT DOUBLE,
L_TAX DOUBLE,
L_RETURNFLAG STRING,
L_LINESTATUS STRING,
L_SHIPDATE STRING,
L_COMMITDATE STRING,
L_RECEIPTDATE STRING,
L_SHIPINSTRUCT STRING,
L_SHIPMODE STRING,
L_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/lineitem.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
part
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
P_PARTKEY INT,
P_NAME STRING,
P_MFGR STRING,
P_BRAND STRING,
P_TYPE STRING,
P_SIZE INT,
P_CONTAINER STRING,
P_RETAILPRICE DOUBLE,
P_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/part.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
partsupp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
PS_PARTKEY INT,
PS_SUPPKEY INT,
PS_AVAILQTY INT,
PS_SUPPLYCOST DOUBLE,
PS_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/partsupp.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
supplier
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
S_SUPPKEY INT,
S_NAME STRING,
S_ADDRESS STRING,
S_NATIONKEY INT,
S_PHONE STRING,
S_ACCTBAL DOUBLE,
S_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/supplier.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
nation
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
N_NATIONKEY INT,
N_NAME STRING,
N_REGIONKEY INT,
N_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/nation.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
region
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
R_REGIONKEY INT,
R_NAME STRING,
R_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/region.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
orders
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
O_ORDERKEY INT,
O_CUSTKEY INT,
O_ORDERSTATUS STRING,
O_TOTALPRICE DOUBLE,
O_ORDERDATE STRING,
O_ORDERPRIORITY STRING,
O_CLERK STRING,
O_SHIPPRIORITY INT,
O_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/orders.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
customer
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
C_CUSTKEY INT,
C_NAME STRING,
C_ADDRESS STRING,
C_NATIONKEY INT,
C_PHONE STRING,
C_ACCTBAL DOUBLE,
C_MKTSEGMENT STRING,
C_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
FROM %(base_table_name)s INSERT OVERWRITE TABLE %(table_name)s SELECT *;
---
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/impala-data/tpch/customer.tbl'
OVERWRITE INTO TABLE %(table_name)s;
===
tpch
---
q2_minimum_cost_supplier_tmp1
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
s_acctbal double,
s_name string,
n_name string,
p_partkey int,
ps_supplycost double,
p_mfgr string,
s_address string,
s_phone string,
s_comment string)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q2_minimum_cost_supplier_tmp2
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
p_partkey int,
ps_min_supplycost double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q4_order_priority_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (O_ORDERKEY INT)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q7_volume_shipping_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
supp_nation string,
cust_nation string,
s_nationkey int,
c_nationkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q11_part_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
ps_partkey int,
part_value double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q11_sum_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (total_value double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
revenue
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
supplier_no int,
total_revenue double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
max_revenue
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (max_revenue double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
supplier_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (s_suppkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q16_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
p_brand string,
p_type string,
p_size int,
ps_suppkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
lineitem_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
t_partkey int,
t_avg_quantity double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q18_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
l_orderkey int,
t_sum_quantity double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q20_tmp1
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (p_partkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q20_tmp2
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
l_partkey int,
l_suppkey int,
sum_quantity double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q20_tmp3
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
ps_suppkey int,
ps_availqty int,
sum_quantity double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q21_tmp1
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
l_orderkey int,
count_suppkey int,
max_suppkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q21_tmp2
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
l_orderkey int,
count_suppkey int,
max_suppkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q22_customer_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (
c_acctbal double,
c_custkey int,
cntrycode string)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q22_customer_tmp1
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (avg_acctbal double)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===
tpch
---
q22_orders_tmp
---
DROP TABLE %(table_name)s;
CREATE EXTERNAL TABLE %(table_name)s (o_custkey int)
STORED AS %(file_format)s
LOCATION '/test-warehouse/%(table_name)s';
---
---
===