diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9c9abaef7..eb001401b 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -68,9 +68,9 @@
 export IMPALA_CYRUS_SASL_VERSION=2.1.23
 export IMPALA_OPENLDAP_VERSION=2.4.25
 export IMPALA_SQUEASEL_VERSION=3.3
-export IMPALA_HADOOP_VERSION=2.2.0-cdh5.0.0-beta-1
-export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-beta-1-SNAPSHOT
-export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-beta-1-SNAPSHOT
+export IMPALA_HADOOP_VERSION=2.1.0-cdh5.0.0-SNAPSHOT
+export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-SNAPSHOT
+export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-SNAPSHOT
 export IMPALA_SENTRY_VERSION=1.1.0
 export IMPALA_THRIFT_VERSION=0.9.0
 export IMPALA_AVRO_VERSION=1.7.4
diff --git a/bin/set-classpath.sh b/bin/set-classpath.sh
index 1714e5774..07ade52ba 100644
--- a/bin/set-classpath.sh
+++ b/bin/set-classpath.sh
@@ -22,7 +22,10 @@ CLASSPATH=\
 $IMPALA_HOME/fe/src/test/resources:\
 $IMPALA_HOME/fe/target/classes:\
 $IMPALA_HOME/fe/target/dependency:\
-$IMPALA_HOME/fe/target/test-classes:
+$IMPALA_HOME/fe/target/test-classes:\
+${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
+${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
+${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:
 
 for jar in `ls ${IMPALA_HOME}/fe/target/dependency/*.jar`; do
   CLASSPATH=${CLASSPATH}:$jar
diff --git a/fe/pom.xml b/fe/pom.xml
index 2c0789bcb..167e821fc 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -89,9 +89,25 @@
    <dependency>
      <groupId>parquet</groupId>
      <artifactId>parquet-hive</artifactId>
-      <version>${env.IMPALA_PARQUET_VERSION}</version>
+      <version>1.1.1</version>
      <scope>system</scope>
-      <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-${env.IMPALA_PARQUET_VERSION}-cdh4.5.0.jar</systemPath>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-1.1.1.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
    </dependency>
 
    <dependency>
@@ -114,6 +130,7 @@
      <artifactId>libthrift</artifactId>
      <version>${env.IMPALA_THRIFT_VERSION}</version>
    </dependency>
    <dependency>
+      <groupId>org.apache.thrift</groupId>
      <artifactId>libfb303</artifactId>
      <version>${env.IMPALA_THRIFT_VERSION}</version>
@@ -123,13 +140,9 @@
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-hbase-handler</artifactId>
+      <scope>system</scope>
      <version>${hive.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase</artifactId>
-      <version>${hbase.version}</version>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/hive-hbase-handler-${hive.version}.jar</systemPath>
    </dependency>
 
    <dependency>
@@ -168,7 +181,47 @@
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>${hive.version}</version>
    </dependency>
 
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-api-jdo</artifactId>
+      <version>3.2.1</version>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.jdo</groupId>
+          <artifactId>jdo2-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-core</artifactId>
+      <version>3.2.2</version>
+      <scope>compile</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-rdbms</artifactId>
+      <version>3.2.1</version>
+      <scope>compile</scope>
+    </dependency>
+
    <dependency>
      <groupId>postgresql</groupId>
@@ -176,9 +236,27 @@
      <artifactId>postgresql</artifactId>
      <version>9.0-801.jdbc4</version>
    </dependency>
 
    <dependency>
      <groupId>org.apache.derby</groupId>
      <artifactId>derby</artifactId>
      <version>10.4.2.0</version>
    </dependency>
+
+    <dependency>
+      <groupId>commons-dbcp</groupId>
+      <artifactId>commons-dbcp</artifactId>
+      <version>1.4</version>
+    </dependency>
+
+    <dependency>
+      <groupId>javax.jdo</groupId>
+      <artifactId>jdo-api</artifactId>
+      <version>3.0.1</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.antlr</groupId>
+      <artifactId>antlr-runtime</artifactId>
+      <version>3.3</version>
+    </dependency>
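The DataNucleus jars added to bin/set-classpath.sh and the hive-exec, Derby, JDO, and commons-dbcp dependencies above all feed Hive's embedded metastore: with hive.metastore.uris unset, HiveMetaStoreClient runs the metastore in-process, and its JDO persistence layer (DataNucleus) plus the backing Derby database must be on the classpath. A minimal smoke test for that wiring follows; the class name and printed output are illustrative only and not part of this patch:

    // MetastoreSmokeTest.java: hypothetical check, assumes the Hive 0.11
    // client jars plus the DataNucleus/Derby jars above are on the classpath.
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;

    public class MetastoreSmokeTest {
      public static void main(String[] args) throws Exception {
        // With hive.metastore.uris unset, the client embeds the metastore
        // in-process; missing DataNucleus jars surface here as a
        // ClassNotFoundException or JDOFatalUserException at construction.
        HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
        System.out.println(client.getAllDatabases());
        client.close();
      }
    }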
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
index 1cd33b35d..ceb7633d3 100644
--- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
+++ b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
@@ -20,37 +20,36 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hive.hbase.HBaseSerDe;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.log4j.Logger;
 
 import com.cloudera.impala.common.Pair;
 import com.cloudera.impala.thrift.TCatalogObjectType;
 import com.cloudera.impala.thrift.TColumn;
 import com.cloudera.impala.thrift.THBaseTable;
 import com.cloudera.impala.thrift.TPrimitiveType;
 import com.cloudera.impala.thrift.TResultSet;
 import com.cloudera.impala.thrift.TResultSetMetadata;
 import com.cloudera.impala.thrift.TTable;
 import com.cloudera.impala.thrift.TTableDescriptor;
@@ -400,15 +399,15 @@ public class HBaseTable extends Table {
      Result r = rs.next();
      if (r == null) break;
      currentRowCount += 1;
-      for (KeyValue kv : r.list()) {
+      for (Cell c : r.list()) {
        // some extra row size added to make up for shared overhead
-        currentRowSize += kv.getRowLength() // row key
+        currentRowSize += c.getRowLength() // row key
            + 4 // row key length field
-            + kv.getFamilyLength() // Column family bytes
+            + c.getFamilyLength() // Column family bytes
            + 4 // family length field
-            + kv.getQualifierLength() // qualifier bytes
+            + c.getQualifierLength() // qualifier bytes
            + 4 // qualifier length field
-            + kv.getValueLength() // length of the value
+            + c.getValueLength() // length of the value
            + 4 // value length field
            + 10; // extra overhead for hfile index, checksums, metadata, etc
      }
@@ -454,6 +453,17 @@ public class HBaseTable extends Table {
    return fs.getContentSummary(regionDir).getLength();
  }
 
+  /**
+   * Returns hbase's root directory: i.e. hbase.rootdir from
+   * the given configuration as a qualified Path.
+   * Method copied from HBase FSUtils.java to avoid depending on HBase server.
+   */
+  public static Path getRootDir(final Configuration c) throws IOException {
+    Path p = new Path(c.get(HConstants.HBASE_DIR));
+    FileSystem fs = p.getFileSystem(c);
+    return p.makeQualified(fs);
+  }
+
  /**
   * Hive returns the columns in order of their declaration for HBase tables.
   */
diff --git a/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java b/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
index d07af41be..7c6e1e732 100644
--- a/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
+++ b/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
@@ -94,7 +94,7 @@ public class DescribeResultFactory {
    StringBuilder sb = new StringBuilder();
    // First add all the columns (includes partition columns).
    sb.append(MetaDataFormatUtils.getAllColumnsInformation(msTable.getSd().getCols(),
-        msTable.getPartitionKeys()));
+        msTable.getPartitionKeys(), false));
 
    // Add the extended table metadata information.
    sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
diff --git a/testdata/bin/load-hive-builtins.sh b/testdata/bin/load-hive-builtins.sh
index 664e4c81f..886e99552 100755
--- a/testdata/bin/load-hive-builtins.sh
+++ b/testdata/bin/load-hive-builtins.sh
@@ -1,9 +1,15 @@
 #!/bin/bash
-# TODO: remove this once we understand why Hive 0.8.1 looks in HDFS for its builtins jar
+# TODO: remove this once we understand why Hive looks in HDFS for many of its jars
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HIVE_HOME}/lib/
 ${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HIVE_HOME}/lib/
-${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*builtins*.jar ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*.jar ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/*.jar ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/lib/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/lib/
+${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/lib/*.jar ${HADOOP_HOME}/share/hadoop/common/lib/
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${IMPALA_HOME}/fe/target/
 ${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${IMPALA_HOME}/fe/target/
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_LZO}/build
diff --git a/testdata/pom.xml b/testdata/pom.xml
index 43bbb6fe2..be2f47469 100644
--- a/testdata/pom.xml
+++ b/testdata/pom.xml
@@ -1,7 +1,7 @@
@@ -17,8 +17,7 @@
  <version>0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
 
-  <name>Builds test data generators</name>
-
+  <name>Build some test data</name>
  <properties>
    <hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
    <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
@@ -46,8 +45,36 @@
    <dependency>
      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase</artifactId>
+      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.4</version>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.1.1</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.6.4</version>
    </dependency>
diff --git a/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java b/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java
index b4a155f61..c42b97a86 100644
--- a/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java
+++ b/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java
@@ -2,62 +2,34 @@
 package com.cloudera.impala.datagenerator;
 
-import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
-import java.io.PrintWriter;
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.Collection;
-import java.util.GregorianCalendar;
-import java.util.List;
-import java.util.Iterator;
 import java.util.ArrayList;
-import java.util.NavigableMap;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
 import java.util.Map;
-import java.util.Random;
-import java.util.Set;
+import java.util.NavigableMap;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.util.Merge;
-import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.Chore;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.NotServingRegionException;
-import org.apache.hadoop.hbase.Stoppable;
-import org.apache.hadoop.hbase.catalog.MetaEditor;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.catalog.MetaReader;
 import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HConnection;
-import org.apache.hadoop.hbase.client.HConnectionManager;
-import org.apache.hadoop.hbase.client.MetaScanner;
-import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.apache.hadoop.hbase.util.Threads;
-
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Sets;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import org.apache.hadoop.util.ToolRunner;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
 /**
  * Splits HBase tables into regions and deterministically assigns regions to region
  * servers.
@@ -187,7 +159,7 @@ class HBaseTestDataRegionAssigment {
      throws IOException, InterruptedException {
    long start = System.currentTimeMillis();
    HRegionInfo daughterA = null, daughterB = null;
-    HTable metaTable = new HTable(conf, HConstants.META_TABLE_NAME);
+    HTable metaTable = new HTable(conf, TableName.META_TABLE_NAME);
    try {
      while (System.currentTimeMillis() - start < timeout) {
@@ -196,9 +168,9 @@ class HBaseTestDataRegionAssigment {
          break;
        }
 
-        HRegionInfo region = MetaReader.parseCatalogResult(result).getFirst();
+        HRegionInfo region = HRegionInfo.getHRegionInfo(result);
        if(region.isSplitParent()) {
-          PairOfSameType<HRegionInfo> pair = MetaReader.getDaughterRegions(result);
+          PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
          daughterA = pair.getFirst();
          daughterB = pair.getSecond();
          break;
@@ -258,7 +230,7 @@ class HBaseTestDataRegionAssigment {
    while (System.currentTimeMillis() - start < timeout) {
      Result result = getRegionRow(metaTable, hri.getRegionName());
      if (result != null) {
-        HRegionInfo info = MetaReader.parseCatalogResult(result).getFirst();
+        HRegionInfo info = HRegionInfo.getHRegionInfo(result);
        if (info != null && !info.isOffline()) {
          break;
        }
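Taken together, the Java hunks track three HBase 0.95 client API moves: KeyValue gives way to the Cell interface, byte[] table-name constants give way to TableName, and catalog-row decoding moves from MetaReader onto HRegionInfo itself. The sketch below condenses the new-style calls in one runnable place; it is not code from this patch, it assumes an hbase 0.95.2 client on the classpath and a reachable cluster, and its size arithmetic only loosely mirrors HBaseTable's estimate:

    // MetaScanSketch.java: hypothetical example of the post-0.95 client API.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;

    public class MetaScanSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // 0.95: the meta table is named by a TableName constant rather than
        // a byte[] constant from HConstants.
        HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
        try {
          ResultScanner scanner = meta.getScanner(new Scan());
          for (Result r : scanner) {
            // 0.95: HRegionInfo decodes its own catalog rows; this replaces
            // MetaReader.parseCatalogResult(result).getFirst().
            HRegionInfo region = HRegionInfo.getHRegionInfo(r);
            if (region == null) continue;
            long bytes = 0;
            // 0.95: row contents are Cells; KeyValue still implements Cell,
            // so the old r.list() can be walked through the new interface.
            for (Cell c : r.list()) {
              bytes += c.getRowLength() + c.getFamilyLength()
                  + c.getQualifierLength() + c.getValueLength();
            }
            System.out.println(region.getRegionNameAsString() + " ~" + bytes + "B");
          }
          scanner.close();
        } finally {
          meta.close();
        }
      }
    }

The same three substitutions account for every hunk above: TableName.META_TABLE_NAME in the split wait, HRegionInfo.getHRegionInfo and HRegionInfo.getDaughterRegions in the meta-polling loops, and Cell in HBaseTable's row-size estimate.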