diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9c9abaef7..eb001401b 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -68,9 +68,9 @@ export IMPALA_CYRUS_SASL_VERSION=2.1.23
export IMPALA_OPENLDAP_VERSION=2.4.25
export IMPALA_SQUEASEL_VERSION=3.3
-export IMPALA_HADOOP_VERSION=2.2.0-cdh5.0.0-beta-1
-export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-beta-1-SNAPSHOT
-export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-beta-1-SNAPSHOT
+export IMPALA_HADOOP_VERSION=2.1.0-cdh5.0.0-SNAPSHOT
+export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-SNAPSHOT
+export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-SNAPSHOT
export IMPALA_SENTRY_VERSION=1.1.0
export IMPALA_THRIFT_VERSION=0.9.0
export IMPALA_AVRO_VERSION=1.7.4
diff --git a/bin/set-classpath.sh b/bin/set-classpath.sh
index 1714e5774..07ade52ba 100644
--- a/bin/set-classpath.sh
+++ b/bin/set-classpath.sh
@@ -22,7 +22,10 @@ CLASSPATH=\
$IMPALA_HOME/fe/src/test/resources:\
$IMPALA_HOME/fe/target/classes:\
$IMPALA_HOME/fe/target/dependency:\
-$IMPALA_HOME/fe/target/test-classes:
+$IMPALA_HOME/fe/target/test-classes:\
+${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
+${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
+${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:
for jar in `ls ${IMPALA_HOME}/fe/target/dependency/*.jar`; do
CLASSPATH=${CLASSPATH}:$jar
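
A quick way to confirm that the DataNucleus jars pinned above actually resolve at runtime is a classpath probe like the sketch below. DataNucleusClasspathCheck is hypothetical, not part of this patch, and the probe class names are assumptions based on standard DataNucleus 3.2.x packaging.

// Hypothetical probe: checks that each DataNucleus 3.2.x jar pinned in
// set-classpath.sh contributes a class the JVM can load.
public class DataNucleusClasspathCheck {
  public static void main(String[] args) {
    String[] probes = {
        "org.datanucleus.api.jdo.JDOPersistenceManagerFactory", // datanucleus-api-jdo
        "org.datanucleus.NucleusContext",                       // datanucleus-core
        "org.datanucleus.store.rdbms.RDBMSStoreManager"         // datanucleus-rdbms
    };
    for (String cls : probes) {
      try {
        Class.forName(cls);
        System.out.println("OK:      " + cls);
      } catch (ClassNotFoundException e) {
        System.out.println("MISSING: " + cls);
      }
    }
  }
}

Running it under the same CLASSPATH the script exports shows immediately which of the three jars, if any, is missing.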
diff --git a/fe/pom.xml b/fe/pom.xml
index 2c0789bcb..167e821fc 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -89,9 +89,25 @@
      <groupId>parquet</groupId>
      <artifactId>parquet-hive</artifactId>
-     <version>${env.IMPALA_PARQUET_VERSION}</version>
+     <version>1.1.1</version>
      <scope>system</scope>
-     <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-${env.IMPALA_PARQUET_VERSION}-cdh4.5.0.jar</systemPath>
+     <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-1.1.1.jar</systemPath>
    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
+    </dependency>
@@ -114,6 +130,7 @@
      <artifactId>libthrift</artifactId>
      <version>${env.IMPALA_THRIFT_VERSION}</version>
    </dependency>
+
    <dependency>
      <groupId>org.apache.thrift</groupId>
      <artifactId>libfb303</artifactId>
@@ -123,13 +140,9 @@
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-hbase-handler</artifactId>
+     <scope>system</scope>
      <version>${hive.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase</artifactId>
-      <version>${hbase.version}</version>
+     <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/hive-hbase-handler-${hive.version}.jar</systemPath>
@@ -168,7 +181,54 @@
      <artifactId>hive-jdbc</artifactId>
      <version>${hive.version}</version>
    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-api-jdo</artifactId>
+      <version>3.2.1</version>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.jdo</groupId>
+          <artifactId>jdo2-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-core</artifactId>
+      <version>3.2.2</version>
+      <scope>compile</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-rdbms</artifactId>
+      <version>3.2.1</version>
+      <scope>compile</scope>
+    </dependency>
    <dependency>
      <groupId>postgresql</groupId>
@@ -176,9 +236,25 @@
      <version>9.0-801.jdbc4</version>
    </dependency>

    <dependency>
      <groupId>org.apache.derby</groupId>
      <artifactId>derby</artifactId>
      <version>10.4.2.0</version>
    </dependency>
+
+    <dependency>
+      <groupId>commons-dbcp</groupId>
+      <artifactId>commons-dbcp</artifactId>
+      <version>1.4</version>
+    </dependency>
+
+    <dependency>
+      <groupId>javax.jdo</groupId>
+      <artifactId>jdo-api</artifactId>
+      <version>3.0.1</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.antlr</groupId>
+      <artifactId>antlr-runtime</artifactId>
+      <version>3.3</version>
+    </dependency>
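
All of the hbase and hive artifacts above use Maven's system scope, which resolves straight from the local path with no repository fallback, so a missing thirdparty jar only surfaces when the build first needs it. A pre-build existence check such as the hypothetical sketch below (ThirdpartyJarCheck is not part of this patch) can surface the problem early; the paths mirror the systemPath entries above and assume IMPALA_HOME, IMPALA_HBASE_VERSION, and IMPALA_HIVE_VERSION are exported as in bin/impala-config.sh.

import java.io.File;

// Hypothetical pre-build check for the system-scoped jars declared above.
public class ThirdpartyJarCheck {
  public static void main(String[] args) {
    String home = System.getenv("IMPALA_HOME");
    String hbase = System.getenv("IMPALA_HBASE_VERSION");
    String hive = System.getenv("IMPALA_HIVE_VERSION");
    String[] jars = {
        home + "/thirdparty/hbase-" + hbase + "/lib/hbase-client-" + hbase + ".jar",
        home + "/thirdparty/hbase-" + hbase + "/lib/hbase-common-" + hbase + ".jar",
        home + "/thirdparty/hive-" + hive + "/lib/hive-hbase-handler-" + hive + ".jar",
        home + "/thirdparty/hive-" + hive + "/lib/parquet-hive-1.1.1.jar"
    };
    for (String jar : jars) {
      System.out.println((new File(jar).isFile() ? "OK:      " : "MISSING: ") + jar);
    }
  }
}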
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
index 1cd33b35d..ceb7633d3 100644
--- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
+++ b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
@@ -20,36 +20,31 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hive.hbase.HBaseSerDe;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.log4j.Logger;
import com.cloudera.impala.common.Pair;
import com.cloudera.impala.thrift.TCatalogObjectType;
import com.cloudera.impala.thrift.TColumn;
import com.cloudera.impala.thrift.THBaseTable;
import com.cloudera.impala.thrift.TPrimitiveType;
-import com.cloudera.impala.thrift.TResultSet;
import com.cloudera.impala.thrift.TResultSetMetadata;
import com.cloudera.impala.thrift.TTable;
import com.cloudera.impala.thrift.TTableDescriptor;
@@ -400,15 +395,15 @@ public class HBaseTable extends Table {
Result r = rs.next();
if (r == null) break;
currentRowCount += 1;
- for (KeyValue kv : r.list()) {
+ for (Cell c : r.list()) {
// some extra row size added to make up for shared overhead
- currentRowSize += kv.getRowLength() // row key
+ currentRowSize += c.getRowLength() // row key
+ 4 // row key length field
- + kv.getFamilyLength() // Column family bytes
+ + c.getFamilyLength() // Column family bytes
+ 4 // family length field
- + kv.getQualifierLength() // qualifier bytes
+ + c.getQualifierLength() // qualifier bytes
+ 4 // qualifier length field
- + kv.getValueLength() // length of the value
+ + c.getValueLength() // length of the value
+ 4 // value length field
+ 10; // extra overhead for hfile index, checksums, metadata, etc
}
@@ -454,6 +449,17 @@ public class HBaseTable extends Table {
return fs.getContentSummary(regionDir).getLength();
}
+ /**
+  * Returns HBase's root directory (hbase.rootdir from the given
+  * configuration) as a qualified Path.
+  * Copied from HBase's FSUtils to avoid depending on the HBase server module.
+  */
+ public static Path getRootDir(final Configuration c) throws IOException {
+ Path p = new Path(c.get(HConstants.HBASE_DIR));
+ FileSystem fs = p.getFileSystem(c);
+ return p.makeQualified(fs);
+ }
+
/**
* Hive returns the columns in order of their declaration for HBase tables.
*/
diff --git a/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java b/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
index d07af41be..7c6e1e732 100644
--- a/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
+++ b/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
@@ -94,7 +94,7 @@ public class DescribeResultFactory {
StringBuilder sb = new StringBuilder();
// First add all the columns (includes partition columns).
sb.append(MetaDataFormatUtils.getAllColumnsInformation(msTable.getSd().getCols(),
- msTable.getPartitionKeys()));
+ msTable.getPartitionKeys(), false));
// Add the extended table metadata information.
sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
diff --git a/testdata/bin/load-hive-builtins.sh b/testdata/bin/load-hive-builtins.sh
index 664e4c81f..886e99552 100755
--- a/testdata/bin/load-hive-builtins.sh
+++ b/testdata/bin/load-hive-builtins.sh
@@ -1,9 +1,15 @@
#!/bin/bash
-# TODO: remove this once we understand why Hive 0.8.1 looks in HDFS for its builtins jar
+# TODO: remove this once we understand why Hive looks in HDFS for many of its jars
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HIVE_HOME}/lib/
${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HIVE_HOME}/lib/
-${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*builtins*.jar ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*.jar ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/*.jar ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/lib/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/lib/
+${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/lib/*.jar ${HADOOP_HOME}/share/hadoop/common/lib/
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${IMPALA_HOME}/fe/target/
${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${IMPALA_HOME}/fe/target/
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_LZO}/build
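
load-hive-builtins.sh works around the HDFS jar lookups by mirroring the local Hive and Hadoop lib directories into HDFS at identical paths. The same mirroring can be done through the FileSystem API instead of shelling out to hadoop fs; the sketch below is a hypothetical equivalent (UploadJars is not part of this patch) and assumes fs.defaultFS in core-site.xml points at the target HDFS cluster.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadJars {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    String hiveLib = System.getenv("HIVE_HOME") + "/lib";
    Path dst = new Path(hiveLib);
    fs.delete(dst, true); // hadoop fs -rm -r -f
    fs.mkdirs(dst);       // hadoop fs -mkdir -p
    File[] jars = new File(hiveLib).listFiles();
    if (jars == null) return;
    for (File jar : jars) {
      if (!jar.getName().endsWith(".jar")) continue;
      // hadoop fs -put <jar> <dst>
      fs.copyFromLocalFile(new Path(jar.getAbsolutePath()), dst);
    }
  }
}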
diff --git a/testdata/pom.xml b/testdata/pom.xml
index 43bbb6fe2..be2f47469 100644
--- a/testdata/pom.xml
+++ b/testdata/pom.xml
@@ -17,8 +17,7 @@
  <version>0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
-  <name>Builds test data generators</name>
-
+  <name>Build some test data</name>
  <properties>
    <hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
    <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
@@ -46,8 +45,36 @@
    <dependency>
      <groupId>org.apache.hbase</groupId>
-     <artifactId>hbase</artifactId>
+     <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.4</version>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.1.1</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.6.4</version>
+    </dependency>
diff --git a/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java b/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java
index b4a155f61..c42b97a86 100644
--- a/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java
+++ b/testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java
@@ -2,62 +2,34 @@
package com.cloudera.impala.datagenerator;
-import java.io.File;
-import java.io.FileWriter;
import java.io.IOException;
-import java.io.PrintWriter;
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.Collection;
-import java.util.GregorianCalendar;
-import java.util.List;
-import java.util.Iterator;
import java.util.ArrayList;
-import java.util.NavigableMap;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
import java.util.Map;
-import java.util.Random;
-import java.util.Set;
+import java.util.NavigableMap;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.util.Merge;
-import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.Chore;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.NotServingRegionException;
-import org.apache.hadoop.hbase.Stoppable;
-import org.apache.hadoop.hbase.catalog.MetaEditor;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HConnection;
-import org.apache.hadoop.hbase.client.HConnectionManager;
-import org.apache.hadoop.hbase.client.MetaScanner;
-import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.Threads;
-
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Sets;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
/**
* Splits HBase tables into regions and deterministically assigns regions to region
* servers.
@@ -187,7 +159,7 @@ class HBaseTestDataRegionAssigment {
throws IOException, InterruptedException {
long start = System.currentTimeMillis();
HRegionInfo daughterA = null, daughterB = null;
- HTable metaTable = new HTable(conf, HConstants.META_TABLE_NAME);
+ HTable metaTable = new HTable(conf, TableName.META_TABLE_NAME);
try {
while (System.currentTimeMillis() - start < timeout) {
@@ -196,9 +168,9 @@ class HBaseTestDataRegionAssigment {
break;
}
- HRegionInfo region = MetaReader.parseCatalogResult(result).getFirst();
+ HRegionInfo region = HRegionInfo.getHRegionInfo(result);
if(region.isSplitParent()) {
-      PairOfSameType<HRegionInfo> pair = MetaReader.getDaughterRegions(result);
+      PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
daughterA = pair.getFirst();
daughterB = pair.getSecond();
break;
@@ -258,7 +230,7 @@ class HBaseTestDataRegionAssigment {
while (System.currentTimeMillis() - start < timeout) {
Result result = getRegionRow(metaTable, hri.getRegionName());
if (result != null) {
- HRegionInfo info = MetaReader.parseCatalogResult(result).getFirst();
+ HRegionInfo info = HRegionInfo.getHRegionInfo(result);
if (info != null && !info.isOffline()) {
break;
}
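
The last two hunks replace MetaReader's catalog parsing with the static helpers HBase 0.95 moved onto HRegionInfo. Below is a minimal sketch of the new call shape, assuming the 0.95-era client API; SplitWatcher and its command-line region name are hypothetical, not part of this patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PairOfSameType;

public class SplitWatcher {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
    try {
      // Look up one region's row in the meta table by its region name.
      Result row = meta.get(new Get(Bytes.toBytes(args[0])));
      HRegionInfo region = HRegionInfo.getHRegionInfo(row);  // was MetaReader.parseCatalogResult(...)
      if (region != null && region.isSplitParent()) {
        PairOfSameType<HRegionInfo> daughters =
            HRegionInfo.getDaughterRegions(row);             // was MetaReader.getDaughterRegions(...)
        System.out.println("daughter A: " + daughters.getFirst());
        System.out.println("daughter B: " + daughters.getSecond());
      }
    } finally {
      meta.close();
    }
  }
}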