[CDH5] Changes to make Impala work on CDH5. Mostly fixing up dependency versions. Minor code changes to address HBase API changes.

Change-Id: Icbbeb13eefa29e38286328d45600117a383cd106
Alex Behm
2013-09-03 17:33:20 -07:00
parent 752b8e3ee4
commit 60003ad211
8 changed files with 169 additions and 79 deletions
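The HBase API change referenced in the commit message is the move from the concrete KeyValue class to the Cell interface. A minimal sketch of the new-style row iteration, mirroring the patched loop in HBaseTable.java below and assuming the HBase 0.95 client API this commit targets (estimateRowSize is a hypothetical helper name, not part of this commit):

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.client.Result;

    // Sum each cell's key, family, qualifier, and value lengths for one row.
    // KeyValue implements Cell, so iterating r.list() as Cells compiles
    // against the 0.95 client API.
    static long estimateRowSize(Result r) {
      long size = 0;
      for (Cell c : r.list()) {
        size += c.getRowLength() + c.getFamilyLength()
            + c.getQualifierLength() + c.getValueLength();
      }
      return size;
    }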

View File

@@ -68,9 +68,9 @@ export IMPALA_CYRUS_SASL_VERSION=2.1.23
export IMPALA_OPENLDAP_VERSION=2.4.25
export IMPALA_SQUEASEL_VERSION=3.3
export IMPALA_HADOOP_VERSION=2.2.0-cdh5.0.0-beta-1
export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-beta-1-SNAPSHOT
export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-beta-1-SNAPSHOT
export IMPALA_HADOOP_VERSION=2.1.0-cdh5.0.0-SNAPSHOT
export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-SNAPSHOT
export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-SNAPSHOT
export IMPALA_SENTRY_VERSION=1.1.0
export IMPALA_THRIFT_VERSION=0.9.0
export IMPALA_AVRO_VERSION=1.7.4

View File

@@ -22,7 +22,10 @@ CLASSPATH=\
$IMPALA_HOME/fe/src/test/resources:\
$IMPALA_HOME/fe/target/classes:\
$IMPALA_HOME/fe/target/dependency:\
$IMPALA_HOME/fe/target/test-classes:
$IMPALA_HOME/fe/target/test-classes:\
${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:
for jar in `ls ${IMPALA_HOME}/fe/target/dependency/*.jar`; do
CLASSPATH=${CLASSPATH}:$jar

View File

@@ -89,9 +89,25 @@
<dependency>
<groupId>parquet</groupId>
<artifactId>parquet-hive</artifactId>
<version>${env.IMPALA_PARQUET_VERSION}</version>
<version>1.1.1</version>
<scope>system</scope>
<systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-${env.IMPALA_PARQUET_VERSION}-cdh4.5.0.jar</systemPath>
<systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-1.1.1.jar</systemPath>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<scope>system</scope>
<systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
<scope>system</scope>
<systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
</dependency>
<dependency>
@@ -114,6 +130,7 @@
<artifactId>libthrift</artifactId>
<version>${env.IMPALA_THRIFT_VERSION}</version>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
@@ -123,13 +140,9 @@
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-hbase-handler</artifactId>
<scope>system</scope>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<version>${hbase.version}</version>
<systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/hive-hbase-handler-${hive.version}.jar</systemPath>
</dependency>
<dependency>
@@ -168,7 +181,54 @@
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.derby</groupId>
<artifactId>derby</artifactId>
<version>10.4.2.0</version>
</dependency>
<!-- The datanucleus dependencies are copied directly from Hive's pom.xml
to make our FE build work -->
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>3.2.1</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>javax.jdo</groupId>
<artifactId>jdo2-api</artifactId>
</exclusion>
<exclusion>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<version>3.2.2</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>3.2.1</version>
<scope>compile</scope>
</dependency>
<!-- This driver supports PostgreSQL 7.2 and newer -->
<dependency>
<groupId>postgresql</groupId>
@@ -176,9 +236,25 @@
<version>9.0-801.jdbc4</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>javax.jdo</groupId>
<artifactId>jdo-api</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr-runtime</artifactId>
<version>3.3</version>
</dependency>
<dependency>

View File

@@ -20,36 +20,31 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hive.hbase.HBaseSerDe;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hive.service.cli.thrift.TColumn;
import org.apache.log4j.Logger;
import com.cloudera.impala.common.Pair;
import com.cloudera.impala.thrift.TCatalogObjectType;
import com.cloudera.impala.thrift.TColumn;
import com.cloudera.impala.thrift.THBaseTable;
import com.cloudera.impala.thrift.TPrimitiveType;
import com.cloudera.impala.thrift.TResultSet;
import com.cloudera.impala.thrift.TResultSetMetadata;
import com.cloudera.impala.thrift.TTable;
import com.cloudera.impala.thrift.TTableDescriptor;
@@ -400,15 +395,15 @@ public class HBaseTable extends Table {
Result r = rs.next();
if (r == null) break;
currentRowCount += 1;
for (KeyValue kv : r.list()) {
for (Cell c: r.list()) {
// some extra row size added to make up for shared overhead
currentRowSize += kv.getRowLength() // row key
currentRowSize += c.getRowLength() // row key
+ 4 // row key length field
+ kv.getFamilyLength() // Column family bytes
+ c.getFamilyLength() // Column family bytes
+ 4 // family length field
+ kv.getQualifierLength() // qualifier bytes
+ c.getQualifierLength() // qualifier bytes
+ 4 // qualifier length field
+ kv.getValueLength() // length of the value
+ c.getValueLength() // length of the value
+ 4 // value length field
+ 10; // extra overhead for hfile index, checksums, metadata, etc
}
@@ -454,6 +449,17 @@ public class HBaseTable extends Table {
return fs.getContentSummary(regionDir).getLength();
}
/**
* Returns HBase's root directory, i.e. <code>hbase.rootdir</code> from
* the given configuration, as a qualified Path.
* Method copied from HBase FSUtils.java to avoid depending on HBase server.
*/
public static Path getRootDir(final Configuration c) throws IOException {
Path p = new Path(c.get(HConstants.HBASE_DIR));
FileSystem fs = p.getFileSystem(c);
return p.makeQualified(fs);
}
/**
* Hive returns the columns in order of their declaration for HBase tables.
*/
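Because getRootDir was copied in precisely to avoid depending on the HBase server module, callers can resolve the root directory from a plain client-side Configuration. A minimal usage sketch, assuming hbase-site.xml is on the classpath so HBaseConfiguration.create() picks up hbase.rootdir (hbaseRootDir is a hypothetical wrapper, not part of this commit):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    // Resolves hbase.rootdir (e.g. hdfs://namenode:8020/hbase) as a fully
    // qualified Path without pulling in hbase-server classes.
    static Path hbaseRootDir() throws IOException {
      Configuration conf = HBaseConfiguration.create();
      return HBaseTable.getRootDir(conf);
    }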

View File

@@ -94,7 +94,7 @@ public class DescribeResultFactory {
StringBuilder sb = new StringBuilder();
// First add all the columns (includes partition columns).
sb.append(MetaDataFormatUtils.getAllColumnsInformation(msTable.getSd().getCols(),
msTable.getPartitionKeys()));
msTable.getPartitionKeys(), false));
// Add the extended table metadata information.
sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));

View File

@@ -1,9 +1,15 @@
#!/bin/bash
# TODO: remove this once we understand why Hive 0.8.1 looks in HDFS for its builtins jar
# TODO: remove this once we understand why Hive looks in HDFS for many of its jars
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HIVE_HOME}/lib/
${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HIVE_HOME}/lib/
${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*builtins*.jar ${HIVE_HOME}/lib/
${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*.jar ${HIVE_HOME}/lib/
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/
${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/
${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/*.jar ${HADOOP_HOME}/share/hadoop/common/
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/lib/
${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/lib/
${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/lib/*.jar ${HADOOP_HOME}/share/hadoop/common/lib/
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${IMPALA_HOME}/fe/target/
${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${IMPALA_HOME}/fe/target/
${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_LZO}/build

testdata/pom.xml
View File

@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<!--
Copyright (c) 2012 Cloudera, Inc. All rights reserved.
Copyright (c) 2012 Cloudera, Inc. All rights reserved.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
@@ -17,8 +17,7 @@
<version>0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Builds test data generators</name>
<name>Build some test data</name>
<properties>
<hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
<hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
@@ -46,8 +45,36 @@
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<scope>system</scope>
<systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
<scope>system</scope>
<systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.4</version>
</dependency>
</dependencies>

View File

@@ -2,62 +2,34 @@
package com.cloudera.impala.datagenerator;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Collections;
import java.util.Collection;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.NavigableMap;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.NavigableMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.util.Merge;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.Threads;
import com.google.common.collect.Iterators;
import com.google.common.collect.Sets;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
/**
* Splits HBase tables into regions and deterministically assigns regions to region
* servers.
@@ -187,7 +159,7 @@ class HBaseTestDataRegionAssigment {
throws IOException, InterruptedException {
long start = System.currentTimeMillis();
HRegionInfo daughterA = null, daughterB = null;
HTable metaTable = new HTable(conf, HConstants.META_TABLE_NAME);
HTable metaTable = new HTable(conf, TableName.META_TABLE_NAME);
try {
while (System.currentTimeMillis() - start < timeout) {
@@ -196,9 +168,9 @@ class HBaseTestDataRegionAssigment {
break;
}
HRegionInfo region = MetaReader.parseCatalogResult(result).getFirst();
HRegionInfo region = HRegionInfo.getHRegionInfo(result);
if(region.isSplitParent()) {
PairOfSameType<HRegionInfo> pair = MetaReader.getDaughterRegions(result);
PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
daughterA = pair.getFirst();
daughterB = pair.getSecond();
break;
@@ -258,7 +230,7 @@ class HBaseTestDataRegionAssigment {
while (System.currentTimeMillis() - start < timeout) {
Result result = getRegionRow(metaTable, hri.getRegionName());
if (result != null) {
HRegionInfo info = MetaReader.parseCatalogResult(result).getFirst();
HRegionInfo info = HRegionInfo.getHRegionInfo(result);
if (info != null && !info.isOffline()) {
break;
}
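The replaced MetaReader calls are the substance of this hunk: catalog-row parsing now goes through static helpers on HRegionInfo. A condensed sketch of the daughter-region lookup after a split, assuming result is a row read from the meta table as in the timeout loops above (variable names follow the patched code):

    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.PairOfSameType;

    // Parse a meta-table row directly; no MetaReader needed.
    HRegionInfo region = HRegionInfo.getHRegionInfo(result);
    if (region.isSplitParent()) {
      // After a split, the parent's meta row also carries both daughters.
      PairOfSameType<HRegionInfo> daughters =
          HRegionInfo.getDaughterRegions(result);
      HRegionInfo daughterA = daughters.getFirst();
      HRegionInfo daughterB = daughters.getSecond();
    }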