Mirror of https://github.com/apache/impala.git (synced 2026-01-07 09:02:19 -05:00)
[CDH5] Changes to make Impala work on CDH5. Mostly fixing up dependency versions. Minor code changes to address HBase API changes.
Change-Id: Icbbeb13eefa29e38286328d45600117a383cd106
bin/impala-config.sh

@@ -68,9 +68,9 @@ export IMPALA_CYRUS_SASL_VERSION=2.1.23
 export IMPALA_OPENLDAP_VERSION=2.4.25
 export IMPALA_SQUEASEL_VERSION=3.3

-export IMPALA_HADOOP_VERSION=2.2.0-cdh5.0.0-beta-1
-export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-beta-1-SNAPSHOT
-export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-beta-1-SNAPSHOT
+export IMPALA_HADOOP_VERSION=2.1.0-cdh5.0.0-SNAPSHOT
+export IMPALA_HBASE_VERSION=0.95.2-cdh5.0.0-SNAPSHOT
+export IMPALA_HIVE_VERSION=0.11.0-cdh5.0.0-SNAPSHOT
 export IMPALA_SENTRY_VERSION=1.1.0
 export IMPALA_THRIFT_VERSION=0.9.0
 export IMPALA_AVRO_VERSION=1.7.4
bin/set-classpath.sh

@@ -22,7 +22,10 @@ CLASSPATH=\
 $IMPALA_HOME/fe/src/test/resources:\
 $IMPALA_HOME/fe/target/classes:\
 $IMPALA_HOME/fe/target/dependency:\
-$IMPALA_HOME/fe/target/test-classes:
+$IMPALA_HOME/fe/target/test-classes:\
+${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
+${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
+${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:

 for jar in `ls ${IMPALA_HOME}/fe/target/dependency/*.jar`; do
   CLASSPATH=${CLASSPATH}:$jar
fe/pom.xml (92 changed lines)
@@ -89,9 +89,25 @@
     <dependency>
       <groupId>parquet</groupId>
       <artifactId>parquet-hive</artifactId>
-      <version>${env.IMPALA_PARQUET_VERSION}</version>
+      <version>1.1.1</version>
       <scope>system</scope>
-      <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-${env.IMPALA_PARQUET_VERSION}-cdh4.5.0.jar</systemPath>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/parquet-hive-1.1.1.jar</systemPath>
     </dependency>

+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
+    </dependency>
+
     <dependency>
@@ -114,6 +130,7 @@
       <artifactId>libthrift</artifactId>
       <version>${env.IMPALA_THRIFT_VERSION}</version>
     </dependency>
+
     <dependency>
       <groupId>org.apache.thrift</groupId>
       <artifactId>libfb303</artifactId>
@@ -123,13 +140,9 @@
     <dependency>
       <groupId>org.apache.hive</groupId>
       <artifactId>hive-hbase-handler</artifactId>
+      <scope>system</scope>
       <version>${hive.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase</artifactId>
-      <version>${hbase.version}</version>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/hive-hbase-handler-${hive.version}.jar</systemPath>
     </dependency>

     <dependency>
@@ -168,7 +181,54 @@
       <artifactId>hive-jdbc</artifactId>
       <version>${hive.version}</version>
     </dependency>
+<<<<<<< HEAD

+=======
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.derby</groupId>
+      <artifactId>derby</artifactId>
+      <version>10.4.2.0</version>
+    </dependency>
+    <!-- The datanucleus dependencies are copied directly from Hive's pom.xml
+         to make our FE build work -->
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-api-jdo</artifactId>
+      <version>3.2.1</version>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.jdo</groupId>
+          <artifactId>jdo2-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-core</artifactId>
+      <version>3.2.2</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-rdbms</artifactId>
+      <version>3.2.1</version>
+      <scope>compile</scope>
+    </dependency>
+>>>>>>> c034ff7... Changes to make Impala work on CDH5. Mostly fixing up dependency versions. Minor code changes to address HBase API changes.
     <!-- This driver supports PostgreSQL 7.2 and newer -->
     <dependency>
       <groupId>postgresql</groupId>
@@ -176,9 +236,25 @@
       <version>9.0-801.jdbc4</version>
     </dependency>
     <dependency>
+<<<<<<< HEAD
       <groupId>org.apache.derby</groupId>
       <artifactId>derby</artifactId>
       <version>10.4.2.0</version>
+=======
+      <groupId>commons-dbcp</groupId>
+      <artifactId>commons-dbcp</artifactId>
+      <version>1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>javax.jdo</groupId>
+      <artifactId>jdo-api</artifactId>
+      <version>3.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.antlr</groupId>
+      <artifactId>antlr-runtime</artifactId>
+      <version>3.3</version>
+>>>>>>> c034ff7... Changes to make Impala work on CDH5. Mostly fixing up dependency versions. Minor code changes to address HBase API changes.
     </dependency>

     <dependency>
fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java

@@ -20,36 +20,31 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hive.hbase.HBaseSerDe;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hive.service.cli.thrift.TColumn;
 import org.apache.log4j.Logger;

 import com.cloudera.impala.common.Pair;
 import com.cloudera.impala.thrift.TCatalogObjectType;
+import com.cloudera.impala.thrift.TColumn;
 import com.cloudera.impala.thrift.THBaseTable;
 import com.cloudera.impala.thrift.TPrimitiveType;
 import com.cloudera.impala.thrift.TResultSet;
 import com.cloudera.impala.thrift.TResultSetMetadata;
 import com.cloudera.impala.thrift.TTable;
 import com.cloudera.impala.thrift.TTableDescriptor;
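Note on the Compression import change above: in the HBase 0.95 line the class moved from org.apache.hadoop.hbase.io.hfile to org.apache.hadoop.hbase.io.compress while keeping the same shape. A minimal sketch of post-move usage; the CompressionExample class and the "d" family name are made up for illustration:

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.io.compress.Compression; // was org.apache.hadoop.hbase.io.hfile.Compression

public class CompressionExample {
  // Builds a column family descriptor that uses the relocated enum.
  public static HColumnDescriptor snappyFamily() {
    HColumnDescriptor family = new HColumnDescriptor("d");
    family.setCompressionType(Compression.Algorithm.SNAPPY);
    return family;
  }
}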
@@ -400,15 +395,15 @@ public class HBaseTable extends Table {
         Result r = rs.next();
         if (r == null) break;
         currentRowCount += 1;
-        for (KeyValue kv : r.list()) {
+        for (Cell c: r.list()) {
           // some extra row size added to make up for shared overhead
-          currentRowSize += kv.getRowLength() // row key
+          currentRowSize += c.getRowLength() // row key
               + 4 // row key length field
-              + kv.getFamilyLength() // Column family bytes
+              + c.getFamilyLength() // Column family bytes
               + 4 // family length field
-              + kv.getQualifierLength() // qualifier bytes
+              + c.getQualifierLength() // qualifier bytes
               + 4 // qualifier length field
-              + kv.getValueLength() // length of the value
+              + c.getValueLength() // length of the value
               + 4 // value length field
               + 10; // extra overhead for hfile index, checksums, metadata, etc
         }
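For reference, the per-cell arithmetic in the hunk above factored into a standalone helper; estimateCellSize is a hypothetical name, and the accessors are the Cell interface methods (getRowLength() and friends) that replace the old KeyValue calls:

import org.apache.hadoop.hbase.Cell;

public class CellSizeEstimator {
  // Mirrors HBaseTable's estimate: key parts, four 4-byte length fields,
  // and a flat 10 bytes for hfile index, checksum, and metadata overhead.
  public static long estimateCellSize(Cell c) {
    return c.getRowLength()      // row key bytes
        + 4                      // row key length field
        + c.getFamilyLength()    // column family bytes
        + 4                      // family length field
        + c.getQualifierLength() // qualifier bytes
        + 4                      // qualifier length field
        + c.getValueLength()     // value bytes
        + 4                      // value length field
        + 10;                    // shared hfile overhead
  }
}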
@@ -454,6 +449,17 @@ public class HBaseTable extends Table {
     return fs.getContentSummary(regionDir).getLength();
   }

+  /**
+   * Returns hbase's root directory: i.e. <code>hbase.rootdir</code> from
+   * the given configuration as a qualified Path.
+   * Method copied from HBase FSUtils.java to avoid depending on HBase server.
+   */
+  public static Path getRootDir(final Configuration c) throws IOException {
+    Path p = new Path(c.get(HConstants.HBASE_DIR));
+    FileSystem fs = p.getFileSystem(c);
+    return p.makeQualified(fs);
+  }
+
   /**
    * Hive returns the columns in order of their declaration for HBase tables.
    */
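A minimal sketch of how the copied helper can be called, assuming HBaseTable is on the classpath and hbase.rootdir is reachable; it pairs getRootDir() with the same getContentSummary() pattern that getRegionSize() uses above (RootDirSizeExample is a hypothetical class):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class RootDirSizeExample {
  // Total bytes stored under hbase.rootdir, resolved and qualified by getRootDir().
  public static long hbaseRootDirBytes() throws IOException {
    Configuration conf = HBaseConfiguration.create();
    Path root = HBaseTable.getRootDir(conf);
    FileSystem fs = root.getFileSystem(conf);
    return fs.getContentSummary(root).getLength();
  }
}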
fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java

@@ -94,7 +94,7 @@ public class DescribeResultFactory {
     StringBuilder sb = new StringBuilder();
     // First add all the columns (includes partition columns).
     sb.append(MetaDataFormatUtils.getAllColumnsInformation(msTable.getSd().getCols(),
-        msTable.getPartitionKeys()));
+        msTable.getPartitionKeys(), false));
     // Add the extended table metadata information.
     sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
testdata/bin/load-hive-builtins.sh (10 changed lines)
@@ -1,9 +1,15 @@
 #!/bin/bash

-# TODO: remove this once we understand why Hive 0.8.1 looks in HDFS for its builtins jar
+# TODO: remove this once we understand why Hive looks in HDFS for many of its jars
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HIVE_HOME}/lib/
 ${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HIVE_HOME}/lib/
-${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*builtins*.jar ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*.jar ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/*.jar ${HADOOP_HOME}/share/hadoop/common/
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/lib/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HADOOP_HOME}/share/hadoop/common/lib/
+${HADOOP_HOME}/bin/hadoop fs -put ${HADOOP_HOME}/share/hadoop/common/lib/*.jar ${HADOOP_HOME}/share/hadoop/common/lib/
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${IMPALA_HOME}/fe/target/
 ${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${IMPALA_HOME}/fe/target/
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_LZO}/build
testdata/pom.xml (35 changed lines)
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>

 <!--
-Copyright (c) 2012 Cloudera, Inc. All rights reserved.
+Copyright (c) 2012 Cloudera, Inc. All rights reserved.
 -->

 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
@@ -17,8 +17,7 @@
   <version>0.1-SNAPSHOT</version>
   <packaging>jar</packaging>

-  <name>Builds test data generators</name>
-
+  <name>Build some test data</name>
   <properties>
     <hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
     <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
@@ -46,8 +45,36 @@

     <dependency>
       <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase</artifactId>
+      <artifactId>hbase-client</artifactId>
       <version>${hbase.version}</version>
       <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-client-${hbase.version}.jar</systemPath>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>${hbase.version}</version>
+      <scope>system</scope>
+      <systemPath>${env.IMPALA_HOME}/thirdparty/hbase-${hbase.version}/lib/hbase-common-${hbase.version}.jar</systemPath>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.4</version>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.1.1</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.6.4</version>
+    </dependency>
   </dependencies>
testdata/src/main/java/com/cloudera/impala/datagenerator/HBaseTestDataRegionAssigment.java

@@ -2,62 +2,34 @@

 package com.cloudera.impala.datagenerator;

-import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
-import java.io.PrintWriter;
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.Collection;
-import java.util.GregorianCalendar;
-import java.util.List;
-import java.util.Iterator;
-import java.util.ArrayList;
-import java.util.NavigableMap;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.NavigableMap;

-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.util.Merge;
-import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.Chore;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.NotServingRegionException;
-import org.apache.hadoop.hbase.Stoppable;
-import org.apache.hadoop.hbase.catalog.MetaEditor;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.catalog.MetaReader;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.MetaScanner;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.Threads;

-import com.google.common.collect.Iterators;
-import com.google.common.collect.Sets;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;

 /**
  * Splits HBase tables into regions and deterministically assigns regions to region
  * servers.
@@ -187,7 +159,7 @@ class HBaseTestDataRegionAssigment {
       throws IOException, InterruptedException {
     long start = System.currentTimeMillis();
     HRegionInfo daughterA = null, daughterB = null;
-    HTable metaTable = new HTable(conf, HConstants.META_TABLE_NAME);
+    HTable metaTable = new HTable(conf, TableName.META_TABLE_NAME);

     try {
       while (System.currentTimeMillis() - start < timeout) {
@@ -196,9 +168,9 @@ class HBaseTestDataRegionAssigment {
           break;
         }

-        HRegionInfo region = MetaReader.parseCatalogResult(result).getFirst();
+        HRegionInfo region = HRegionInfo.getHRegionInfo(result);
        if(region.isSplitParent()) {
-          PairOfSameType<HRegionInfo> pair = MetaReader.getDaughterRegions(result);
+          PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
           daughterA = pair.getFirst();
           daughterB = pair.getSecond();
           break;
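The replacement calls above are static helpers on HRegionInfo in the HBase 0.95 line. A compact sketch of the split-polling step the hunk implements, with the timeout loop elided; SplitWatcher and pollDaughters are hypothetical names:

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.PairOfSameType;

public class SplitWatcher {
  // Returns the daughter regions once the meta row shows a split parent, else null.
  public static PairOfSameType<HRegionInfo> pollDaughters(Result result) {
    // Replaces MetaReader.parseCatalogResult(result).getFirst().
    HRegionInfo region = HRegionInfo.getHRegionInfo(result);
    if (region != null && region.isSplitParent()) {
      // Replaces MetaReader.getDaughterRegions(result).
      return HRegionInfo.getDaughterRegions(result);
    }
    return null;
  }
}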
@@ -258,7 +230,7 @@ class HBaseTestDataRegionAssigment {
     while (System.currentTimeMillis() - start < timeout) {
       Result result = getRegionRow(metaTable, hri.getRegionName());
       if (result != null) {
-        HRegionInfo info = MetaReader.parseCatalogResult(result).getFirst();
+        HRegionInfo info = HRegionInfo.getHRegionInfo(result);
         if (info != null && !info.isOffline()) {
           break;
         }