[opt] support ORC generated from Hive 1.x for all file scan nodes
morningman committed Dec 21, 2023
1 parent 4ddef31 commit 40bd7aa
Showing 2 changed files with 22 additions and 23 deletions.
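Background: ORC files written by Hive 1.x record placeholder field names (_col0, _col1, _col2, ...) in the file footer instead of the table's real column names, so a reader that matches columns purely by name finds none of them. This commit therefore has the frontend hand every ORC scan a slot-name-to-schema-position map the reader can fall back on. As a quick way to see the symptom, here is a minimal sketch using the Apache ORC Java API; the file path and Configuration are placeholders, not part of this commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;

public class OrcFieldNameProbe {
    public static void main(String[] args) throws Exception {
        // Hypothetical path to a data file written by a Hive 1.x INSERT.
        Path path = new Path("/tmp/hive1_table/000000_0");
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration()));
        // For Hive 1.x output this typically prints [_col0, _col1, _col2, ...]
        // rather than the column names stored in the metastore.
        System.out.println(reader.getSchema().getFieldNames());
    }
}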
@@ -275,6 +275,9 @@ public void createScanRangeLocations() throws UserException {
             return;
         }
         TFileFormatType fileFormatType = getFileFormatType();
+        if (fileFormatType == TFileFormatType.FORMAT_ORC) {
+            genSlotToSchemaIdMapForOrc();
+        }
         params.setFormatType(fileFormatType);
         boolean isCsvOrJson = Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON;
         boolean isWal = fileFormatType == TFileFormatType.FORMAT_WAL;
@@ -467,6 +470,25 @@ private TFileRangeDesc createFileRangeDesc(FileSplit fileSplit, List<String> col
         return rangeDesc;
     }
 
+    // To support Hive 1.x ORC internal column names such as (_col0, _col1, _col2, ...),
+    // we need to save the mapping from slot name to schema position.
+    protected void genSlotToSchemaIdMapForOrc() {
+        Preconditions.checkNotNull(params);
+        List<Column> baseSchema = desc.getTable().getBaseSchema();
+        Map<String, Integer> columnNameToPosition = Maps.newHashMap();
+        for (SlotDescriptor slot : desc.getSlots()) {
+            int idx = 0;
+            for (Column col : baseSchema) {
+                if (col.getName().equals(slot.getColumn().getName())) {
+                    columnNameToPosition.put(col.getName(), idx);
+                    break;
+                }
+                idx += 1;
+            }
+        }
+        params.setSlotNameToSchemaPos(columnNameToPosition);
+    }
+
     protected abstract TFileType getLocationType() throws UserException;
 
     protected abstract TFileType getLocationType(String location) throws UserException;
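The new genSlotToSchemaIdMapForOrc helper now runs for every ORC scan and stores the slot-name-to-schema-position map in the scan-range params via setSlotNameToSchemaPos; presumably the backend ORC reader consults it when a file's field names turn out to be the generic _colN placeholders. A self-contained sketch of that lookup idea follows; the class and method names are illustrative, not Doris code:

import java.util.List;
import java.util.Map;

class OrcColumnResolver {
    // Resolve the physical ORC column index for a queried slot. Match by real
    // name first; if the file only carries Hive 1.x placeholders (_col0, _col1, ...),
    // fall back to the slot-name -> schema-position map built by the frontend.
    static int resolve(String slotName, List<String> orcFieldNames,
                       Map<String, Integer> slotNameToSchemaPos) {
        int byName = orcFieldNames.indexOf(slotName);
        if (byName >= 0) {
            return byName;
        }
        Integer pos = slotNameToSchemaPos.get(slotName);
        if (pos != null) {
            return orcFieldNames.indexOf("_col" + pos);
        }
        return -1; // column not present in this file
    }
}

For a table (id, name, price) whose Hive 1.x file carries fields [_col0, _col1, _col2], the map is {id=0, name=1, price=2}, and resolve("price", ...) returns 2.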
@@ -18,7 +18,6 @@
 package org.apache.doris.planner.external;
 
 import org.apache.doris.analysis.FunctionCallExpr;
-import org.apache.doris.analysis.SlotDescriptor;
 import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
@@ -39,7 +38,6 @@
 import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
 import org.apache.doris.datasource.hive.HivePartition;
 import org.apache.doris.datasource.hive.HiveTransaction;
-import org.apache.doris.datasource.hive.HiveVersionUtil;
 import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
 import org.apache.doris.planner.ListPartitionPrunerV2;
 import org.apache.doris.planner.PlanNodeId;
@@ -55,7 +53,6 @@
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import lombok.Setter;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -117,9 +114,6 @@ public HiveScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
     @Override
     protected void doInitialize() throws UserException {
         super.doInitialize();
-        if (HiveVersionUtil.isHive1(hmsTable.getHiveVersion())) {
-            genSlotToSchemaIdMap();
-        }
 
         if (hmsTable.isHiveTransactionalTable()) {
             this.hiveTransaction = new HiveTransaction(DebugUtil.printId(ConnectContext.get().queryId()),
@@ -396,23 +390,6 @@ protected TFileAttributes getFileAttributes() throws UserException {
         return fileAttributes;
     }
 
-    // To Support Hive 1.x orc internal column name like (_col0, _col1, _col2...)
-    private void genSlotToSchemaIdMap() {
-        List<Column> baseSchema = desc.getTable().getBaseSchema();
-        Map<String, Integer> columnNameToPosition = Maps.newHashMap();
-        for (SlotDescriptor slot : desc.getSlots()) {
-            int idx = 0;
-            for (Column col : baseSchema) {
-                if (col.getName().equals(slot.getColumn().getName())) {
-                    columnNameToPosition.put(col.getName(), idx);
-                    break;
-                }
-                idx += 1;
-            }
-        }
-        params.setSlotNameToSchemaPos(columnNameToPosition);
-    }
-
     @Override
     public boolean pushDownAggNoGrouping(FunctionCallExpr aggExpr) {
