Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/acvictor/velox into acvicto…
Browse files Browse the repository at this point in the history
…r/weekday
  • Loading branch information
acvictor committed Feb 13, 2024
2 parents 01865cd + 7830d64 commit 7ecd8ed
Show file tree
Hide file tree
Showing 50 changed files with 1,042 additions and 441 deletions.
101 changes: 0 additions & 101 deletions .circleci/dist_compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,85 +138,8 @@ executors:
check:
docker:
- image : ghcr.io/facebookincubator/velox-dev:check-avx
macos-intel:
macos:
xcode: "14.3.0"
resource_class: macos.x86.medium.gen2
macos-m1:
macos:
xcode: "14.2.0"
resource_class: macos.m1.large.gen1

jobs:
macos-build:
parameters:
os:
type: executor
executor: << parameters.os >>
environment:
ICU_SOURCE: BUNDLED
simdjson_SOURCE: BUNDLED
steps:
- checkout
- update-submodules
- restore_cache:
name: "Restore Dependency Cache"
# The version number in the key can be incremented
# to manually avoid the case where bad dependencies
# are cached, and has no other meaning.
# If you update it, be sure to update save_cache too.
key: velox-circleci-macos-{{ arch }}-deps-v1-{{ checksum ".circleci/config.yml" }}-{{ checksum "scripts/setup-macos.sh" }}
- run:
name: "Install dependencies"
command: |
set -xu
mkdir -p ~/deps ~/deps-src
curl -L https://github.com/Homebrew/brew/tarball/master | tar xz --strip 1 -C ~/deps
PATH=~/deps/bin:${PATH} DEPENDENCY_DIR=~/deps-src INSTALL_PREFIX=~/deps PROMPT_ALWAYS_RESPOND=n ./scripts/setup-macos.sh
rm -rf ~/deps/.git ~/deps/Library/Taps/ # Reduce cache size by 70%.
no_output_timeout: 20m
- save_cache:
name: "Save Dependency Cache"
# The version number in the key can be incremented
# to manually avoid the case where bad dependencies
# are cached, and has no other meaning.
# If you update it, be sure to update restore_cache too.
key: velox-circleci-macos-{{ arch }}-deps-v1-{{ checksum ".circleci/config.yml" }}-{{ checksum "scripts/setup-macos.sh" }}
paths:
- ~/deps
- run:
name: "Calculate merge-base date for CCache"
command: git show -s --format=%cd --date="format:%Y%m%d" $(git merge-base origin/main HEAD) | tee merge-base-date
- restore_cache:
name: "Restore CCache cache"
keys:
- velox-ccache-debug-{{ arch }}-{{ checksum "merge-base-date" }}
- run:
name: "Build on MacOS"
command: |
export PATH=~/deps/bin:~/deps/opt/bison/bin:~/deps/opt/flex/bin:${PATH}
mkdir -p .ccache
export CCACHE_DIR=$(pwd)/.ccache
ccache -sz -M 5Gi
cmake \
-B _build/debug \
-GNinja \
-DTREAT_WARNINGS_AS_ERRORS=1 \
-DENABLE_ALL_WARNINGS=1 \
-DVELOX_ENABLE_PARQUET=ON \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_PREFIX_PATH=~/deps \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DFLEX_INCLUDE_DIR=~/deps/opt/flex/include
ninja -C _build/debug
ccache -s
no_output_timeout: 1h
- save_cache:
name: "Save CCache cache"
key: velox-ccache-debug-{{ arch }}-{{ checksum "merge-base-date" }}
paths:
- .ccache/

linux-build:
executor: build
environment:
Expand Down Expand Up @@ -681,25 +604,13 @@ workflows:
- linux-build-options
- linux-adapters
- linux-presto-fuzzer-run
- macos-build:
matrix:
parameters:
os: [macos-intel]
- format-check
- header-check
- doc-gen-job:
filters:
branches:
only:
- main
- macos-build:
matrix:
parameters:
os: [ macos-m1 ]
filters:
branches:
only:
- main

shorter-fuzzer:
unless: << pipeline.parameters.run-longer-expression-fuzzer >>
Expand All @@ -708,22 +619,10 @@ workflows:
- linux-pr-fuzzer-run
- linux-build-options
- linux-adapters
- macos-build:
matrix:
parameters:
os: [ macos-intel ]
- format-check
- header-check
- doc-gen-job:
filters:
branches:
only:
- main
- macos-build:
matrix:
parameters:
os: [ macos-m1 ]
filters:
branches:
only:
- main
81 changes: 81 additions & 0 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: macOS Build

on:
push:
pull_request:

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.repository }}-${{ github.head_ref || github.sha }}
cancel-in-progress: true

jobs:
# Configures and builds Velox in Debug mode on GitHub-hosted macOS runners,
# once per entry of the `os` matrix below.
macos-build:
name: "${{ matrix.os }}"
strategy:
# Do not cancel the remaining matrix job when one of them fails.
fail-fast: false
matrix:
# macos-13 = x86_64 Mac
# macos-14 = arm64 Mac
os: [macos-13, macos-14]
runs-on: ${{ matrix.os }}
env:
# ccache directory inside the workspace; the "Cache ccache" step saves and
# restores exactly this path.
CCACHE_DIR: '${{ github.workspace }}/.ccache'
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive
- name: Install Dependencies
# Installs the listed packages with Homebrew, then appends NJOBS (the value
# of `sysctl -n hw.ncpu`) to GITHUB_ENV so later steps can read $NJOBS.
run: |
brew install \
bison boost ccache double-conversion flex fmt gflags glog \
icu4c libevent libsodium lz4 lzo ninja openssl range-v3 simdjson \
snappy thrift xz xsimd zstd
echo "NJOBS=`sysctl -n hw.ncpu`" >> $GITHUB_ENV
- name: Cache ccache
uses: actions/cache@v4
with:
path: '${{ env.CCACHE_DIR }}'
# The exact key includes a hash of the files matched by 'velox/*'.
# NOTE(review): that glob looks like it only covers the top level of
# velox/ - confirm this is the intended invalidation scope.
key: ccache-macos-${{ matrix.os }}-${{ hashFiles('velox/*') }}
# Prefix fallback used when the exact key misses: any earlier cache for
# the same matrix OS.
restore-keys: ccache-macos-${{ matrix.os }}

- name: Configure Build
env:
# folly is not in the brew install list above; presumably BUNDLED makes
# the Velox CMake scripts build it from source - TODO confirm.
folly_SOURCE: BUNDLED
# Prints and zeroes the ccache statistics, sets the cache size limit to
# 5Gi, then generates a Ninja build tree in _build/debug.
run: |
ccache -sz -M 5Gi
cmake \
-B _build/debug \
-GNinja \
-DTREAT_WARNINGS_AS_ERRORS=1 \
-DENABLE_ALL_WARNINGS=1 \
-DVELOX_ENABLE_PARQUET=ON \
-DCMAKE_BUILD_TYPE=Debug
- name: Build
# Builds with $NJOBS parallel jobs and prints ccache statistics afterwards.
run: |
cmake --build _build/debug -j $NJOBS
ccache -s
- name: Run Tests
# This step is unconditionally disabled; the ctest command below never runs.
# NOTE(review): confirm whether tests are meant to be enabled later.
if: false
run: ctest -j $NJOBS --test-dir _build/debug --output-on-failure

2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ if(${VELOX_ENABLE_DUCKDB})
endif()

set_source(fmt)
resolve_dependency(fmt)
resolve_dependency(fmt 9.0.0)

if(NOT ${VELOX_BUILD_MINIMAL})
find_package(ZLIB REQUIRED)
Expand Down
3 changes: 3 additions & 0 deletions scripts/setup-adapters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ function install_gcs-sdk-cpp {
}

function install_azure-storage-sdk-cpp {
# Disable VCPKG to install additional static dependencies under the VCPKG installed path
# instead of using system pre-installed dependencies.
export AZURE_SDK_DISABLE_AUTO_VCPKG=ON
vcpkg_commit_id=7a6f366cefd27210f6a8309aed10c31104436509
github_checkout azure/azure-sdk-for-cpp azure-storage-files-datalake_12.8.0
sed -i "s/set(VCPKG_COMMIT_STRING .*)/set(VCPKG_COMMIT_STRING $vcpkg_commit_id)/" cmake-modules/AzureVcpkg.cmake
Expand Down
10 changes: 8 additions & 2 deletions scripts/setup-ubuntu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ CPU_TARGET="${CPU_TARGET:-avx}"
COMPILER_FLAGS=$(get_cxx_flags "$CPU_TARGET")
export COMPILER_FLAGS
FB_OS_VERSION=v2023.12.04.00
FMT_VERSION=10.1.1
NPROC=$(getconf _NPROCESSORS_ONLN)
DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)}
export CMAKE_BUILD_TYPE=Release

# Install all velox and folly dependencies.
# Install all velox and folly dependencies.
# There is an issue on 22.04 where a version conflict prevents glog install,
# installing libunwind first fixes this.
apt update && apt install sudo
Expand All @@ -46,7 +47,6 @@ sudo --preserve-env apt update && sudo --preserve-env apt install -y libunwind-d
libboost-all-dev \
libicu-dev \
libdouble-conversion-dev \
libfmt-dev \
libgoogle-glog-dev \
libbz2-dev \
libgflags-dev \
Expand Down Expand Up @@ -87,6 +87,11 @@ function prompt {
) 2> /dev/null
}

# Fetches fmtlib/fmt at the ref named by FMT_VERSION via the github_checkout
# helper and installs it with cmake_install, passing -DFMT_TEST=OFF so fmt's
# own tests are not built.
function install_fmt {
github_checkout fmtlib/fmt "${FMT_VERSION}"
cmake_install -DFMT_TEST=OFF
}

function install_folly {
github_checkout facebook/folly "${FB_OS_VERSION}"
cmake_install -DBUILD_TESTS=OFF -DFOLLY_HAVE_INT128_T=ON
Expand Down Expand Up @@ -120,6 +125,7 @@ function install_conda {
}

function install_velox_deps {
run_and_time install_fmt
run_and_time install_folly
run_and_time install_fizz
run_and_time install_wangle
Expand Down
19 changes: 13 additions & 6 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
}

std::unique_ptr<dwio::common::SerDeOptions> parseSerdeParameters(
const std::unordered_map<std::string, std::string>& serdeParameters) {
const std::unordered_map<std::string, std::string>& serdeParameters,
const std::unordered_map<std::string, std::string>& tableParameters) {
auto fieldIt = serdeParameters.find(dwio::common::SerDeOptions::kFieldDelim);
if (fieldIt == serdeParameters.end()) {
fieldIt = serdeParameters.find("serialization.format");
Expand All @@ -393,9 +394,13 @@ std::unique_ptr<dwio::common::SerDeOptions> parseSerdeParameters(
auto mapKeyIt =
serdeParameters.find(dwio::common::SerDeOptions::kMapKeyDelim);

auto nullStringIt = tableParameters.find(
dwio::common::TableParameter::kSerializationNullFormat);

if (fieldIt == serdeParameters.end() &&
collectionIt == serdeParameters.end() &&
mapKeyIt == serdeParameters.end()) {
mapKeyIt == serdeParameters.end() &&
nullStringIt == tableParameters.end()) {
return nullptr;
}

Expand All @@ -413,22 +418,23 @@ std::unique_ptr<dwio::common::SerDeOptions> parseSerdeParameters(
}
auto serDeOptions = std::make_unique<dwio::common::SerDeOptions>(
fieldDelim, collectionDelim, mapKeyDelim);
serDeOptions->nullString = nullStringIt->second;
return serDeOptions;
}

void configureReaderOptions(
dwio::common::ReaderOptions& readerOptions,
const std::shared_ptr<HiveConfig>& hiveConfig,
const Config* sessionProperties,
const RowTypePtr& fileSchema,
std::shared_ptr<HiveConnectorSplit> hiveSplit) {
const std::shared_ptr<HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<HiveConnectorSplit>& hiveSplit) {
readerOptions.setMaxCoalesceBytes(hiveConfig->maxCoalescedBytes());
readerOptions.setMaxCoalesceDistance(hiveConfig->maxCoalescedDistanceBytes());
readerOptions.setFileColumnNamesReadAsLowerCase(
hiveConfig->isFileColumnNamesReadAsLowerCase(sessionProperties));
readerOptions.setUseColumnNamesForColumnMapping(
hiveConfig->isOrcUseColumnNames(sessionProperties));
readerOptions.setFileSchema(fileSchema);
readerOptions.setFileSchema(hiveTableHandle->dataColumns());
readerOptions.setFooterEstimatedSize(hiveConfig->footerEstimatedSize());
readerOptions.setFilePreloadThreshold(hiveConfig->filePreloadThreshold());

Expand All @@ -439,7 +445,8 @@ void configureReaderOptions(
dwio::common::toString(readerOptions.getFileFormat()),
dwio::common::toString(hiveSplit->fileFormat));
} else {
auto serDeOptions = parseSerdeParameters(hiveSplit->serdeParameters);
auto serDeOptions = parseSerdeParameters(
hiveSplit->serdeParameters, hiveTableHandle->tableParameters());
if (serDeOptions) {
readerOptions.setSerDeOptions(*serDeOptions);
}
Expand Down
5 changes: 3 additions & 2 deletions velox/connectors/hive/HiveConnectorUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
namespace facebook::velox::connector::hive {

class HiveColumnHandle;
class HiveTableHandle;
class HiveConfig;
struct HiveConnectorSplit;

Expand Down Expand Up @@ -57,8 +58,8 @@ void configureReaderOptions(
dwio::common::ReaderOptions& readerOptions,
const std::shared_ptr<HiveConfig>& config,
const Config* sessionProperties,
const RowTypePtr& fileSchema,
std::shared_ptr<HiveConnectorSplit> hiveSplit);
const std::shared_ptr<HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<HiveConnectorSplit>& hiveSplit);

void configureRowReaderOptions(
dwio::common::RowReaderOptions& rowReaderOptions,
Expand Down
2 changes: 1 addition & 1 deletion velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void SplitReader::configureReaderOptions() {
baseReaderOpts_,
hiveConfig_,
connectorQueryCtx_->sessionProperties(),
hiveTableHandle_->dataColumns(),
hiveTableHandle_,
hiveSplit_);
}

Expand Down
Loading

0 comments on commit 7ecd8ed

Please sign in to comment.