diff --git a/.github/.rat-excludes b/.github/.rat-excludes new file mode 100644 index 0000000..b1fb2b8 --- /dev/null +++ b/.github/.rat-excludes @@ -0,0 +1,14 @@ +build/* +build-debug/* +build-release/* +test_data/* +third_party/* +build_support/* +scripts/* +cmake_modules/* +.codespell_ignore +.gitignore +rat-report.txt +requirements.txt +.gitattributes +.*\.svg$ diff --git a/.github/actions/setup-ccache/action.yml b/.github/actions/setup-ccache/action.yml new file mode 100644 index 0000000..62de283 --- /dev/null +++ b/.github/actions/setup-ccache/action.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+name: Setup ccache
+description: Install, configure and cache ccache for CI builds
+
+inputs:
+  cache-key-prefix:
+    description: Prefix for the cache key (e.g., ccache-gcc-test)
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Install ccache
+      shell: bash
+      run: sudo apt-get update && sudo apt-get install -y ccache
+
+    - name: Restore ccache
+      uses: actions/cache@v4
+      with:
+        path: ~/.ccache
+        key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-${{ github.ref_name }}-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }}
+        restore-keys: |
+          ${{ inputs.cache-key-prefix }}-${{ runner.os }}-${{ github.ref_name }}-
+          ${{ inputs.cache-key-prefix }}-${{ runner.os }}-
+
+    - name: Configure ccache
+      shell: bash
+      run: ci/scripts/setup_ccache.sh
diff --git a/LICENSE b/LICENSE
index 2665c1e..b037915 100644
--- a/LICENSE
+++ b/LICENSE
@@ -292,6 +292,7 @@ This product includes code from Apache Arrow.
     - include/paimon/status.h
     - include/paimon/string_builder.h
     - src/paimon/common/utils/status.cpp
+  * Arrow C Data Interface in include/paimon/arrow/abi.h
   * Build support utilities:
     * build_support/asan-suppressions.txt
     * build_support/get-upstream-commit.sh
@@ -314,6 +315,8 @@ This product includes code from Apache Arrow.
   * third-party toolchain and patches:
     - cmake_modules/ThirdpartyToolchain.cmake
     - cmake_modules/arrow.diff
+    - third_party/download_dependencies.sh
+    - third_party/versions.txt
 
 Copyright: 2016-2024 The Apache Software Foundation.
 Home page: https://arrow.apache.org/
diff --git a/PaimonConfig.cmake.in b/PaimonConfig.cmake.in
new file mode 100644
index 0000000..e56d9d5
--- /dev/null
+++ b/PaimonConfig.cmake.in
@@ -0,0 +1,222 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Imported targets for the installed Paimon libraries.
+#
+# Every library is offered as <name>_shared and <name>_static. A target is
+# declared only when its library file exists under the install prefix, so a
+# component or linkage that was not installed never produces a target with a
+# dangling IMPORTED_LOCATION; check `if(TARGET <name>_shared)` before linking
+# anything optional. The `NOT TARGET` test keeps a repeated inclusion of this
+# file in one scope from failing on an already-declared target.
+
+# Main library
+if(NOT TARGET paimon_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon.so")
+  add_library(paimon_shared SHARED IMPORTED)
+  set_target_properties(paimon_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon.a")
+  add_library(paimon_static STATIC IMPORTED)
+  set_target_properties(paimon_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_parquet_file_format
+if(NOT TARGET paimon_parquet_file_format_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_parquet_file_format.so")
+  add_library(paimon_parquet_file_format_shared SHARED IMPORTED)
+  set_target_properties(paimon_parquet_file_format_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_parquet_file_format.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_parquet_file_format_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_parquet_file_format.a")
+  add_library(paimon_parquet_file_format_static STATIC IMPORTED)
+  set_target_properties(paimon_parquet_file_format_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_parquet_file_format.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_orc_file_format
+if(NOT TARGET paimon_orc_file_format_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_orc_file_format.so")
+  add_library(paimon_orc_file_format_shared SHARED IMPORTED)
+  set_target_properties(paimon_orc_file_format_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_orc_file_format.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_orc_file_format_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_orc_file_format.a")
+  add_library(paimon_orc_file_format_static STATIC IMPORTED)
+  set_target_properties(paimon_orc_file_format_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_orc_file_format.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_local_file_system
+if(NOT TARGET paimon_local_file_system_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_local_file_system.so")
+  add_library(paimon_local_file_system_shared SHARED IMPORTED)
+  set_target_properties(paimon_local_file_system_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_local_file_system.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_local_file_system_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_local_file_system.a")
+  add_library(paimon_local_file_system_static STATIC IMPORTED)
+  set_target_properties(paimon_local_file_system_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_local_file_system.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_avro_file_format
+if(NOT TARGET paimon_avro_file_format_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_avro_file_format.so")
+  add_library(paimon_avro_file_format_shared SHARED IMPORTED)
+  set_target_properties(paimon_avro_file_format_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_avro_file_format.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_avro_file_format_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_avro_file_format.a")
+  add_library(paimon_avro_file_format_static STATIC IMPORTED)
+  set_target_properties(paimon_avro_file_format_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_avro_file_format.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_blob_file_format
+if(NOT TARGET paimon_blob_file_format_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_blob_file_format.so")
+  add_library(paimon_blob_file_format_shared SHARED IMPORTED)
+  set_target_properties(paimon_blob_file_format_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_blob_file_format.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_blob_file_format_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_blob_file_format.a")
+  add_library(paimon_blob_file_format_static STATIC IMPORTED)
+  set_target_properties(paimon_blob_file_format_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_blob_file_format.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_file_index
+if(NOT TARGET paimon_file_index_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_file_index.so")
+  add_library(paimon_file_index_shared SHARED IMPORTED)
+  set_target_properties(paimon_file_index_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_file_index.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_file_index_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_file_index.a")
+  add_library(paimon_file_index_static STATIC IMPORTED)
+  set_target_properties(paimon_file_index_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_file_index.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_global_index
+if(NOT TARGET paimon_global_index_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_global_index.so")
+  add_library(paimon_global_index_shared SHARED IMPORTED)
+  set_target_properties(paimon_global_index_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_global_index.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_global_index_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_global_index.a")
+  add_library(paimon_global_index_static STATIC IMPORTED)
+  set_target_properties(paimon_global_index_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_global_index.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_jindo_file_system
+if(NOT TARGET paimon_jindo_file_system_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_jindo_file_system.so")
+  add_library(paimon_jindo_file_system_shared SHARED IMPORTED)
+  set_target_properties(paimon_jindo_file_system_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_jindo_file_system.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_jindo_file_system_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_jindo_file_system.a")
+  add_library(paimon_jindo_file_system_static STATIC IMPORTED)
+  set_target_properties(paimon_jindo_file_system_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_jindo_file_system.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_lance_file_format
+if(NOT TARGET paimon_lance_file_format_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lance_file_format.so")
+  add_library(paimon_lance_file_format_shared SHARED IMPORTED)
+  set_target_properties(paimon_lance_file_format_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lance_file_format.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_lance_file_format_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lance_file_format.a")
+  add_library(paimon_lance_file_format_static STATIC IMPORTED)
+  set_target_properties(paimon_lance_file_format_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lance_file_format.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+
+# paimon_lumina_index
+if(NOT TARGET paimon_lumina_index_shared
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lumina_index.so")
+  add_library(paimon_lumina_index_shared SHARED IMPORTED)
+  set_target_properties(paimon_lumina_index_shared PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lumina_index.so"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
+if(NOT TARGET paimon_lumina_index_static
+   AND EXISTS "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lumina_index.a")
+  add_library(paimon_lumina_index_static STATIC IMPORTED)
+  set_target_properties(paimon_lumina_index_static PROPERTIES
+    IMPORTED_LOCATION "@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libpaimon_lumina_index.a"
+    INTERFACE_INCLUDE_DIRECTORIES "@CMAKE_INSTALL_PREFIX@/include"
+  )
+endif()
diff --git a/build_and_package.sh b/build_and_package.sh
new file mode 100755
index 0000000..f9e671e
--- /dev/null
+++ b/build_and_package.sh
@@ -0,0 +1,155 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +SOURCE_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +OUTPUT_DIR="$SOURCE_ROOT/output" +BUILD_TYPE="Release" +BUILD_DIR="$SOURCE_ROOT/build-release" +BUILD_NAME="paimon-cpp" +MAKE_CLEAN=false +PACKAGE=false +CMAKE_OPTIONS=() +JOBS="" + +show_help() { + cat << EOF +Usage: $0 [options] [cmake_options...] + +Options: + -r, --release Build release version (default) + -d, --debug Build debug version + -c, --clean Clean build directory before building + -p, --package Package creation + -j, --jobs Number of parallel jobs for building (default: auto-detect) + -h, --help Show this help message + +CMake Options: + Any unrecognized options will be passed directly to CMake. + You can specify multiple CMake options. 
+ +Examples: + $0 -r -p -j 8 -DPAIMON_BUILD_SHARED=ON -DPAIMON_BUILD_STATIC=OFF + $0 --debug --clean --package --jobs 4 + +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + -r|--release) + BUILD_TYPE="Release" + BUILD_DIR="$SOURCE_ROOT/build-release" + BUILD_NAME="paimon-cpp" + shift + ;; + -d|--debug) + BUILD_TYPE="Debug" + BUILD_DIR="$SOURCE_ROOT/build-debug" + BUILD_NAME="paimon-cpp-debug" + shift + ;; + -c|--clean) + MAKE_CLEAN=true + shift + ;; + -p|--package) + PACKAGE=true + shift + ;; + -j|--jobs) + shift + if [[ $# -gt 0 && $1 =~ ^[0-9]+$ ]]; then + JOBS="$1" + shift + else + echo "Error: -j/--jobs requires a numeric argument" >&2 + exit 1 + fi + ;; + -h|--help) + show_help + exit 0 + ;; + *) + # All remaining parameters are CMake options + CMAKE_OPTIONS+=("$1") + shift + ;; + esac +done + +echo "========== Build Configuration ==========" +echo "Build Type: $BUILD_TYPE" +echo "Package Name: $BUILD_NAME" +echo "Clean Build: $MAKE_CLEAN" +echo "Package: $PACKAGE" +if [ -n "$JOBS" ]; then + echo "Parallel Jobs: $JOBS" +else + echo "Parallel Jobs: auto-detect" +fi +if [ ${#CMAKE_OPTIONS[@]} -gt 0 ]; then + echo "CMake Options: ${CMAKE_OPTIONS[*]}" +else + echo "CMake Options: None" +fi +echo "=========================================" + +echo "Step 1: Downloading dependencies..." +"$SOURCE_ROOT"/third_party/download_dependencies.sh + +echo "Step 2: Building Paimon..." +PACKAGE_DIR="$OUTPUT_DIR/$BUILD_NAME" + +if [ "$MAKE_CLEAN" = true ]; then + echo "Cleaning build directory: $BUILD_DIR" + rm -rf "$BUILD_DIR" +fi +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +CMAKE_ARGS=( + -G "Ninja" + -DCMAKE_BUILD_TYPE="$BUILD_TYPE" + -DCMAKE_INSTALL_PREFIX="$PACKAGE_DIR" +) + +if [ ${#CMAKE_OPTIONS[@]} -gt 0 ]; then + CMAKE_ARGS+=("${CMAKE_OPTIONS[@]}") +fi + +cmake "${CMAKE_ARGS[@]}" .. + +# Set default JOBS if not specified +if [ -z "$JOBS" ]; then + JOBS=$(nproc 2>/dev/null || echo 4) +fi + +ninja -j"$JOBS" + +if [ "$PACKAGE" = true ]; then + echo "Step 3: Packaging..." 
+    mkdir -p "$OUTPUT_DIR"
+    cd "$BUILD_DIR"
+    ninja install
+    tar -czvf "$OUTPUT_DIR/$BUILD_NAME.tar.gz" -C "$OUTPUT_DIR" "$BUILD_NAME"
+    echo "Package created: $OUTPUT_DIR/$BUILD_NAME.tar.gz"
+else
+    echo "Step 3: Packaging skipped."
+fi
diff --git a/ci/scripts/build_paimon.sh b/ci/scripts/build_paimon.sh
new file mode 100755
index 0000000..da28fef
--- /dev/null
+++ b/ci/scripts/build_paimon.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -eux
+
+source_dir=${1}  # required: root of the source tree
+enable_sanitizer=${2:-false}  # "true" adds the ASAN and UBSAN options
+check_clang_tidy=${3:-false}  # "true" builds the check-clang-tidy target after the tests
+build_type=${4:-Debug}  # value for CMAKE_BUILD_TYPE
+build_dir="${source_dir}/build"  # scratch build tree, removed at the end of the script
+
+# Display ccache status if available
+if command -v ccache &> /dev/null; then
+    echo "=== ccache found: $(ccache --version | head -1) ==="
+    ccache -p | grep -E "cache_dir|max_size|compression" || true
+    ccache -z # Reset statistics for this build
+else
+    echo "=== ccache not found, compiling without cache acceleration ==="
+fi
+
+mkdir "${build_dir}"  # no -p: an existing directory aborts here, so the final rm -rf cannot delete it
+pushd "${build_dir}"
+
+ENABLE_LUMINA="ON"
+ENABLE_LANCE="ON"
+if [[ "${CC:-}" == *"gcc-8"* ]] || [[ "${CXX:-}" == *"g++-8"* ]]; then
+    ENABLE_LUMINA="OFF" # Lumina is only supported on GCC 9 or higher.
+    ENABLE_LANCE="OFF"
+    # Lance's prebuilt binaries can only be compiled on Ubuntu 22.04 and above
+    # which requires a higher version of glibc,
+    # but Ubuntu 22.04 and above no longer ships with gcc-8 by default.
+    # Consider supporting Lance from source compilation in the future
+fi
+
+CMAKE_ARGS=(
+    -G "Ninja"
+    "-DCMAKE_BUILD_TYPE=${build_type}"
+    "-DPAIMON_BUILD_TESTS=ON"
+    "-DPAIMON_ENABLE_LANCE=${ENABLE_LANCE}"
+    "-DPAIMON_ENABLE_JINDO=ON"
+    "-DPAIMON_ENABLE_LUMINA=${ENABLE_LUMINA}"
+    "-DPAIMON_ENABLE_LUCENE=ON"
+)
+
+if [[ "${enable_sanitizer}" == "true" ]]; then
+    CMAKE_ARGS+=(
+        "-DPAIMON_USE_ASAN=ON"
+        "-DPAIMON_USE_UBSAN=ON"
+    )
+fi
+
+cmake "${CMAKE_ARGS[@]}" "${source_dir}"
+cmake --build . -- -j"$(nproc)"
+ctest --output-on-failure -j "$(nproc)"
+
+if [[ "${check_clang_tidy}" == "true" ]]; then
+    cmake --build . --target check-clang-tidy
+fi
+
+# Print ccache statistics after build
+if command -v ccache &> /dev/null; then
+    echo "=== ccache statistics after build ==="
+    ccache -s
+fi
+
+popd
+
+rm -rf "${build_dir}"  # reached only on success; set -e exits earlier on any failure
diff --git a/ci/scripts/setup_ccache.sh b/ci/scripts/setup_ccache.sh
new file mode 100755
index 0000000..d82536c
--- /dev/null
+++ b/ci/scripts/setup_ccache.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +echo "PAIMON_USE_CCACHE=ON" >> $GITHUB_ENV + +echo "CCACHE_COMPILERCHECK=content" >> $GITHUB_ENV +echo "CCACHE_DIR=${HOME}/.ccache" >> $GITHUB_ENV +echo "CCACHE_MAXSIZE=1G" >> $GITHUB_ENV +echo "CCACHE_COMPRESS=true" >> $GITHUB_ENV +echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV + +mkdir -p "${HOME}/.ccache" diff --git a/examples/clean_demo.cpp b/examples/clean_demo.cpp new file mode 100644 index 0000000..d34cb83 --- /dev/null +++ b/examples/clean_demo.cpp @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include "arrow/api.h" +#include "arrow/c/bridge.h" +#include "arrow/ipc/api.h" +#include "paimon/api.h" +#include "paimon/catalog/catalog.h" +#include "paimon/orphan_files_cleaner.h" + +namespace fs = std::filesystem; +namespace paimon { +Status CleanOrphanFiles(const std::string& table_path, int64_t older_than_ms) { + CleanContextBuilder clean_context_builder(table_path); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr clean_context, + clean_context_builder.WithOlderThanMs(older_than_ms).Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr orphan_files_cleaner, + OrphanFilesCleaner::Create(std::move(clean_context))); + PAIMON_ASSIGN_OR_RAISE(std::set cleaned_paths, orphan_files_cleaner->Clean()); + + for (const auto& clean_file : cleaned_paths) { + std::cout << "clean_file_path : " << clean_file << std::endl; + } + + return Status::OK(); +} + +Status DropPartition(const std::string& table_path, + const std::vector>& partitions) { + CommitContextBuilder commit_context_builder(table_path, /*commit_user=*/"commit_user_1"); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr commit_context, + commit_context_builder.Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr committer, + FileStoreCommit::Create(std::move(commit_context))); + PAIMON_RETURN_NOT_OK(committer->DropPartition(partitions, /*commit_identifier=*/10)); + + return Status::OK(); +} + +Status ExpireSnapshot(const std::string& table_path) { + CommitContextBuilder commit_context_builder(table_path, /*commit_user=*/"commit_user_1"); + std::map commit_options = { + {Options::SNAPSHOT_NUM_RETAINED_MAX, "2"}, + {Options::SNAPSHOT_NUM_RETAINED_MIN, "1"}, + {Options::SNAPSHOT_TIME_RETAINED, "1ms"}, + {Options::SNAPSHOT_CLEAN_EMPTY_DIRECTORIES, "true"}}; + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr commit_context, + commit_context_builder.SetOptions(commit_options).Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr committer, + FileStoreCommit::Create(std::move(commit_context))); + 
PAIMON_RETURN_NOT_OK(committer->Expire()); + + return Status::OK(); +} + +} // namespace paimon + +bool CopyToTempDirectory(const fs::path& src, const fs::path& dst) { + try { + if (!fs::exists(dst)) { + fs::create_directories(dst); + } + for (const auto& entry : fs::recursive_directory_iterator(src)) { + const auto& relativePath = fs::relative(entry.path(), src); + const auto& targetPath = dst / relativePath; + if (entry.is_directory()) { + fs::create_directories(targetPath); + } else { + fs::copy_file(entry.path(), targetPath, fs::copy_options::overwrite_existing); + } + } + } catch (const fs::filesystem_error& e) { + std::cerr << "filesystem error: " << e.what() << std::endl; + return false; + } + return true; +} + +int main(int argc, char* argv[]) { + if (argc != 4) { + std::cout << "Usage: " << argv[0] << " " + << std::endl; + return -1; + } + std::string origin_table_path = std::string(argv[1]); + std::string temp_table_path = std::string(argv[2]); + std::string clean_mode = std::string(argv[3]); + + if (!CopyToTempDirectory(origin_table_path, temp_table_path)) { + return -1; + } + + std::map clean_options; + paimon::Status status; + if (clean_mode == "orphan_file") { + std::cout << "enter the timestamp (ms) before which orphan files will be deleted" + << std::endl; + int64_t older_than_ms; + std::cin >> older_than_ms; + status = paimon::CleanOrphanFiles(temp_table_path, older_than_ms); + } else if (clean_mode == "drop_partition") { + std::cout << "enter partition key-value pairs to drop. 
type 'EOF EOF' to finish" + << std::endl; + std::string partition_key, value; + std::vector> partitions; + while (std::cin >> partition_key >> value) { + if (partition_key == "EOF" && value == "EOF") break; + partitions.push_back({{partition_key, value}}); + } + status = paimon::DropPartition(temp_table_path, partitions); + } else if (clean_mode == "expire_snapshot") { + status = paimon::ExpireSnapshot(temp_table_path); + } + + if (!status.ok()) { + std::cout << status.ToString() << std::endl; + return -1; + } + + return 0; +} diff --git a/examples/read_write_demo.cpp b/examples/read_write_demo.cpp new file mode 100644 index 0000000..946b350 --- /dev/null +++ b/examples/read_write_demo.cpp @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include "arrow/api.h" +#include "arrow/c/bridge.h" +#include "arrow/ipc/api.h" +#include "paimon/api.h" +#include "paimon/catalog/catalog.h" + +arrow::Result> PrepareData(const arrow::FieldVector& fields) { + arrow::StringBuilder f0_builder; + arrow::Int32Builder f1_builder; + arrow::Int32Builder f2_builder; + arrow::DoubleBuilder f3_builder; + + std::vector> data = { + {"Alice", 1, 0, 11.0}, {"Bob", 1, 1, 12.1}, {"Cathy", 1, 2, 13.2}}; + + for (const auto& row : data) { + ARROW_RETURN_NOT_OK(f0_builder.Append(std::get<0>(row))); + ARROW_RETURN_NOT_OK(f1_builder.Append(std::get<1>(row))); + ARROW_RETURN_NOT_OK(f2_builder.Append(std::get<2>(row))); + ARROW_RETURN_NOT_OK(f3_builder.Append(std::get<3>(row))); + } + + std::shared_ptr f0_array, f1_array, f2_array, f3_array; + ARROW_RETURN_NOT_OK(f0_builder.Finish(&f0_array)); + ARROW_RETURN_NOT_OK(f1_builder.Finish(&f1_array)); + ARROW_RETURN_NOT_OK(f2_builder.Finish(&f2_array)); + ARROW_RETURN_NOT_OK(f3_builder.Finish(&f3_array)); + + std::vector> children = {f0_array, f1_array, f2_array, f3_array}; + auto struct_type = arrow::struct_(fields); + return std::make_shared(struct_type, f0_array->length(), children); +} + +paimon::Status Run(const std::string& root_path, const std::string& db_name, + const std::string& table_name) { + std::map options = {{paimon::Options::MANIFEST_FORMAT, "orc"}, + {paimon::Options::FILE_FORMAT, "parquet"}, + {paimon::Options::FILE_SYSTEM, "local"}}; + + // create table + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr catalog, + paimon::Catalog::Create(root_path, options)); + PAIMON_RETURN_NOT_OK(catalog->CreateDatabase(db_name, options, /*ignore_if_exists=*/false)); + arrow::FieldVector fields = { + arrow::field("f0", arrow::utf8()), + arrow::field("f1", arrow::int32()), + arrow::field("f2", arrow::int32()), + arrow::field("f3", arrow::float64()), + }; + std::shared_ptr schema = arrow::schema(fields); + ::ArrowSchema arrow_schema; + arrow::Status 
arrow_status = arrow::ExportSchema(*schema, &arrow_schema); + if (!arrow_status.ok()) { + return paimon::Status::Invalid(arrow_status.message()); + } + PAIMON_RETURN_NOT_OK(catalog->CreateTable(paimon::Identifier(db_name, table_name), + &arrow_schema, + /*partition_keys=*/{}, + /*primary_keys=*/{}, options, + /*ignore_if_exists=*/false)); + + std::string table_path = root_path + "/" + db_name + ".db/" + table_name; + + std::string commit_user = "some_commit_user"; + // write + paimon::WriteContextBuilder context_builder(table_path, commit_user); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr write_context, + context_builder.SetOptions(options).Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr writer, + paimon::FileStoreWrite::Create(std::move(write_context))); + + // prepare data + auto struct_array = PrepareData(fields); + if (!struct_array.ok()) { + return paimon::Status::Invalid(struct_array.status().ToString()); + } + ::ArrowArray arrow_array; + arrow_status = arrow::ExportArray(*struct_array.ValueUnsafe(), &arrow_array); + if (!arrow_status.ok()) { + return paimon::Status::Invalid(arrow_status.message()); + } + paimon::RecordBatchBuilder batch_builder(&arrow_array); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr record_batch, + batch_builder.Finish()); + PAIMON_RETURN_NOT_OK(writer->Write(std::move(record_batch))); + PAIMON_ASSIGN_OR_RAISE(std::vector> commit_message, + writer->PrepareCommit()); + + // commit + paimon::CommitContextBuilder commit_context_builder(table_path, commit_user); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr commit_context, + commit_context_builder.SetOptions(options).Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr committer, + paimon::FileStoreCommit::Create(std::move(commit_context))); + PAIMON_RETURN_NOT_OK(committer->Commit(commit_message)); + + // scan + paimon::ScanContextBuilder scan_context_builder(table_path); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr scan_context, + scan_context_builder.SetOptions(options).Finish()); + 
PAIMON_ASSIGN_OR_RAISE(std::unique_ptr scanner, + paimon::TableScan::Create(std::move(scan_context))); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr plan, scanner->CreatePlan()); + + // read + paimon::ReadContextBuilder read_context_builder(table_path); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_context, + read_context_builder.SetOptions(options).Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr table_read, + paimon::TableRead::Create(std::move(read_context))); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr batch_reader, + table_read->CreateReader(plan->Splits())); + arrow::ArrayVector result_array_vector; + while (true) { + PAIMON_ASSIGN_OR_RAISE(paimon::BatchReader::ReadBatch batch, batch_reader->NextBatch()); + if (paimon::BatchReader::IsEofBatch(batch)) { + break; + } + auto& [c_array, c_schema] = batch; + auto arrow_result = arrow::ImportArray(c_array.get(), c_schema.get()); + if (!arrow_result.ok()) { + return paimon::Status::Invalid(arrow_result.status().ToString()); + } + auto result_array = arrow_result.ValueUnsafe(); + result_array_vector.push_back(result_array); + } + auto chunk_result = arrow::ChunkedArray::Make(result_array_vector); + if (!chunk_result.ok()) { + return paimon::Status::Invalid(chunk_result.status().ToString()); + } + std::cout << chunk_result.ValueUnsafe()->ToString() << std::endl; + return paimon::Status::OK(); +} + +int main(int argc, char** argv) { + if (argc != 4) { + std::cout << "Usage: " << argv[0] << " " + << std::endl; + return -1; + } + const std::string root_path = argv[1]; + const std::string db_name = argv[2]; + const std::string table_name = argv[3]; + paimon::Status status = Run(root_path, db_name, table_name); + if (!status.ok()) { + std::cerr << "Failed to run example:" << status.ToString() << std::endl; + return -1; + } + return 0; +} diff --git a/include/paimon/api.h b/include/paimon/api.h new file mode 100644 index 0000000..1771a4d --- /dev/null +++ b/include/paimon/api.h @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Coarse public API while the library is in development + +#pragma once + +#include "paimon/commit_context.h" // IWYU pragma: export +#include "paimon/defs.h" // IWYU pragma: export +#include "paimon/factories/factory.h" // IWYU pragma: export +#include "paimon/file_store_commit.h" // IWYU pragma: export +#include "paimon/file_store_write.h" // IWYU pragma: export +#include "paimon/fs/file_system_factory.h" // IWYU pragma: export +#include "paimon/memory/memory_pool.h" // IWYU pragma: export +#include "paimon/predicate/predicate.h" // IWYU pragma: export +#include "paimon/read_context.h" // IWYU pragma: export +#include "paimon/reader/batch_reader.h" // IWYU pragma: export +#include "paimon/record_batch.h" // IWYU pragma: export +#include "paimon/result.h" // IWYU pragma: export +#include "paimon/scan_context.h" // IWYU pragma: export +#include "paimon/status.h" // IWYU pragma: export +#include "paimon/table/source/table_read.h" // IWYU pragma: export +#include "paimon/table/source/table_scan.h" // IWYU pragma: export +#include "paimon/write_context.h" // IWYU pragma: export + +/// Top-level namespace for Paimon C++ API. 
+namespace paimon {} diff --git a/include/paimon/arrow/abi.h b/include/paimon/arrow/abi.h new file mode 100644 index 0000000..1640842 --- /dev/null +++ b/include/paimon/arrow/abi.h @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Adapted from Apache Arrow +// https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h + +/// \file abi.h Arrow C Data Interface +/// +/// The Arrow C Data interface defines a very small, stable set +/// of C definitions which can be easily copied into any project's +/// source code and vendored to be used for columnar data interchange +/// in the Arrow format. For non-C/C++ languages and runtimes, +/// it should be almost as easy to translate the C definitions into +/// the corresponding C FFI declarations. +/// +/// Applications and libraries can therefore work with Arrow memory +/// without necessarily using the Arrow libraries or reinventing +/// the wheel. Developers can choose between tight integration +/// with the Arrow software project or minimal integration with +/// the Arrow format only. 
+ +#pragma once + +#include + +// Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifdef __cplusplus +} +#endif diff --git a/src/paimon/symbols.map b/src/paimon/symbols.map new file mode 100644 index 0000000..aaddab8 --- /dev/null +++ b/src/paimon/symbols.map @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +{ + global: + extern "C++" { + *paimon::*; + }; + + # Symbols marked as 'local' are not exported by the DSO and thus may not + # be used by client applications. Everything except the above falls here. + # This ensures we hide symbols of static dependencies. + local: + *; +}; diff --git a/test/generate_coverage.sh b/test/generate_coverage.sh new file mode 100755 index 0000000..c3bfba6 --- /dev/null +++ b/test/generate_coverage.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +cd $(dirname "$0")/../ +mkdir -p build && cd build +cmake ../ -DCMAKE_BUILD_TYPE=Debug -DPAIMON_BUILD_TESTS=ON -DPAIMON_USE_ASAN=ON -DPAIMON_GENERATE_COVERAGE=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +make -j +make test +lcov --capture --directory src/paimon --directory test --output-file coverage.info +genhtml coverage.info --output-directory coverage + +ip=$(hostname -I | awk '{print $1}') +echo +echo "See coverage files at: $PWD/coverage.info" +echo "See coverage html files at: $PWD/coverage/" +echo "View code coverage at: http://$ip:8000/coverage/index.html" +python3 -m http.server diff --git a/third_party/download_dependencies.sh b/third_party/download_dependencies.sh new file mode 100755 index 0000000..55a0103 --- /dev/null +++ b/third_party/download_dependencies.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Adapted from Apache Arrow +# https://github.com/apache/arrow/blob/main/cpp/thirdparty/download_dependencies.sh + +# This script downloads all the thirdparty dependencies as a series of tarballs +# that can be used for offline builds, etc. 
+ +set -eu + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [ "$#" -ne 1 ]; then + orig_destdir=${SOURCE_DIR} +else + orig_destdir=$1 +fi + +# Try to canonicalize. Not all platforms support `readlink -f` or `realpath`. +# This only matters if there are symlinks you need to resolve before downloading +DESTDIR=$(readlink -f "${orig_destdir}" 2> /dev/null) || DESTDIR="${orig_destdir}" + +download_dependency() { + local url=$1 + local out=$2 + local expected_checksum=$3 + + # Determine which checksum command is available + local checksum_cmd="" + if command -v sha256sum >/dev/null 2>&1; then + checksum_cmd=(sha256sum) + checksum_field=1 + elif command -v shasum >/dev/null 2>&1; then + checksum_cmd=(shasum -a 256) + checksum_field=1 + elif command -v openssl >/dev/null 2>&1; then + checksum_cmd=(openssl dgst -sha256) + checksum_field=2 + else + echo "Error: No checksum command available (sha256sum, shasum, or openssl)" 1>&2 + exit 1 + fi + + # Function to calculate checksum + calculate_checksum() { + local file=$1 + "${checksum_cmd[@]}" "${file}" | cut -d' ' -f"${checksum_field}" + } + + # Check if the file already exists + if [ -f "${out}" ]; then + echo "File ${out} already exists, verifying checksum..." + # Calculate checksum of existing file + local actual_checksum + actual_checksum=$(calculate_checksum "${out}") + + # Compare checksums + if [ "${actual_checksum}" = "${expected_checksum}" ]; then + echo "Checksum matches, skipping download ${out}" + return 0 + else + echo "Checksum mismatch (expected: ${expected_checksum}, actual: ${actual_checksum}), re-downloading..." + rm -f "${out}" + fi + fi + + echo "Downloading ${url} to ${out}..." + wget --continue --output-document="${out}" "${url}" || \ + (echo "Failed downloading ${url}" 1>&2; exit 1) + + # Verify checksum after download + echo "Verifying checksum of downloaded file..." 
+ local actual_checksum + actual_checksum=$(calculate_checksum "${out}") + if [ "${actual_checksum}" != "${expected_checksum}" ]; then + echo "Error: Checksum mismatch (expected: ${expected_checksum}, actual: ${actual_checksum})" 1>&2 + rm -f "${out}" + exit 1 + fi + echo "Checksum verification passed" +} + +main() { + mkdir -p "${DESTDIR}" + + # Load `DEPENDENCIES` variable. + source "${SOURCE_DIR}"/versions.txt + + echo "# Environment variables for offline Paimon build" + for ((i = 0; i < ${#DEPENDENCIES[@]}; i++)); do + local dep_packed=${DEPENDENCIES[$i]} + + # Unpack each entry of the form "$home_var $tar_out $dep_url" + IFS=" " read -r dep_url_var dep_tar_name dep_url <<< "${dep_packed}" + + # Get dependency name for finding checksum + local dep_name=${dep_url_var%_URL} + local checksum_var="${dep_name}_BUILD_SHA256_CHECKSUM" + local expected_checksum=${!checksum_var} + + local out=${DESTDIR}/${dep_tar_name} + download_dependency "${dep_url}" "${out}" "${expected_checksum}" + + echo "export ${dep_url_var}=${out}" + done +} + +main diff --git a/third_party/versions.txt b/third_party/versions.txt new file mode 100644 index 0000000..c275999 --- /dev/null +++ b/third_party/versions.txt @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# Adapted from Apache Arrow +# https://github.com/apache/arrow/blob/main/cpp/thirdparty/versions.txt + +# Toolchain library versions +# +# This file is used by `download_dependencies.sh` and cmake to figure out which +# version of a dependency to fetch. In order to add a new dependency, add a +# version variable, e.g. MY_DEP_VERSION and append an entry in the +# `DEPENDENCIES` array (see the comment on top of the declaration for the +# format). + +THIRDPARTY_MIRROR_URL= + +PAIMON_ZLIB_BUILD_VERSION=1.3.1 +PAIMON_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23 +PAIMON_ZLIB_PKG_NAME=zlib-${PAIMON_ZLIB_BUILD_VERSION}.tar.gz + +PAIMON_ZSTD_BUILD_VERSION=1.5.7 +PAIMON_ZSTD_BUILD_SHA256_CHECKSUM=eb33e51f49a15e023950cd7825ca74a4a2b43db8354825ac24fc1b7ee09e6fa3 +PAIMON_ZSTD_PKG_NAME=zstd-${PAIMON_ZSTD_BUILD_VERSION}.tar.gz + +PAIMON_SNAPPY_BUILD_VERSION=1.1.10 +PAIMON_SNAPPY_BUILD_SHA256_CHECKSUM=49d831bffcc5f3d01482340fe5af59852ca2fe76c3e05df0e67203ebbe0f1d90 +PAIMON_SNAPPY_PKG_NAME=snappy-${PAIMON_SNAPPY_BUILD_VERSION}.tar.gz + +PAIMON_LZ4_BUILD_VERSION=v1.9.4 +PAIMON_LZ4_BUILD_SHA256_CHECKSUM=0b0e3aa07c8c063ddf40b082bdf7e37a1562bda40a0ff5272957f3e987e0e54b +PAIMON_LZ4_PKG_NAME=lz4-${PAIMON_LZ4_BUILD_VERSION}.tar.gz + +PAIMON_PROTOBUF_BUILD_VERSION=3.8.0 +PAIMON_PROTOBUF_BUILD_SHA256_CHECKSUM=b7220b41481011305bf9100847cf294393973e869973a9661046601959b2960b +PAIMON_PROTOBUF_PKG_NAME=protobuf-${PAIMON_PROTOBUF_BUILD_VERSION}.tar.gz + +PAIMON_TBB_BUILD_VERSION=v2021.13.0 +PAIMON_TBB_BUILD_SHA256_CHECKSUM=3ad5dd08954b39d113dc5b3f8a8dc6dc1fd5250032b7c491eb07aed5c94133e1 +PAIMON_TBB_PKG_NAME=tbb-${PAIMON_TBB_BUILD_VERSION}.tar.gz + +PAIMON_ORC_BUILD_VERSION=v2.1.1 +PAIMON_ORC_BUILD_SHA256_CHECKSUM=1f8eef537814fdcd003de13e49c6edb35427b45eb40bafd3355f775d99a0ff99 +PAIMON_ORC_PKG_NAME=orc-${PAIMON_ORC_BUILD_VERSION}.tar.gz + 
+PAIMON_GTEST_BUILD_VERSION=1.11.0 +PAIMON_GTEST_BUILD_SHA256_CHECKSUM=b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5 +PAIMON_GTEST_PKG_NAME=gtest-${PAIMON_GTEST_BUILD_VERSION}.tar.gz + +PAIMON_ARROW_BUILD_VERSION=17.0.0 +PAIMON_ARROW_BUILD_SHA256_CHECKSUM=9d280d8042e7cf526f8c28d170d93bfab65e50f94569f6a790982a878d8d898d +PAIMON_ARROW_PKG_NAME=apache-arrow-${PAIMON_ARROW_BUILD_VERSION}.tar.gz + +PAIMON_AVRO_BUILD_VERSION=c499eefb48aa2db906c7bca14a047223806f36db +PAIMON_AVRO_BUILD_SHA256_CHECKSUM=9771f1dcfe3c01aff7ff670e873e66d3406362f71941821d482de65f3d32d780 +PAIMON_AVRO_PKG_NAME=avro-${PAIMON_AVRO_BUILD_VERSION}.tar.gz + +PAIMON_FMT_BUILD_VERSION=11.2.0 +PAIMON_FMT_BUILD_SHA256_CHECKSUM=bc23066d87ab3168f27cef3e97d545fa63314f5c79df5ea444d41d56f962c6af +PAIMON_FMT_PKG_NAME=fmt-${PAIMON_FMT_BUILD_VERSION}.tar.gz + +PAIMON_GLOG_BUILD_VERSION=v0.7.1 +PAIMON_GLOG_BUILD_SHA256_CHECKSUM=00e4a87e87b7e7612f519a41e491f16623b12423620006f59f5688bfd8d13b08 +PAIMON_GLOG_PKG_NAME=glog-${PAIMON_GLOG_BUILD_VERSION}.tar.gz + +PAIMON_RE2_BUILD_VERSION=2022-06-01 +PAIMON_RE2_BUILD_SHA256_CHECKSUM=f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f +PAIMON_RE2_PKG_NAME=re2-${PAIMON_RE2_BUILD_VERSION}.tar.gz + +PAIMON_RAPIDJSON_BUILD_VERSION=232389d4f1012dddec4ef84861face2d2ba85709 +PAIMON_RAPIDJSON_BUILD_SHA256_CHECKSUM=b9290a9a6d444c8e049bd589ab804e0ccf2b05dc5984a19ed5ae75d090064806 +PAIMON_RAPIDJSON_PKG_NAME=rapidjson-${PAIMON_RAPIDJSON_BUILD_VERSION}.tar.gz + +PAIMON_JINDOSDK_C_BUILD_VERSION=6.10.2 +PAIMON_JINDOSDK_C_LINUX_X86_64_BUILD_SHA256_CHECKSUM=23e61c9815fab1cd88c369445bdbe1eab02cc09bafed3bb5118ecaf5b2fbc518 +PAIMON_JINDOSDK_C_LINUX_X86_64_PKG_NAME=jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux.tar.gz +PAIMON_JINDOSDK_C_LINUX_AARCH64_BUILD_SHA256_CHECKSUM=b4afbf6abaa9bad2e3c6bd6a02af2acd81f4dc418b313ef959e1c55f9fb13ebd +PAIMON_JINDOSDK_C_LINUX_AARCH64_PKG_NAME=jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux-el7-aarch64.tar.gz 
+PAIMON_JINDOSDK_C_MACOS_X86_64_BUILD_SHA256_CHECKSUM=3c79e2be018a486423fa63dd5ffe5a9a08477f9cf51ffdae81c0320e2190b489 +PAIMON_JINDOSDK_C_MACOS_X86_64_PKG_NAME=jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-x86_64.tar.gz +PAIMON_JINDOSDK_C_MACOS_AARCH64_BUILD_SHA256_CHECKSUM=7db9e47b7311f6ffcaa661a4d73c989640f9bbbbd2ad48e35788aa99d44759db +PAIMON_JINDOSDK_C_MACOS_AARCH64_PKG_NAME=jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-aarch64.tar.gz + +PAIMON_LUCENE_BUILD_VERSION=3.0.9 +PAIMON_LUCENE_BUILD_SHA256_CHECKSUM=4e69e29d5d79a976498ef71eab70c9c88c7014708be4450a9fda7780fe93584e +PAIMON_LUCENE_PKG_NAME=lucene-${PAIMON_LUCENE_BUILD_VERSION}.tar.gz + +PAIMON_LIMONP_BUILD_VERSION=1.0.1 +PAIMON_LIMONP_BUILD_SHA256_CHECKSUM=c7b18794f020dbaa1006229b49a39217a463da0cb3586aee83eb7471f4ae71df +PAIMON_LIMONP_PKG_NAME=limonp-${PAIMON_LIMONP_BUILD_VERSION}.tar.gz + +PAIMON_JIEBA_BUILD_VERSION=v5.6.0 +PAIMON_JIEBA_BUILD_SHA256_CHECKSUM=e6e517b778e0f4a99cbed1ee3eaa041616b74bc685e03a6ca08887ad9cedfe49 +PAIMON_JIEBA_PKG_NAME=jieba-${PAIMON_JIEBA_BUILD_VERSION}.tar.gz + +# Boost source package is bundled at third_party/boost/ +PAIMON_BOOST_BUILD_VERSION=1_66_0 +PAIMON_BOOST_BUILD_SHA256_CHECKSUM=28e9200637800fbfd1292b2c6876189dba7e8e1c5282c71fac6515e96f7af2b0 +PAIMON_BOOST_PKG_NAME=boost_${PAIMON_BOOST_BUILD_VERSION}.tar.gz + +# The first field is the name of the environment variable expected by cmake. +# This _must_ match what is defined. The second field is the name of the +# generated archive file. The third field is the url of the project for the +# given version. 
+DEPENDENCIES=( + "PAIMON_ZLIB_URL ${PAIMON_ZLIB_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/madler/zlib/releases/download/v${PAIMON_ZLIB_BUILD_VERSION}/zlib-${PAIMON_ZLIB_BUILD_VERSION}.tar.gz" + "PAIMON_ZSTD_URL ${PAIMON_ZSTD_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/facebook/zstd/releases/download/v${PAIMON_ZSTD_BUILD_VERSION}/zstd-${PAIMON_ZSTD_BUILD_VERSION}.tar.gz" + "PAIMON_SNAPPY_URL ${PAIMON_SNAPPY_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/google/snappy/archive/${PAIMON_SNAPPY_BUILD_VERSION}.tar.gz" + "PAIMON_LZ4_URL ${PAIMON_LZ4_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/lz4/lz4/archive/${PAIMON_LZ4_BUILD_VERSION}.tar.gz" + "PAIMON_PROTOBUF_URL ${PAIMON_PROTOBUF_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/protocolbuffers/protobuf/releases/download/v${PAIMON_PROTOBUF_BUILD_VERSION}/protobuf-all-${PAIMON_PROTOBUF_BUILD_VERSION}.tar.gz" + "PAIMON_TBB_URL ${PAIMON_TBB_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/uxlfoundation/oneTBB/archive/refs/tags/${PAIMON_TBB_BUILD_VERSION}.tar.gz" + "PAIMON_ORC_URL ${PAIMON_ORC_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/apache/orc/archive/refs/tags/${PAIMON_ORC_BUILD_VERSION}.tar.gz" + "PAIMON_GTEST_URL ${PAIMON_GTEST_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/google/googletest/archive/release-${PAIMON_GTEST_BUILD_VERSION}.tar.gz" + "PAIMON_ARROW_URL ${PAIMON_ARROW_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/apache/arrow/releases/download/apache-arrow-${PAIMON_ARROW_BUILD_VERSION}/apache-arrow-${PAIMON_ARROW_BUILD_VERSION}.tar.gz" + "PAIMON_AVRO_URL ${PAIMON_AVRO_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/apache/avro/archive/${PAIMON_AVRO_BUILD_VERSION}.tar.gz" + "PAIMON_FMT_URL ${PAIMON_FMT_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/fmtlib/fmt/archive/refs/tags/${PAIMON_FMT_BUILD_VERSION}.tar.gz" + "PAIMON_GLOG_URL ${PAIMON_GLOG_PKG_NAME} 
${THIRDPARTY_MIRROR_URL}https://github.com/google/glog/archive/${PAIMON_GLOG_BUILD_VERSION}.tar.gz" + "PAIMON_RAPIDJSON_URL ${PAIMON_RAPIDJSON_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/miloyip/rapidjson/archive/${PAIMON_RAPIDJSON_BUILD_VERSION}.tar.gz" + "PAIMON_RE2_URL ${PAIMON_RE2_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/google/re2/archive/${PAIMON_RE2_BUILD_VERSION}.tar.gz" + "PAIMON_JINDOSDK_C_LINUX_X86_64_URL ${PAIMON_JINDOSDK_C_LINUX_X86_64_PKG_NAME} https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux.tar.gz" + "PAIMON_JINDOSDK_C_LINUX_AARCH64_URL ${PAIMON_JINDOSDK_C_LINUX_AARCH64_PKG_NAME} https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux-el7-aarch64.tar.gz" + "PAIMON_JINDOSDK_C_MACOS_X86_64_URL ${PAIMON_JINDOSDK_C_MACOS_X86_64_PKG_NAME} https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-x86_64.tar.gz" + "PAIMON_JINDOSDK_C_MACOS_AARCH64_URL ${PAIMON_JINDOSDK_C_MACOS_AARCH64_PKG_NAME} https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-aarch64.tar.gz" + "PAIMON_LUCENE_URL ${PAIMON_LUCENE_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/luceneplusplus/LucenePlusPlus/archive/refs/tags/rel_${PAIMON_LUCENE_BUILD_VERSION}.tar.gz" + "PAIMON_LIMONP_URL ${PAIMON_LIMONP_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/yanyiwu/limonp/archive/refs/tags/v${PAIMON_LIMONP_BUILD_VERSION}.tar.gz" + "PAIMON_JIEBA_URL ${PAIMON_JIEBA_PKG_NAME} ${THIRDPARTY_MIRROR_URL}https://github.com/yanyiwu/cppjieba/archive/refs/tags/${PAIMON_JIEBA_BUILD_VERSION}.tar.gz" +)