From e08e1b3876676dfff5483e1a368b0857a682f850 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 23 Jun 2026 12:08:09 -0700 Subject: [PATCH 01/39] Scalable Topics: typed C++ SDK public API (pulsar::st) Header-only public API for the scalable-topics SDK under a new pulsar::st namespace (PIP-460/468/483): client, producers, the three consumer modes, transactions, schemas (reflect-cpp JSON/Avro and protobuf), and the Expected/Future result types, plus examples under examples/st. API definition only -- no lib/st implementation or C API yet. The new API requires C++20; the rest of the client stays C++17. Signed-off-by: Matteo Merli --- examples/CMakeLists.txt | 30 ++ examples/st/README.md | 40 ++ examples/st/SampleStCheckpointConsumer.cc | 69 +++ examples/st/SampleStJsonSchema.cc | 85 ++++ examples/st/SampleStProducer.cc | 91 ++++ examples/st/SampleStQueueConsumer.cc | 68 +++ examples/st/SampleStStreamConsumer.cc | 63 +++ include/pulsar/st/AvroSchema.h | 76 +++ include/pulsar/st/Checkpoint.h | 121 +++++ include/pulsar/st/CheckpointConsumer.h | 318 ++++++++++++ include/pulsar/st/Client.h | 360 +++++++++++++ include/pulsar/st/Consumer.h | 77 +++ include/pulsar/st/Error.h | 103 ++++ include/pulsar/st/Expected.h | 305 +++++++++++ include/pulsar/st/Future.h | 205 ++++++++ include/pulsar/st/JsonSchema.h | 76 +++ include/pulsar/st/Message.h | 237 +++++++++ include/pulsar/st/MessageId.h | 109 ++++ include/pulsar/st/Policies.h | 155 ++++++ include/pulsar/st/Producer.h | 478 ++++++++++++++++++ include/pulsar/st/ProtobufNativeSchema.h | 71 +++ include/pulsar/st/QueueConsumer.h | 372 ++++++++++++++ include/pulsar/st/Schema.h | 275 ++++++++++ include/pulsar/st/StreamConsumer.h | 416 +++++++++++++++ include/pulsar/st/Transaction.h | 149 ++++++ .../pulsar/st/detail/CheckpointConsumerCore.h | 65 +++ include/pulsar/st/detail/ClientCore.h | 73 +++ include/pulsar/st/detail/Cxx20.h | 26 + include/pulsar/st/detail/MessageCore.h | 82 +++ include/pulsar/st/detail/ProducerCore.h | 65 +++ 
include/pulsar/st/detail/QueueConsumerCore.h | 68 +++ include/pulsar/st/detail/SharedState.h | 96 ++++ include/pulsar/st/detail/StreamConsumerCore.h | 69 +++ vcpkg.json | 4 + 34 files changed, 4897 insertions(+) create mode 100644 examples/st/README.md create mode 100644 examples/st/SampleStCheckpointConsumer.cc create mode 100644 examples/st/SampleStJsonSchema.cc create mode 100644 examples/st/SampleStProducer.cc create mode 100644 examples/st/SampleStQueueConsumer.cc create mode 100644 examples/st/SampleStStreamConsumer.cc create mode 100644 include/pulsar/st/AvroSchema.h create mode 100644 include/pulsar/st/Checkpoint.h create mode 100644 include/pulsar/st/CheckpointConsumer.h create mode 100644 include/pulsar/st/Client.h create mode 100644 include/pulsar/st/Consumer.h create mode 100644 include/pulsar/st/Error.h create mode 100644 include/pulsar/st/Expected.h create mode 100644 include/pulsar/st/Future.h create mode 100644 include/pulsar/st/JsonSchema.h create mode 100644 include/pulsar/st/Message.h create mode 100644 include/pulsar/st/MessageId.h create mode 100644 include/pulsar/st/Policies.h create mode 100644 include/pulsar/st/Producer.h create mode 100644 include/pulsar/st/ProtobufNativeSchema.h create mode 100644 include/pulsar/st/QueueConsumer.h create mode 100644 include/pulsar/st/Schema.h create mode 100644 include/pulsar/st/StreamConsumer.h create mode 100644 include/pulsar/st/Transaction.h create mode 100644 include/pulsar/st/detail/CheckpointConsumerCore.h create mode 100644 include/pulsar/st/detail/ClientCore.h create mode 100644 include/pulsar/st/detail/Cxx20.h create mode 100644 include/pulsar/st/detail/MessageCore.h create mode 100644 include/pulsar/st/detail/ProducerCore.h create mode 100644 include/pulsar/st/detail/QueueConsumerCore.h create mode 100644 include/pulsar/st/detail/SharedState.h create mode 100644 include/pulsar/st/detail/StreamConsumerCore.h diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 6ffb4078..81796c09 
100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -105,3 +105,33 @@ target_link_libraries(SampleReaderCApi ${CLIENT_LIBS} pulsarShar target_link_libraries(SampleKeyValueSchemaConsumer ${CLIENT_LIBS} pulsarShared) target_link_libraries(SampleKeyValueSchemaProducer ${CLIENT_LIBS} pulsarShared) target_link_libraries(SampleCustomLoggerCApi ${CLIENT_LIBS} pulsarShared) + +# --- Scalable topics (pulsar::st) examples --------------------------------- +# These use the new typed scalable-topics API under include/pulsar/st. Its +# implementation (lib/st) does not exist yet, so the examples are COMPILED here +# for header/API verification but are NOT linked into executables (there are no +# symbols to link against). Building this OBJECT library on every build keeps the +# examples from bit-rotting while the API is reviewed. +# +# TODO(scalable-topics): once lib/st lands, replace this with one +# add_executable + target_link_libraries(... pulsarShared) per file, exactly like +# the samples above. +set(SAMPLE_ST_SOURCES + st/SampleStProducer.cc + st/SampleStStreamConsumer.cc + st/SampleStQueueConsumer.cc + st/SampleStCheckpointConsumer.cc + st/SampleStJsonSchema.cc +) +# reflect-cpp powers jsonSchema() (reflection-based JSON SerDe + schema) and is +# a required dependency of the scalable-topics API. +find_package(reflectcpp CONFIG REQUIRED) + +add_library(StExamples OBJECT ${SAMPLE_ST_SOURCES}) +# The scalable-topics (pulsar::st) API targets C++20; the rest of the client stays +# C++17. Set the standard per-target so only this code requires C++20. +set_target_properties(StExamples PROPERTIES CXX_STANDARD 20 CXX_STANDARD_REQUIRED ON) +# PRIVATE link gives the object sources pulsarShared's and reflect-cpp's include +# directories; an OBJECT library is not itself linked, so the missing lib/st +# symbols are fine. 
+target_link_libraries(StExamples PRIVATE ${CLIENT_LIBS} pulsarShared reflectcpp::reflectcpp) diff --git a/examples/st/README.md b/examples/st/README.md new file mode 100644 index 00000000..f095f09d --- /dev/null +++ b/examples/st/README.md @@ -0,0 +1,40 @@ +# Scalable Topics (`pulsar::st`) — API preview examples + +These examples exercise the new typed scalable-topics C++ API under +[`include/pulsar/st/`](../../include/pulsar/st). They illustrate the proposed +surface and exist to gather community feedback. + +> **Status: API definition only.** The implementation (`lib/st/`) does not exist +> yet, so these examples **compile but do not yet link**. They are wired into the +> CMake build as a compile-only `OBJECT` library (`StExamples` in +> [`examples/CMakeLists.txt`](../CMakeLists.txt)) — header-verified on every build, +> but not linked. Once `lib/st` lands they become normal `add_executable` targets. + +The `pulsar::st` API requires **C++20** (the rest of the client stays C++17). +Syntax-check an example against the headers (no linking): + +```sh +clang++ -std=c++20 -I ../../include -Wall -fsyntax-only SampleStProducer.cc +``` + +| File | Shows | +|---|---| +| `SampleStProducer.cc` | blocking + asynchronous publishing, transactions | +| `SampleStStreamConsumer.cc` | ordered (per-key) delivery, cumulative ack | +| `SampleStQueueConsumer.cc` | parallel delivery, individual ack + nack, dead-letter | +| `SampleStCheckpointConsumer.cc`| externally held position via `Checkpoint` | +| `SampleStJsonSchema.cc` | a struct as JSON with zero boilerplate (`jsonSchema<T>()`, reflect-cpp) | + +## API at a glance + +- **Typed builders** off one `PulsarClient`: `newProducer` / `newStreamConsumer` / + `newQueueConsumer` / `newCheckpointConsumer`, each taking a `Schema<T>`. +- **Synchronous calls return `Expected<T>`** (a stand-in for `std::expected`, + which is C++23): check it before use, or call `.value()`, which throws `ClientException` on failure.
+ `Expected<T>` is `[[nodiscard]]`, so a failure cannot be silently dropped. +- **Asynchronous calls return `Future<T>`**: use `addListener(...)` to react to + completion without blocking, `get()` to block, or `co_await` it. +- **Schemas**: primitives are built in; structured types use `jsonSchema<T>()` / + `avroSchema<T>()` (reflect-cpp derives the SerDe **and** the declared schema from + the struct — no boilerplate), `protobufNativeSchema<T>()`, or a custom + `Schema<T>(serde)`. reflect-cpp is a required dependency of `pulsar::st`. diff --git a/examples/st/SampleStCheckpointConsumer.cc b/examples/st/SampleStCheckpointConsumer.cc new file mode 100644 index 00000000..7baa34c8 --- /dev/null +++ b/examples/st/SampleStCheckpointConsumer.cc @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Scalable-topics CheckpointConsumer: the application owns the position. Read, +// snapshot a Checkpoint, persist it externally, and later resume from it.
+ +#include + +#include +#include + +using namespace pulsar::st; + +int main() { + auto clientResult = PulsarClient::builder().serviceUrl("pulsar://localhost:6650").build(); + if (!clientResult) { + std::cerr << "failed to build client: " << clientResult.error() << "\n"; + return 1; + } + PulsarClient client = std::move(clientResult).value(); + + // Restore from a previously stored checkpoint if you have one; else start at + // the earliest message. (Checkpoint::fromByteArray(savedBytes) to resume.) + auto consumerResult = client.newCheckpointConsumer(Schema{}) + .topic("topic://public/default/orders") + .startPosition(Checkpoint::earliest()) + .create(); // NOTE: create(), not subscribe() + if (!consumerResult) { + std::cerr << "failed to create consumer: " << consumerResult.error() << "\n"; + return 1; + } + CheckpointConsumer consumer = std::move(consumerResult).value(); + + for (int i = 0; i < 5; i++) { + auto msg = consumer.receive(std::chrono::seconds(5)); + if (!msg) { + if (msg.error().result == ResultTimeout) break; + std::cerr << "receive failed: " << msg.error() << "\n"; + break; + } + std::cout << "read: " << msg->value() << "\n"; + } + + // Atomic position snapshot across all segments. Store the bytes yourself + // (Flink/Spark state backend, a file, etc.) — there is no broker-side cursor. + Checkpoint checkpoint = consumer.checkpoint(); + std::string persisted = checkpoint.toByteArray(); + std::cout << "checkpoint is " << persisted.size() << " bytes\n"; + + (void)consumer.close(); + (void)client.close(); + return 0; +} diff --git a/examples/st/SampleStJsonSchema.cc b/examples/st/SampleStJsonSchema.cc new file mode 100644 index 00000000..d647042d --- /dev/null +++ b/examples/st/SampleStJsonSchema.cc @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Passing a struct as JSON: `jsonSchema()` derives both the SerDe and the +// declared schema from the struct's fields (via reflect-cpp) — NO macros, NO base +// class, NO schema string, NO serializer. Nested structs and containers included. +// `avroSchema()` is identical for Avro. + +#include +#include + +#include +#include +#include + +// Plain value types — that is the entire schema "declaration". 
+struct Address { + std::string street; + std::string city; +}; +struct Order { + std::string orderId; + int quantity; + double unitPrice; + Address shipTo; // nested struct — handled automatically + std::vector tags; // container — handled automatically +}; + +using namespace pulsar::st; + +int main() { + auto clientResult = PulsarClient::builder().serviceUrl("pulsar://localhost:6650").build(); + if (!clientResult) { + std::cerr << clientResult.error() << "\n"; + return 1; + } + PulsarClient client = std::move(clientResult).value(); + + auto producerResult = + client.newProducer(jsonSchema()).topic("topic://public/default/orders").create(); + if (!producerResult) { + std::cerr << producerResult.error() << "\n"; + return 1; + } + Producer producer = std::move(producerResult).value(); + + Order order{"ord-1", 3, 9.99, {"1 Main St", "Springfield"}, {"priority", "gift"}}; + if (auto sent = producer.send(order); sent) { + std::cout << "sent " << *sent << "\n"; + } + + auto consumerResult = client.newStreamConsumer(jsonSchema()) + .topic("topic://public/default/orders") + .subscriptionName("orders-sub") + .subscribe(); + if (consumerResult) { + StreamConsumer consumer = std::move(consumerResult).value(); + if (auto msg = consumer.receive(std::chrono::seconds(5))) { + Order received = msg->value(); // decoded straight back into the struct + std::cout << received.orderId << " -> " << received.shipTo.city << "\n"; + consumer.acknowledgeCumulative(msg->id()); + } + (void)consumer.close(); + } + + (void)producer.close(); + (void)client.close(); + return 0; +} diff --git a/examples/st/SampleStProducer.cc b/examples/st/SampleStProducer.cc new file mode 100644 index 00000000..2252c3dd --- /dev/null +++ b/examples/st/SampleStProducer.cc @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Scalable-topics producer: blocking and asynchronous publishing, plus a transaction. + +#include + +#include + +using namespace pulsar::st; + +int main() { + // One client per application; keep it for the whole lifetime. + auto clientResult = PulsarClient::builder().serviceUrl("pulsar://localhost:6650").build(); + if (!clientResult) { + std::cerr << "failed to build client: " << clientResult.error() << "\n"; + return 1; + } + PulsarClient client = std::move(clientResult).value(); + + auto producerResult = client.newProducer(Schema{}) + .topic("topic://public/default/orders") + .sendTimeout(std::chrono::seconds(30)) + .create(); + if (!producerResult) { + std::cerr << "failed to create producer: " << producerResult.error() << "\n"; + return 1; + } + Producer producer = std::move(producerResult).value(); + + // Blocking send: returns Expected (must be checked — [[nodiscard]]). + for (int i = 0; i < 10; i++) { + auto sent = producer.newMessage() + .key("order-" + std::to_string(i % 4)) // per-key ordering + .value("payload-" + std::to_string(i)) + .property("attempt", "1") + .send(); + if (sent) { + std::cout << "sent " << *sent << "\n"; + } else { + std::cerr << "send failed: " << sent.error() << "\n"; + } + } + + // Asynchronous send: react on completion without blocking.
+ producer.newMessage().key("order-async").value("async-payload").sendAsync().addListener( + [](const Expected& result) { + if (result) { + std::cout << "async sent " << *result << "\n"; + } else { + std::cerr << "async send failed: " << result.error() << "\n"; + } + }); + + // Transaction: produced messages become visible atomically on commit. + if (auto txnResult = client.newTransaction()) { + Transaction txn = *txnResult; + auto a = producer.newMessage().value("tx-a").transaction(txn).send(); + auto b = producer.newMessage().value("tx-b").transaction(txn).send(); + if (a && b) { + if (auto committed = txn.commit(); !committed) { + std::cerr << "commit failed: " << committed.error() << "\n"; + } + } else { + (void)txn.abort(); + } + } + + (void)producer.flush(); // await all sends issued before this call + if (auto closed = producer.close(); !closed) { + std::cerr << "close failed: " << closed.error() << "\n"; + } + (void)client.close(); + return 0; +} diff --git a/examples/st/SampleStQueueConsumer.cc b/examples/st/SampleStQueueConsumer.cc new file mode 100644 index 00000000..f8171440 --- /dev/null +++ b/examples/st/SampleStQueueConsumer.cc @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +// Scalable-topics QueueConsumer: parallel consumption, individual ack + nack, +// with a dead-letter policy. + +#include + +#include + +using namespace pulsar::st; + +int main() { + auto clientResult = PulsarClient::builder().serviceUrl("pulsar://localhost:6650").build(); + if (!clientResult) { + std::cerr << "failed to build client: " << clientResult.error() << "\n"; + return 1; + } + PulsarClient client = std::move(clientResult).value(); + + auto consumerResult = client.newQueueConsumer(Schema{}) + .topic("topic://public/default/orders") + .subscriptionName("shared-sub") + .deadLetterPolicy({.maxRedeliverCount = 5}) // DLQ after 5 redeliveries + .subscribe(); + if (!consumerResult) { + std::cerr << "failed to subscribe: " << consumerResult.error() << "\n"; + return 1; + } + QueueConsumer consumer = std::move(consumerResult).value(); + + // No ordering guarantee; ack each message individually, or nack to redeliver. + for (;;) { + auto msg = consumer.receive(std::chrono::seconds(5)); + if (!msg) { + if (msg.error().result == ResultTimeout) break; + std::cerr << "receive failed: " << msg.error() << "\n"; + break; + } + + const bool processed = !msg->value().empty(); + if (processed) { + consumer.acknowledge(msg->id()); // fire-and-forget; never blocks or errors + } else { + consumer.negativeAcknowledge(msg->id()); // schedule redelivery + } + } + + (void)consumer.close(); + (void)client.close(); + return 0; +} diff --git a/examples/st/SampleStStreamConsumer.cc b/examples/st/SampleStStreamConsumer.cc new file mode 100644 index 00000000..57d9f355 --- /dev/null +++ b/examples/st/SampleStStreamConsumer.cc @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Scalable-topics StreamConsumer: ordered (per-key) delivery with cumulative ack. + +#include + +#include + +using namespace pulsar::st; + +int main() { + auto clientResult = PulsarClient::builder().serviceUrl("pulsar://localhost:6650").build(); + if (!clientResult) { + std::cerr << "failed to build client: " << clientResult.error() << "\n"; + return 1; + } + PulsarClient client = std::move(clientResult).value(); + + auto consumerResult = client.newStreamConsumer(Schema{}) + .topic("topic://public/default/orders") + .subscriptionName("ordered-sub") + .subscriptionInitialPosition(SubscriptionInitialPosition::Earliest) + .subscribe(); + if (!consumerResult) { + std::cerr << "failed to subscribe: " << consumerResult.error() << "\n"; + return 1; + } + StreamConsumer consumer = std::move(consumerResult).value(); + + // Ordered delivery; a single cumulative ack advances every segment to this + // message's position (there is no individual ack in this mode). 
+ for (int i = 0; i < 10; i++) { + auto msg = consumer.receive(std::chrono::seconds(10)); + if (!msg) { + if (msg.error().result == ResultTimeout) continue; + std::cerr << "receive failed: " << msg.error() << "\n"; + break; + } + std::cout << "key=" << msg->key().value_or("") << " value=" << msg->value() << "\n"; + consumer.acknowledgeCumulative(msg->id()); // fire-and-forget; never blocks or errors + } + + (void)consumer.close(); + (void)client.close(); + return 0; +} diff --git a/include/pulsar/st/AvroSchema.h b/include/pulsar/st/AvroSchema.h new file mode 100644 index 00000000..a0ac13eb --- /dev/null +++ b/include/pulsar/st/AvroSchema.h @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +// avroSchema() is the Avro counterpart of jsonSchema(): reflect-cpp derives +// the SerDe and the Avro schema from T's fields — no per-type serializer. The +// reflect-cpp Avro backend is assumed always present (a required dependency). +// +// NOTE: the `rfl::` calls live here (not in a lib/st .cc) because the SerDe is a +// template instantiated on the user's `T` — that instantiation must happen in the +// including TU. 
reflect-cpp is therefore confined to this opt-in schema header, +// not the core API headers. + +namespace pulsar::st { + +/// @cond INTERNAL +/// Internal: the reflect-cpp-backed Avro SerDe used by avroSchema(). Not part +/// of the public API. +namespace detail { +template +struct AvroSerDe { + SchemaInfo info() const { return SchemaInfo(SchemaType::AVRO, "AVRO", rfl::avro::to_schema()); } + std::string encode(const T& value) const { return rfl::avro::write(value); } + T decode(const char* data, std::size_t size) const { + return rfl::avro::read(std::string(data, size)).value(); + } +}; +} // namespace detail +/// @endcond + +/** + * @brief Creates an Avro schema for `T`, with no boilerplate. + * + * The Avro counterpart of jsonSchema(): reflect-cpp derives both the SerDe and the + * Avro schema directly from the struct's fields, with no per-type serializer. + * + * @code + * auto producer = client.newProducer(avroSchema()).topic(t).create(); + * @endcode + * + * @tparam T the struct type to serialize as Avro; its fields must be reflectable + * by reflect-cpp. + * @return a `Schema` whose `encode`/`decode` use Avro. + * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes + * are not a valid Avro encoding for `T`. + */ +template +Schema avroSchema() { + return Schema(detail::AvroSerDe{}); +} + +} // namespace pulsar::st diff --git a/include/pulsar/st/Checkpoint.h b/include/pulsar/st/Checkpoint.h new file mode 100644 index 00000000..d3a32e34 --- /dev/null +++ b/include/pulsar/st/Checkpoint.h @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace pulsar::st { + +class CheckpointImpl; +class CheckpointFactory; + +/** + * @brief An opaque, serializable position vector marking a consistent point across + * all segments of a scalable topic. + * + * A `Checkpoint` is the only position type accepted by a `CheckpointConsumer`: + * unlike `MessageId`, which identifies a single message within one segment, a + * `Checkpoint` captures the read position of *every* segment at once, so it can + * express a position that spans the whole topic. + * + * The intended workflow is store-and-restore, owned entirely by the application: + * - `CheckpointConsumer::checkpoint()` captures an atomic snapshot of the current + * per-segment read positions; + * - the application serializes the snapshot with `toByteArray()` and persists the + * bytes in its own state backend; + * - on restart it rebuilds the `Checkpoint` with `fromByteArray()` and resumes + * from it via `CheckpointConsumerBuilder::startPosition(...)`. + * + * The value is opaque: it carries no observable ledger/entry/segment structure and + * is not comparable or ordered. Timestamp-based positioning is performed + * out-of-band through the `scalable-topics seek` admin operation, not through this + * type. + */ +class PULSAR_PUBLIC Checkpoint { + public: + /** + * @brief Construct an empty/invalid checkpoint. 
+ * + * The result is falsy under `operator bool` and must not be passed as a start + * position; use `earliest()` or `latest()`, or a value restored from + * `fromByteArray()`, instead. + */ + Checkpoint(); + + /** + * @brief Well-known sentinel positioned before the earliest available message + * of every segment. + * + * Use as a start position to replay a scalable topic from the very beginning. + * + * @return Reference to the shared earliest-position sentinel. + */ + static const Checkpoint& earliest(); + + /** + * @brief Well-known sentinel positioned after the latest published message of + * every segment. + * + * Use as a start position to consume only messages published after the + * consumer is created. This is the default start position of + * `CheckpointConsumerBuilder`. + * + * @return Reference to the shared latest-position sentinel. + */ + static const Checkpoint& latest(); + + /** + * @brief Serialize this checkpoint to a portable binary form for external + * storage. + * + * The returned bytes are an opaque blob suitable for persisting in any state + * backend; restore them later with `fromByteArray()`. + * + * @return Byte string encoding the cross-segment position. + */ + std::string toByteArray() const; + + /** + * @brief Restore a `Checkpoint` previously produced by `toByteArray()`. + * + * @param data Bytes returned by an earlier `toByteArray()` call. + * @return The reconstructed `Checkpoint`. + */ + static Checkpoint fromByteArray(const std::string& data); + + /** + * @brief Test whether this checkpoint holds a valid position. + * + * @return `true` for a sentinel or a value restored from `fromByteArray()`; + * `false` for a default-constructed (empty) checkpoint. 
+ */ + explicit operator bool() const { return static_cast(impl_); } + + private: + friend class CheckpointFactory; + explicit Checkpoint(std::shared_ptr impl); + + std::shared_ptr impl_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h new file mode 100644 index 00000000..e5bb80d7 --- /dev/null +++ b/include/pulsar/st/CheckpointConsumer.h @@ -0,0 +1,318 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + + +/** + * @brief Configuration accumulated by `CheckpointConsumerBuilder`. + * + * Populated through the builder's fluent setters and consumed by + * `CheckpointConsumerBuilder::create()` / `createAsync()`; applications normally do + * not construct this directly. + */ +struct CheckpointConsumerConfig { + std::string topic; ///< Scalable topic to read. REQUIRED; no default. + Checkpoint startPosition = Checkpoint::latest(); ///< Position to start from. Default `Checkpoint::latest()`. + std::optional consumerGroup; ///< Consumer group to join. 
Unset (default) => ungrouped, reads every segment. + std::optional consumerName; ///< Human-readable consumer name. Unset (default) => auto-generated. + Properties properties; ///< Free-form key/value metadata attached to the consumer. Default empty. + SchemaInfo schema; ///< Schema descriptor; filled in from `Schema<T>` by the builder. +}; + + + + +template +class CheckpointConsumerBuilder; + +/** + * @brief Unmanaged consumer with an externally held position — reader semantics, + * no broker-managed cursor and no acknowledgment (spec §7.3). + * + * A `CheckpointConsumer` reads from every segment of a scalable topic like a + * reader: the broker keeps no durable cursor for it and there is no acknowledgment. + * The application owns the position. It captures a `Checkpoint` with + * `checkpoint()`, persists it (see `Checkpoint::toByteArray()`), and on restart + * resumes from it through `CheckpointConsumerBuilder::startPosition(...)`. + * + * Instances are created with `CheckpointConsumerBuilder::create()` / + * `createAsync()` (note: not `subscribe()`, since there is no subscription). A + * default-constructed `CheckpointConsumer` is empty and falsy under `operator + * bool`. + * + * @tparam T Message payload type; decoded according to the configured `Schema<T>`. + */ +template +class CheckpointConsumer { + public: + /** @brief Construct an empty, unusable consumer (falsy under `operator bool`). */ + CheckpointConsumer() = default; + + /** + * @brief Block until the next message is available and return it. + * + * Waits indefinitely for the next message at the current read position. + * + * @return `Expected<Message<T>>` holding the decoded message, or an `Error` if + * the receive fails (e.g. the consumer is closed or disconnected, or the + * payload cannot be decoded). + */ + Expected> receive() { return toTyped(core_.receiveAsync().get()); } + + /** + * @brief Block for at most `timeout` waiting for the next message.
+ * + * @param timeout Maximum time to wait (`std::chrono::milliseconds`). + * @return `Expected>` holding the decoded message, or an `Error`; a + * timeout surfaces as `Error{ResultTimeout}`. May also fail on + * close/disconnect or a decode error. + */ + Expected> receive(std::chrono::milliseconds timeout) { + return toTyped(core_.receiveAsync(timeout.count()).get()); + } + + /** + * @brief Asynchronously receive the next message. + * + * Non-blocking counterpart of `receive()`; the returned future completes when a + * message is available or the receive fails. + * + * @return `Future>` resolving to the decoded message, or an `Error`. + */ + Future> receiveAsync() { + Schema schema = schema_; + return core_.receiveAsync().thenApply( + [schema](const detail::MessageCore& core) { return Message(core, schema); }); + } + + /** + * @brief Block to receive up to `maxMessages`, returning early on `timeout`. + * + * Accumulates whatever messages are available, returning as soon as + * `maxMessages` is reached or `timeout` elapses (whichever comes first). The + * returned batch may contain fewer than `maxMessages` entries. + * + * @param maxMessages Maximum number of messages to return in one call. + * @param timeout Maximum time to wait for the batch to fill + * (`std::chrono::milliseconds`). + * @return `Expected>` holding the decoded batch, or an `Error` on + * failure. + */ + Expected> receiveMulti(int maxMessages, std::chrono::milliseconds timeout) { + return toTypedBatch(core_.receiveMultiAsync(maxMessages, timeout.count()).get()); + } + + /** + * @brief Capture an atomic snapshot of the current read positions across all + * segments (spec §8). + * + * Records, as a single consistent `Checkpoint`, the per-segment positions read + * so far. The snapshot is taken locally with no broker round-trip. Persist the + * returned value (see `Checkpoint::toByteArray()`) to be able to resume from + * exactly this point later. 
+ * + * @return A `Checkpoint` representing the current cross-segment position. + */ + Checkpoint checkpoint() const { return core_.checkpoint(); } + + /** + * @brief Close the consumer and release its resources. + * + * Blocks until the close completes. After closing, no further receives succeed. + * + * @return `Expected` holding success, or an `Error` if the close failed. + */ + Expected close() { return core_.closeAsync().get(); } + + /** + * @brief Asynchronously close the consumer. + * + * Non-blocking counterpart of `close()`. + * + * @return `Future` resolving to success, or an `Error` on failure. + */ + Future closeAsync() { return core_.closeAsync(); } + + /** + * @brief Return the topic this consumer reads from. + * + * @return Reference to the topic name. + */ + const std::string& topic() const { return core_.topic(); } + + /** + * @brief Test whether this consumer is usable. + * + * @return `true` for a consumer produced by the builder; `false` for a + * default-constructed (empty) one. + */ + explicit operator bool() const { return static_cast(core_); } + + private: + template + friend class CheckpointConsumerBuilder; + CheckpointConsumer(detail::CheckpointConsumerCore core, Schema schema) + : core_(std::move(core)), schema_(std::move(schema)) {} + + Expected> toTyped(Expected r) const { + if (r) return Message(*r, schema_); + return Expected>(r.error()); + } + Expected> toTypedBatch(Expected> r) const { + if (!r) return Expected>(r.error()); + std::vector> out; + out.reserve(r->size()); + for (auto& core : *r) out.emplace_back(core, schema_); + return Messages(std::move(out)); + } + + detail::CheckpointConsumerCore core_; + Schema schema_; +}; + +/** + * @brief Fluent builder for a `CheckpointConsumer`. + * + * Obtained from `PulsarClient`. Configure it through the chainable setters, then + * call the terminal `create()` / `createAsync()` to build the consumer. 
Note the + * terminal is `create()`, not `subscribe()`, because a checkpoint consumer has no + * broker-managed subscription. + * + * @tparam T Message payload type of the consumer being built. + */ +template +class CheckpointConsumerBuilder { + public: + /** + * @brief Set the scalable topic to read from. REQUIRED; no default. + * + * @param t Topic name. + * @return `*this`, for call chaining. + */ + CheckpointConsumerBuilder& topic(std::string t) { + config_.topic = std::move(t); + return *this; + } + /** + * @brief Set the position to start reading from. Default `Checkpoint::latest()`. + * + * Pass `Checkpoint::earliest()` to replay from the beginning, or a value + * restored via `Checkpoint::fromByteArray()` to resume from a persisted + * position. + * + * @param c Start position. + * @return `*this`, for call chaining. + */ + CheckpointConsumerBuilder& startPosition(Checkpoint c) { + config_.startPosition = std::move(c); + return *this; + } + /** + * @brief Join a consumer group so segments are shared across its members. + * + * Members of the same group divide the topic's segments among themselves; + * leaving this unset (the default) makes the consumer ungrouped, so it reads + * every segment on its own. + * + * @param g Consumer group name. + * @return `*this`, for call chaining. + */ + CheckpointConsumerBuilder& consumerGroup(std::string g) { + config_.consumerGroup = std::move(g); + return *this; + } + /** + * @brief Set a human-readable consumer name. Default: auto-generated when unset. + * + * @param n Consumer name. + * @return `*this`, for call chaining. + */ + CheckpointConsumerBuilder& consumerName(std::string n) { + config_.consumerName = std::move(n); + return *this; + } + /** + * @brief Attach a free-form key/value property to the consumer. + * + * May be called repeatedly to add several properties; a repeated key replaces its earlier value. Defaults to none. + * + * @param k Property key. + * @param v Property value. + * @return `*this`, for call chaining.
+ */ + CheckpointConsumerBuilder& property(const std::string& k, const std::string& v) { + config_.properties[k] = v; + return *this; + } + + /** + * @brief Build the consumer, blocking until it is ready. + * + * @return `Expected<CheckpointConsumer<T>>` holding the consumer, or an `Error` + * if creation failed. + */ + Expected> create() { return createAsync().get(); } + + /** + * @brief Asynchronously build the consumer. + * + * Non-blocking counterpart of `create()`. Works on a copy of the builder's configuration, with the schema descriptor taken from the schema's `info()`. + * + * @return `Future<CheckpointConsumer<T>>` resolving to the consumer, or an + * `Error` on failure. + */ + Future> createAsync() { + Schema schema = schema_; + CheckpointConsumerConfig config = config_; + config.schema = schema.info(); + return client_.createCheckpointAsync(std::move(config)) + .thenApply([schema](const detail::CheckpointConsumerCore& core) { + return CheckpointConsumer(core, schema); + }); + } + + private: + friend class PulsarClient; + CheckpointConsumerBuilder(detail::ClientCore client, Schema schema) + : client_(std::move(client)), schema_(std::move(schema)) {} + + detail::ClientCore client_; + Schema schema_; + CheckpointConsumerConfig config_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Client.h b/include/pulsar/st/Client.h new file mode 100644 index 00000000..3cd82529 --- /dev/null +++ b/include/pulsar/st/Client.h @@ -0,0 +1,360 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace pulsar::st { + +class PulsarClientBuilder; + +/** + * The thread-safe entry point of the scalable-topics SDK. The client state behind it is + * heavyweight: it owns the connection pool, IO threads, and memory buffers. An application SHOULD create a + * single instance and keep it for the whole application lifetime, sharing it + * across all producers and consumers, and close it exactly once at shutdown + * (spec §3). + * + * A `PulsarClient` object itself is a lightweight, copyable handle to that shared state, built + * only through `PulsarClient::builder()`. + */ +class PULSAR_PUBLIC PulsarClient { + public: + /** + * Begin configuring a client. + * + * This is the only way to obtain a PulsarClient: the default constructor is + * private. Set at least PulsarClientBuilder::serviceUrl, then call + * PulsarClientBuilder::build. + * + * @return a fresh, unconfigured client builder + */ + static PulsarClientBuilder builder(); + + /** + * Start building a producer for values of type `T`. + * + * The schema governs serialization and broker-side compatibility: built-ins + * cover `Bytes` (the default), `std::string` and numeric primitives; + * structured types use `jsonSchema()`, `avroSchema()`, + * `protobufNativeSchema()`, or a custom SerDe (see Schema.h and the + * dedicated schema headers). + * + * @tparam T the value type produced; defaults to `Bytes` (raw payload).
+ * @param schema the schema describing how `T` is encoded; defaults to the + * built-in schema for `T`. + * @return a `ProducerBuilder<T>` to further configure and create the producer + */ + template + ProducerBuilder newProducer(Schema schema = {}) { + return ProducerBuilder(core_, std::move(schema)); + } + + /** + * Start building a stream consumer: an ordered, broker-managed, + * cumulative-ack consumer (spec §7.1). + * + * @tparam T the value type consumed; defaults to `Bytes` (raw payload). + * @param schema the schema describing how `T` is decoded; defaults to the + * built-in schema for `T`. + * @return a `StreamConsumerBuilder<T>` to further configure and create the consumer + */ + template + StreamConsumerBuilder newStreamConsumer(Schema schema = {}) { + return StreamConsumerBuilder(core_, std::move(schema)); + } + + /** + * Start building a queue consumer: a parallel, broker-managed, + * individual-ack consumer (spec §7.2). + * + * @tparam T the value type consumed; defaults to `Bytes` (raw payload). + * @param schema the schema describing how `T` is decoded; defaults to the + * built-in schema for `T`. + * @return a `QueueConsumerBuilder<T>` to further configure and create the consumer + */ + template + QueueConsumerBuilder newQueueConsumer(Schema schema = {}) { + return QueueConsumerBuilder(core_, std::move(schema)); + } + + /** + * Start building a checkpoint consumer: an unmanaged consumer whose read + * position is held by the application rather than the broker (spec §7.3). + * + * @tparam T the value type consumed; defaults to `Bytes` (raw payload). + * @param schema the schema describing how `T` is decoded; defaults to the + * built-in schema for `T`. + * @return a `CheckpointConsumerBuilder<T>` to further configure and create the consumer + */ + template + CheckpointConsumerBuilder newCheckpointConsumer(Schema schema = {}) { + return CheckpointConsumerBuilder(core_, std::move(schema)); + } + + /** + * Open a new transaction synchronously (spec §9).
+ * + * Blocks until the transaction has been started by the broker. The + * transaction uses the default timeout configured via + * PulsarClientBuilder::transactionPolicy. + * + * @return the new Transaction, or an Error if it could not be started + */ + Expected newTransaction() { return core_.newTransactionAsync().get(); } + + /** + * Open a new transaction asynchronously (spec §9). + * + * @return a Future that completes with the new Transaction, or an Error if it + * could not be started + */ + Future newTransactionAsync() { return core_.newTransactionAsync(); } + + /** + * Close the client gracefully. + * + * Waits for all pending operations to complete, then releases every resource the + * client owns (connections, IO threads, buffers). Blocks until the shutdown + * finishes. Call this exactly once at application shutdown. + * + * @return an empty `Expected<void>` on success, or an Error if the close failed + */ + Expected close() { return core_.closeAsync().get(); } + + /** + * Close the client gracefully, asynchronously. + * + * Like close() but returns immediately with a Future that completes once all + * pending operations have drained and resources have been released. + * + * @return a Future that completes empty on success, or with an Error if the + * close failed + */ + Future closeAsync() { return core_.closeAsync(); } + + /** + * Shut the client down immediately. + * + * Drops any pending operations without waiting for them and releases + * resources at once. Prefer close() for an orderly shutdown; use this only + * when a graceful close is not possible or not desired. + */ + void shutdown() { core_.shutdown(); } + + /** + * Test whether this handle refers to a live client.
+ * + * @return true if the handle is backed by shared client state; false if it + * has been moved from + */ + explicit operator bool() const { return static_cast(core_); } + + private: + friend class PulsarClientBuilder; + PulsarClient() = default; + explicit PulsarClient(detail::ClientCore core) : core_(std::move(core)) {} + + detail::ClientCore core_; +}; + +/** + * Configures and builds a `PulsarClient`. `serviceUrl` is the only required + * setting; everything else has sensible defaults and is grouped into policy + * objects (spec Appendix A). With C++20 designated initializers: + * + * auto client = PulsarClient::builder() + * .serviceUrl("pulsar://localhost:6650") + * .connectionPolicy({.connectionsPerBroker = 4, .connectionTimeout = 10s}) + * .build(); + */ +class PULSAR_PUBLIC PulsarClientBuilder { + public: + /** + * REQUIRED — the Pulsar endpoint, e.g. `pulsar://localhost:6650`. + * + * This is the only required setting; build() fails if it is not set. + * + * @param url the broker or proxy service URL to connect to + * @return `*this`, for call chaining + */ + PulsarClientBuilder& serviceUrl(std::string url) { + serviceUrl_ = std::move(url); + return *this; + } + + /** + * Set the authentication provider used when connecting to the broker. + * + * Optional. When unset, the client connects without authentication. + * + * @param auth the authentication provider to use + * @return `*this`, for call chaining + */ + PulsarClientBuilder& authentication(AuthenticationPtr auth) { + authentication_ = std::move(auth); + return *this; + } + + /** + * Set the number of threads used for network IO. + * + * Optional. Defaults to 1 when unset. + * + * @param n the number of IO threads + * @return `*this`, for call chaining + */ + PulsarClientBuilder& ioThreads(int n) { + ioThreads_ = n; + return *this; + } + /** + * Set the number of threads used to run message listeners. + * + * Optional. Defaults to 1 when unset.
+ * + * @param n the number of message-listener threads + * @return `*this`, for call chaining + */ + PulsarClientBuilder& messageListenerThreads(int n) { + messageListenerThreads_ = n; + return *this; + } + /** + * Set the client-wide memory budget for pending (in-flight) messages. + * + * Optional. When unset, the client applies its built-in default limit. + * + * @param size the maximum memory, in bytes, for buffered messages + * @return `*this`, for call chaining + */ + PulsarClientBuilder& memoryLimit(MemorySize size) { + memoryLimit_ = size; + return *this; + } + + /** + * Set connection-pool, lookup, and request-timeout tuning. + * + * Optional. Any field left unset within the policy falls back to the client + * default for that setting. + * + * @param policy the connection policy to apply + * @return `*this`, for call chaining + */ + PulsarClientBuilder& connectionPolicy(ConnectionPolicy policy) { + connectionPolicy_ = std::move(policy); + return *this; + } + /** + * Set the reconnection backoff policy. + * + * Optional. Any field left unset within the policy falls back to the client + * default for that bound. + * + * @param policy the backoff policy to apply + * @return `*this`, for call chaining + */ + PulsarClientBuilder& backoffPolicy(BackoffPolicy policy) { + backoffPolicy_ = std::move(policy); + return *this; + } + /** + * Set the transport security (TLS) policy. + * + * Optional. When unset, TLS is disabled and connections are plaintext. + * + * @param policy the TLS policy to apply + * @return `*this`, for call chaining + */ + PulsarClientBuilder& tlsPolicy(TlsPolicy policy) { + tlsPolicy_ = std::move(policy); + return *this; + } + /** + * Set client-wide transaction settings (spec §9). + * + * Optional. Transactions are always available; this only tunes the default + * transaction timeout. When unset, the client applies its built-in default.
+ * + * @param policy the transaction policy to apply + * @return `*this`, for call chaining + */ + PulsarClientBuilder& transactionPolicy(TransactionPolicy policy) { + transactionPolicy_ = std::move(policy); + return *this; + } + + /** + * Set the advertised listener name to use for broker discovery. + * + * Optional. Used in multi-listener deployments to select which set of + * advertised addresses the client connects through. When unset, the broker's + * default listener is used. + * + * @param name the configured listener name + * @return `*this`, for call chaining + */ + PulsarClientBuilder& listenerName(std::string name) { + listenerName_ = std::move(name); + return *this; + } + + /** + * Build the client from the configured settings. + * + * @return an `Expected<PulsarClient>` holding the new client on success, or an Error describing the + * configuration problem (for example, a missing or invalid + * serviceUrl()) + */ + Expected build(); + + private: + std::string serviceUrl_; + AuthenticationPtr authentication_; + std::optional ioThreads_; + std::optional messageListenerThreads_; + std::optional memoryLimit_; + ConnectionPolicy connectionPolicy_; + BackoffPolicy backoffPolicy_; + TlsPolicy tlsPolicy_; + TransactionPolicy transactionPolicy_; + std::optional listenerName_; +}; + +inline PulsarClientBuilder PulsarClient::builder() { return PulsarClientBuilder{}; } + +} // namespace pulsar::st diff --git a/include/pulsar/st/Consumer.h b/include/pulsar/st/Consumer.h new file mode 100644 index 00000000..81b430b9 --- /dev/null +++ b/include/pulsar/st/Consumer.h @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include + +#include +#include +#include + +/** + * @file + * Enumerations and policy types shared across the consumer modes (spec §7). + */ + +namespace pulsar::st { + +/** + * Where a brand-new subscription starts reading. + * + * Determines the initial position of the cursor the first time a subscription is + * created. It is ignored once the subscription exists and has a durable cursor: + * an already-established subscription always resumes from its stored position. + */ +enum class SubscriptionInitialPosition { + Earliest, ///< Start from the oldest available message on the topic. + Latest ///< Start from the newest message, skipping anything published before subscribing. +}; + +/** + * Acknowledgment tuning (mirrors the Java v5 config package). + * + * Controls how the consumer batches acknowledgments and, for a QueueConsumer, how + * long redelivery is delayed after a negative acknowledgment. Both fields are + * optional and fall back to the client default when unset. + */ +struct AckPolicy { + /** Time window over which acknowledgments are batched before being sent, in milliseconds; a value of 0 sends each acknowledgment immediately. When unset, the client default is used. */ + std::optional groupTime; + /** Delay before a negatively-acknowledged message is redelivered, in milliseconds. QueueConsumer only. When unset, the client default is used. */ + std::optional negativeAckRedeliveryDelay; +}; + +/** + * Dead-letter handling for a QueueConsumer (spec §7.2).
+ * + * When a message is redelivered more than #maxRedeliverCount times, it is moved + * to a dead-letter topic instead of being redelivered indefinitely. Dead-lettering + * applies to a QueueConsumer only and is disabled unless #maxRedeliverCount is set + * to a positive value. + */ +struct DeadLetterPolicy { + /** Maximum number of redeliveries before a message is routed to the dead-letter topic. Defaults to 0, which disables dead-lettering. */ + int maxRedeliverCount = 0; + /** Name of the dead-letter topic. When unset, defaults to "<topic>-<subscription>-DLQ". */ + std::optional deadLetterTopic; + /** If set, creates this subscription on the dead-letter topic up front so no messages are missed before a consumer attaches. When unset, no initial subscription is created. */ + std::optional initialSubscriptionName; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Error.h b/include/pulsar/st/Error.h new file mode 100644 index 00000000..9e90e016 --- /dev/null +++ b/include/pulsar/st/Error.h @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace pulsar::st { + +// The scalable-topics SDK reuses the existing `pulsar::Result` code taxonomy and +// the {result, message} `Error` pair rather than introducing a parallel one: the +// underlying core already emits `pulsar::Result`, so reusing it avoids a lossy +// translation layer and keeps `strResult()` / error strings consistent. C++20 +// `using enum` re-exports the result codes into `pulsar::st`, so they are usable +// unqualified here (e.g. `ResultTimeout`) — no scoped parallel enum needed. +/** Re-export of `pulsar::Error`: the `{result, message}` pair describing a failure. */ +using pulsar::Error; +/** Re-export of `pulsar::Result`: the enumeration of machine-readable result codes. */ +using pulsar::Result; +/** Re-export the `Result` enumerators into `pulsar::st` so they are usable unqualified (e.g. `ResultTimeout`). */ +using enum pulsar::Result; + +/** + * The exception type of the scalable-topics API, wrapping a {result, message} + * pair. The API is **non-throwing by default**: synchronous calls return + * `Expected<T>` and asynchronous calls deliver `Expected<T>` to a `Future<T>` + * listener. `ClientException` is thrown only when you opt in — by calling + * `Expected<T>::value()` on a result that holds an error. Code that never calls + * `value()` never throws (and the C API under `pulsar/c/st/` is fully non-throwing + * and ABI-stable). + */ +class PULSAR_PUBLIC ClientException : public std::exception { + public: + /** + * Construct from a result code and detail message. + * + * The `what()` string is composed from the code's `strResult()` name and, when + * non-empty, @p message appended after a `": "` separator. + * + * @param result the machine-readable result code describing the failure. + * @param message the human-readable detail message (may be empty). + */ + ClientException(Result result, std::string message) + : error_{result, std::move(message)}, + what_(error_.message.empty() ?
std::string{strResult(result)} + : std::string{strResult(result)} + ": " + error_.message) {} + + /** + * Construct from an existing `{result, message}` `Error` pair. + * + * This is the constructor `Expected<T>::value()` uses to turn a stored error + * into a thrown exception. + * + * @param error the error pair to wrap. + */ + explicit ClientException(Error error) : ClientException(error.result, std::move(error.message)) {} + + /** The machine-readable result code. */ + Result result() const noexcept { return error_.result; } + + /** The human-readable detail message (may be empty). */ + const std::string& message() const noexcept { return error_.message; } + + /** The full {result, message} pair. */ + const Error& error() const noexcept { return error_; } + + /** + * The formatted exception description, as required by `std::exception`. + * + * Combines the result code's `strResult()` name with the detail message when one + * is present. The string is built once, at construction. + * + * @return a null-terminated string owned by this exception. + */ + const char* what() const noexcept override { return what_.c_str(); } + + private: + Error error_; + std::string what_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Expected.h b/include/pulsar/st/Expected.h new file mode 100644 index 00000000..05871493 --- /dev/null +++ b/include/pulsar/st/Expected.h @@ -0,0 +1,305 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +// A C++17 stand-in for std::expected<T, E> (C++23), used as the single +// synchronous result type of the scalable-topics API. The public surface mirrors +// std::expected so call sites read identically on every toolchain; on compilers +// that ship <expected> we could later alias to std::expected<T, Error> without changing +// user code. The one intentional deviation: `value()` on an error throws our +// `ClientException` (not std::bad_expected_access), keeping one error channel. + +#if defined(__cpp_exceptions) || defined(_CPPUNWIND) +#define PULSAR_ST_THROW(ex) throw(ex) +#else +#define PULSAR_ST_THROW(ex) (static_cast(ex), std::abort()) +#endif + +namespace pulsar::st { + +/** Wrap an `Error` to construct an `Expected<T>` in its error state. */ +struct Unexpected { + /** The wrapped error to seed the `Expected<T>`'s error state. */ + Error error; +}; + +/** + * Build an `Unexpected` from an existing `Error`. + * + * Convenience factory for returning a failure from a function whose return type + * is an `Expected<T>`, mirroring `std::unexpected`. + * + * @param error the error to wrap. + * @return an `Unexpected` carrying @p error, implicitly convertible to any `Expected<T>`. + */ +inline Unexpected unexpected(Error error) { return Unexpected{std::move(error)}; } + +/** + * Build an `Unexpected` from a result code and an optional detail message.
+ * + * Convenience overload that constructs the `Error` pair in place, so call sites + * can write `return unexpected(ResultTimeout, "...")` without naming `Error`. + * + * @param result the machine-readable result code describing the failure. + * @param message an optional human-readable detail message (empty by default). + * @return an `Unexpected` carrying `Error{result, message}`. + */ +inline Unexpected unexpected(Result result, std::string message = {}) { + return Unexpected{Error{result, std::move(message)}}; +} + +/** + * Holds either a value of type `T` or an `Error`. Returned by every synchronous + * operation. `[[nodiscard]]` so a failure can't be silently dropped — the one + * weakness of value-based errors, closed at compile time. Stored as a `std::variant` with the value at index 0 and the `Error` at index 1. + */ +template +class [[nodiscard]] Expected { + public: + /** The contained value type. */ + using value_type = T; + /** The error type held in the failure state. */ + using error_type = Error; + + /** Construct in the value state by copying @p value. */ + Expected(const T& value) : storage_(std::in_place_index<0>, value) {} + /** Construct in the value state by moving @p value. */ + Expected(T&& value) : storage_(std::in_place_index<0>, std::move(value)) {} + /** Construct in the error state by copying @p error. */ + Expected(const Error& error) : storage_(std::in_place_index<1>, error) {} + /** Construct in the error state by moving @p error. */ + Expected(Error&& error) : storage_(std::in_place_index<1>, std::move(error)) {} + /** Construct in the error state from an `Unexpected` wrapper. */ + Expected(Unexpected u) : storage_(std::in_place_index<1>, std::move(u.error)) {} + + /** + * Whether this holds a value (as opposed to an error). + * + * @return `true` if a value is present, `false` if it holds an error. + */ + bool has_value() const noexcept { return storage_.index() == 0; } + + /** + * Whether this holds a value, for use in a boolean context. + * + * Equivalent to `has_value()`.
Enables the non-throwing pattern + * `if (result) { use(*result); } else { handle(result.error()); }`. + * + * @return `true` if a value is present, `false` if it holds an error. + */ + explicit operator bool() const noexcept { return has_value(); } + + /** + * Returns the value, or throws `ClientException` if this holds an error. + * + * This mirrors `std::expected<T, E>::value()`, which throws + * `std::bad_expected_access<E>` on an error. Modern C++ has no checked + * exception specifications (dynamic specs were removed in C++17), so the thrown + * type is documented here, not declared in the signature. When exceptions are disabled, + * `PULSAR_ST_THROW` calls `std::abort()` instead. Use `operator bool` + `operator*` for the non-throwing path. + */ + const T& value() const& { + if (!has_value()) PULSAR_ST_THROW(ClientException(std::get<1>(storage_))); + return std::get<0>(storage_); + } + T& value() & { + if (!has_value()) PULSAR_ST_THROW(ClientException(std::get<1>(storage_))); + return std::get<0>(storage_); + } + T&& value() && { + if (!has_value()) PULSAR_ST_THROW(ClientException(std::get<1>(storage_))); + return std::get<0>(std::move(storage_)); + } + + /** + * Unchecked access to the contained value. + * + * Unlike `value()`, this is `noexcept` and never reports an error to the caller. Behaviour is + * undefined if this holds an error; verify with `operator bool` first. (This implementation reads through `std::get` inside a `noexcept` function, so a violation ends the program instead of throwing.) + * + * @return a reference to the contained value (lvalue or rvalue per ref-qualifier). + */ + const T& operator*() const& noexcept { return std::get<0>(storage_); } + /** @copydoc operator*() const& */ + T& operator*() & noexcept { return std::get<0>(storage_); } + /** @copydoc operator*() const& */ + T&& operator*() && noexcept { return std::get<0>(std::move(storage_)); } + + /** + * Unchecked member access to the contained value. + * + * Behaviour is undefined if this holds an error; verify with `operator bool` + * first. + * + * @return a pointer to the contained value (the `std::get_if` lookup yields a null pointer when this holds an error).
+ */ + const T* operator->() const noexcept { return std::get_if<0>(&storage_); } + /** @copydoc operator->() const */ + T* operator->() noexcept { return std::get_if<0>(&storage_); } + + /** + * Access the contained error. + * + * @pre `!has_value()`. Behaviour is undefined if this holds a value. + * @return a reference to the contained `Error`. + */ + const Error& error() const& noexcept { return std::get<1>(storage_); } + /** @copydoc error() const& */ + Error& error() & noexcept { return std::get<1>(storage_); } + + /** + * Return the contained value, or @p fallback if this holds an error. + * + * @tparam U a type convertible to `T`. + * @param fallback the value to return when no value is present. + * @return a copy of the contained value, or `static_cast<T>(fallback)` on error. + */ + template + T value_or(U&& fallback) const& { + return has_value() ? std::get<0>(storage_) : static_cast(std::forward(fallback)); + } + + /** + * Monadic chaining: invoke @p f on the value, or propagate the error. + * + * If this holds a value, returns `f(value)` — which must itself be an + * `Expected<U>`. If this holds an error, the error is forwarded unchanged into a + * fresh `Expected<U>` of @p f's return type, and @p f is not called. Mirrors + * `std::expected::and_then`. + * + * @tparam F a callable taking `const T&` and returning some `Expected<U>`. + * @param f the continuation to invoke on the value. + * @return `f(value)` on success, or that result type's error state on failure. + */ + template + auto and_then(F&& f) const& { + using R = std::remove_cv_t>>; + return has_value() ? std::forward(f)(std::get<0>(storage_)) : R(error()); + } + + /** + * Monadic mapping: transform the value through @p f, or propagate the error. + * + * If this holds a value, returns `Expected<U>(f(value))` where `U` is @p f's + * return type. If this holds an error, the error is forwarded unchanged and + * @p f is not called. Mirrors `std::expected::transform`.
+ * + * @tparam F a callable taking `const T&` and returning a plain value `U`. + * @param f the mapping function to apply to the value. + * @return `Expected<U>` holding `f(value)` on success, or the error on failure. + */ + template + auto transform(F&& f) const& { + using U = std::remove_cv_t>>; + return has_value() ? Expected(std::forward(f)(std::get<0>(storage_))) + : Expected(error()); + } + + /** + * Monadic error recovery: invoke @p f on the error, or pass the value through. + * + * If this holds an error, returns `f(error)` — used to recover or substitute an + * alternative result. If this holds a value, `*this` is returned unchanged and + * @p f is not called. Mirrors `std::expected::or_else`. + * + * @tparam F a callable taking `const Error&` and returning an `Expected<T>`. + * @param f the recovery function to invoke on the error. + * @return `*this` on success, or `f(error)` on failure. + */ + template + Expected or_else(F&& f) const& { + return has_value() ? *this : std::forward(f)(error()); + } + + private: + std::variant storage_; +}; + +/** + * Specialization for value-less results (`close`, `flush`, `commit`, …). + * + * Carries no value: it is either in the success state or holds an `Error`. Used as + * the synchronous result type of operations that either succeed or fail without + * producing data. + */ +template <> +class [[nodiscard]] Expected { + public: + /** The (absent) value type. */ + using value_type = void; + /** The error type held in the failure state. */ + using error_type = Error; + + /** Construct in the success state. */ + Expected() noexcept = default; // success + /** Construct in the error state by copying @p error. */ + Expected(const Error& error) : error_(error) {} + /** Construct in the error state by moving @p error. */ + Expected(Error&& error) : error_(std::move(error)) {} + /** Construct in the error state from an `Unexpected` wrapper.
*/ + Expected(Unexpected u) : error_(std::move(u.error)) {} + + /** + * Whether this represents success (as opposed to an error). + * + * @return `true` on success, `false` if it holds an error. + */ + bool has_value() const noexcept { return !error_.has_value(); } + + /** + * Whether this represents success, for use in a boolean context. + * + * @return `true` on success, `false` if it holds an error. + */ + explicit operator bool() const noexcept { return has_value(); } + + /** + * Throw `ClientException` if this holds an error; otherwise return normally. + * + * The value-less analogue of `Expected::value()`: it has no value to yield, + * so on success it simply returns. As with the primary template, the thrown + * type is documented rather than declared in the signature; under + * `-fno-exceptions` this aborts. + */ + void value() const { + if (!has_value()) PULSAR_ST_THROW(ClientException(*error_)); + } + + /** + * Access the contained error. + * + * @pre `!has_value()`. Behaviour is undefined on a success value. + * @return a reference to the contained `Error`. + */ + const Error& error() const& noexcept { return *error_; } + + private: + std::optional error_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Future.h b/include/pulsar/st/Future.h new file mode 100644 index 00000000..000f06f9 --- /dev/null +++ b/include/pulsar/st/Future.h @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + +namespace detail { +template +class Promise; +} + +/** + * The result of an asynchronous operation, available now or later. + * + * Unlike `std::future`, this is **continuation-capable**: `addListener()` runs a + * callback when the operation completes, so you can react without blocking. The + * callback receives an `Expected` (value or error). `get()` is available when + * you do want to block. On a C++20 toolchain the future is also `co_await`-able. + * + * A Future is cheap to copy (it shares the underlying state). Listeners run on + * whichever thread completes the operation — do not block inside one. + * + * @tparam T the type of the value the operation produces (`void` for value-less + * operations, which complete with an `Expected`). + */ +template +class Future { + public: + /** Callback type accepted by `addListener`, invoked with `const Expected&`. */ + using Listener = typename detail::SharedState::Listener; + + /** + * Register a continuation to run when the operation completes. + * + * @p listener is invoked with the `Expected` result (value or error) on the + * thread that completes the operation, or synchronously on the calling thread if + * the operation has already completed. It does not block. Do not block inside + * the listener. Multiple listeners may be registered. + * + * @param listener the callback to invoke on completion. + * @return `*this`, to allow chaining. 
+ */ + Future& addListener(Listener listener) { + state_->addListener(std::move(listener)); + return *this; + } + + /** + * Block the calling thread until the operation completes and return its result. + * + * @return the `Expected` result, holding either the value or the error. + */ + Expected get() const { return state_->get(); } + + /** + * Block until the operation completes or @p timeout elapses, whichever first. + * + * @tparam Rep the `std::chrono::duration` representation type. + * @tparam Period the `std::chrono::duration` period type. + * @param timeout the maximum time to wait for completion. + * @return the `Expected` result if it completed in time, or `std::nullopt` + * if @p timeout elapsed first. + */ + template + std::optional> get(std::chrono::duration timeout) const { + return state_->get(timeout); + } + + /** + * Whether the operation has already completed. + * + * @return `true` if the result is available (so `get()` would not block), + * `false` otherwise. + */ + bool isReady() const { return state_->isReady(); } + + /** + * Return a new Future whose value is `f` applied to this one's value on + * completion; an error propagates unchanged. Lets a typed facade map a Future + * of a (non-templated) core into a Future of its typed wrapper. + * + * @p f is applied on whichever thread completes this Future. If this Future + * completes with an error, @p f is not called and the error is forwarded to the + * returned Future. Only participates in overload resolution when `T` is not + * `void`. + * + * @tparam F a callable taking `const T&` and returning the mapped value. + * @tparam U defaults to `T`; an implementation detail of the `void` constraint. + * @param f the mapping function to apply to the value. + * @return a `Future` of `f`'s return type, completed with the mapped value on + * success or the propagated error on failure. 
+ */ + template , int> = 0> + Future> thenApply(F f) const { + using R = std::invoke_result_t; + detail::Promise promise; + state_->addListener([promise, f = std::move(f)](const Expected& result) { + if (result) { + promise.setValue(f(*result)); + } else { + promise.setError(result.error()); + } + }); + return promise.getFuture(); + } + + /** + * Coroutine support: whether the awaiting coroutine may skip suspension. + * + * Part of the C++20 awaitable interface so a `Future` can be used as + * `Expected r = co_await someFuture;`. Not called directly. + * + * @return `true` if the result is already available, `false` otherwise. + */ + bool await_ready() const { return state_->isReady(); } + + /** + * Coroutine support: suspend the awaiting coroutine until completion. + * + * Registers a listener that resumes @p handle when the operation completes. Part + * of the C++20 awaitable interface; not called directly. + * + * @param handle the suspended coroutine to resume on completion. + */ + void await_suspend(std::coroutine_handle<> handle) { + state_->addListener([handle](const Expected&) { handle.resume(); }); + } + + /** + * Coroutine support: produce the value of a `co_await` expression. + * + * Part of the C++20 awaitable interface; not called directly. + * + * @return the `Expected` result of the awaited operation. + */ + Expected await_resume() const { return state_->get(); } + + private: + template + friend class detail::Promise; + explicit Future(std::shared_ptr> state) : state_(std::move(state)) {} + + std::shared_ptr> state_; +}; + +namespace detail { + +/** + * INTERNAL producing side of a `Future`. The SDK fulfils these to complete the + * futures it returns; applications only ever consume `Future` and never build + * a Promise, so it lives in `detail` rather than on the public surface. 
+ */ +template +class Promise { + public: + Promise() : state_(std::make_shared>()) {} + + Future getFuture() const { return Future(state_); } + + bool complete(Expected result) const { return state_->complete(std::move(result)); } + bool setError(Error error) const { return state_->complete(Expected(std::move(error))); } + + template , int> = 0> + bool setValue(U value) const { + return state_->complete(Expected(std::move(value))); + } + + template , int> = 0> + bool setSuccess() const { + return state_->complete(Expected()); + } + + private: + std::shared_ptr> state_; +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/include/pulsar/st/JsonSchema.h b/include/pulsar/st/JsonSchema.h new file mode 100644 index 00000000..03af1a22 --- /dev/null +++ b/include/pulsar/st/JsonSchema.h @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +// jsonSchema() derives BOTH the JSON SerDe and the declared schema from T's +// fields via reflect-cpp (https://github.com/getml/reflect-cpp) — no per-type +// serializer, no schema string. 
This is the Jackson-equivalent for the Java +// client: you pass a plain struct and it just works. reflect-cpp is a required +// dependency of the scalable-topics API (C++20). + +namespace pulsar::st { + +/// @cond INTERNAL +/// Internal: the reflect-cpp-backed JSON SerDe used by jsonSchema(). Not part +/// of the public API. +namespace detail { +template +struct JsonSerDe { + SchemaInfo info() const { return SchemaInfo(SchemaType::JSON, "JSON", rfl::json::to_schema()); } + std::string encode(const T& value) const { return rfl::json::write(value); } + T decode(const char* data, std::size_t size) const { + return rfl::json::read(std::string(data, size)).value(); + } +}; +} // namespace detail +/// @endcond + +/** + * @brief Creates a JSON schema for `T`, with no boilerplate. + * + * reflect-cpp derives both the JSON SerDe and the declared schema directly from + * the struct's fields (nested structs and containers included) — there is no + * per-type serializer or hand-written schema string. This is the equivalent of the + * Java client's Jackson-based JSON schema: pass a plain struct and it just works. + * + * @code + * struct Order { std::string id; int qty; }; // the whole "declaration" + * auto producer = client.newProducer(jsonSchema()).topic(t).create(); + * @endcode + * + * @tparam T the struct type to serialize as JSON; its fields must be reflectable + * by reflect-cpp. + * @return a `Schema` whose `encode`/`decode` use JSON. + * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes + * are not valid JSON for `T`. + */ +template +Schema jsonSchema() { + return Schema(detail::JsonSerDe{}); +} + +} // namespace pulsar::st diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h new file mode 100644 index 00000000..055ba7a7 --- /dev/null +++ b/include/pulsar/st/Message.h @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + +/** + * A message received from a scalable topic, carrying a value of type `T`. + * + * The value is decoded lazily through `Schema` on each `value()` call; the raw + * bytes and all metadata are available without decoding. + * + * @tparam T the application type the payload decodes to, via `Schema`. + */ +template +class Message { + public: + /** Construct an empty message (`operator bool` is `false`). */ + Message() = default; + + /** + * Wrap a received core payload together with the schema used to decode it. + * + * Constructed by the SDK on delivery; applications obtain `Message` objects from + * a consumer rather than building them. + * + * @param core the raw received message (payload and metadata). + * @param schema the schema used to decode the payload in `value()`. + */ + Message(detail::MessageCore core, Schema schema) : core_(std::move(core)), schema_(std::move(schema)) {} + + /** + * Decode the payload through `Schema` and return the typed value. + * + * Decoding happens on every call (the result is not cached). May throw if the + * payload bytes are malformed for the schema. 
+ * + * @return the decoded value of type `T`. + */ + T value() const { return schema_.decode(core_.data(), core_.size()); } + + /** + * Pointer to the raw, undecoded payload bytes. + * + * @return a pointer to `size()` bytes of payload, valid for this message's lifetime. + */ + const char* data() const { return core_.data(); } + + /** + * Size of the raw payload in bytes. + * + * @return the number of bytes pointed to by `data()`. + */ + std::size_t size() const { return core_.size(); } + + /** + * The message's position within the topic. + * + * @return the `MessageId` identifying this message. + */ + MessageId id() const { return core_.id(); } + + /** + * The optional partition/routing key the message was published with. + * + * @return the key, or `std::nullopt` if the message has none. + */ + std::optional key() const { + return core_.hasKey() ? std::optional(core_.key()) : std::nullopt; + } + + /** + * The application-defined string properties attached to the message. + * + * @return a reference to the properties map. + */ + const Properties& properties() const { return core_.properties(); } + + /** + * The broker-assigned publish time. + * + * @return the timestamp at which the message was published. + */ + Timestamp publishTime() const { return Timestamp(std::chrono::milliseconds(core_.publishTimeMs())); } + + /** + * The optional application-supplied event time. + * + * @return the event time, or `std::nullopt` if the producer did not set one. + */ + std::optional eventTime() const { + auto ms = core_.eventTimeMs(); + return ms != 0 ? std::optional(Timestamp(std::chrono::milliseconds(ms))) : std::nullopt; + } + + /** + * The producer-assigned sequence id of the message. + * + * @return the sequence id. + */ + int64_t sequenceId() const { return core_.sequenceId(); } + + /** + * The name of the producer that published the message, if available. + * + * @return the producer name, or `std::nullopt` if not present. 
+ */ + std::optional producerName() const { + return core_.hasProducerName() ? std::optional(core_.producerName()) : std::nullopt; + } + + /** + * The resolved canonical topic the message was received from. + * + * @return the fully-qualified topic name. + */ + std::string topic() const { return core_.topic(); } + + /** + * How many times this message has been redelivered. + * + * @return the redelivery count (0 on first delivery). + */ + int redeliveryCount() const { return core_.redeliveryCount(); } + + /** + * The source cluster this message was replicated from, if any. + * + * @return the originating cluster name, or `std::nullopt` if the message was not + * replicated from another cluster. + */ + std::optional replicatedFrom() const { + return core_.hasReplicatedFrom() ? std::optional(core_.replicatedFrom()) : std::nullopt; + } + + /** + * Whether this is a non-empty message. + * + * @return `true` for a real received message, `false` for a default-constructed one. + */ + explicit operator bool() const { return static_cast(core_); } + + private: + detail::MessageCore core_; + Schema schema_; +}; + +/** + * An iterable batch of messages, as returned by `receiveMulti`. Carries up to the + * requested count. + * + * @tparam T the application type of each contained `Message`. + */ +template +class Messages { + public: + /** The element type of the batch. */ + using value_type = Message; + /** Const iterator over the contained messages. */ + using const_iterator = typename std::vector>::const_iterator; + + /** + * Construct a batch wrapping the given messages. + * + * @param messages the messages to hold (empty by default). + */ + explicit Messages(std::vector> messages = {}) : messages_(std::move(messages)) {} + + /** + * The number of messages in the batch. + * + * @return the element count. + */ + std::size_t size() const { return messages_.size(); } + + /** + * Whether the batch contains no messages. + * + * @return `true` if empty, `false` otherwise. 
+ */ + bool empty() const { return messages_.empty(); } + + /** + * Access the message at index @p i. + * + * @param i a zero-based index, which must be less than `size()`. + * @return a reference to the message at that index. + */ + const Message& operator[](std::size_t i) const { return messages_[i]; } + + /** + * Iterator to the first message. + * + * @return a const iterator to the beginning of the batch. + */ + const_iterator begin() const { return messages_.begin(); } + + /** + * Iterator past the last message. + * + * @return a const iterator to the end of the batch. + */ + const_iterator end() const { return messages_.end(); } + + private: + std::vector> messages_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/MessageId.h b/include/pulsar/st/MessageId.h new file mode 100644 index 00000000..52abacb8 --- /dev/null +++ b/include/pulsar/st/MessageId.h @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace pulsar::st { + +class MessageIdImpl; +class MessageIdFactory; + +/** + * The identifier of a single message within a scalable topic. 
+ * + * It is **opaque** — no ledger/entry/segment structure is exposed. Internally it + * encodes the segment the message belongs to (so a later ack can be routed to the + * right segment), but that is not part of the contract. A `MessageId` is: + * - serializable, via `toByteArray()` / `fromByteArray()`, for external storage; + * - totally ordered within a single topic, via the comparison operators. + * + * For a consistent position across *all* segments of a topic, use `Checkpoint`, + * not `MessageId` — a single id cannot express a multi-segment position. + */ +class PULSAR_PUBLIC MessageId { + public: + /** Construct an empty/invalid id (compares equal only to other empty ids). */ + MessageId(); + + /** Sentinel: the earliest (oldest) message available in the topic. */ + static const MessageId& earliest(); + + /** Sentinel: the latest (most recently published) message in the topic. */ + static const MessageId& latest(); + + /** Serialize to a portable binary form for external storage. */ + std::string toByteArray() const; + + /** Restore a `MessageId` previously produced by `toByteArray()`. */ + static MessageId fromByteArray(const std::string& data); + + // Totally ordered within a topic; `<=>` and `==` synthesize <, <=, >, >=, !=. + /** + * Three-way comparison establishing a total order within a single topic. + * + * Synthesizes `<`, `<=`, `>`, and `>=`. Ordering of ids from different topics is + * unspecified. + * + * @param other the id to compare against. + * @return the relative ordering of `*this` and @p other. + */ + std::strong_ordering operator<=>(const MessageId& other) const; + + /** + * Equality comparison; also synthesizes `!=`. + * + * @param other the id to compare against. + * @return `true` if the two ids denote the same position. + */ + bool operator==(const MessageId& other) const; + + /** + * Whether this is a valid (non-empty) id. + * + * @return `true` for a real id, `false` for a default-constructed empty one. 
+ */ + explicit operator bool() const { return static_cast(impl_); } + + private: + friend class MessageIdFactory; + explicit MessageId(std::shared_ptr impl); + + /** + * Write a human-readable representation of @p messageId to @p s. + * + * Intended for logging and debugging; the format is not a stable contract and + * must not be parsed (use `toByteArray()` for serialization). + * + * @param s the output stream to write to. + * @param messageId the id to format. + * @return the stream @p s, to allow chaining. + */ + friend PULSAR_PUBLIC std::ostream& operator<<(std::ostream& s, const MessageId& messageId); + + std::shared_ptr impl_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Policies.h b/include/pulsar/st/Policies.h new file mode 100644 index 00000000..5ffb44ba --- /dev/null +++ b/include/pulsar/st/Policies.h @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include + +#include +#include +#include +#include + +/** + * @file + * Grouped client configuration ("policies"), mirroring the Java v5 `config` + * package (spec Appendix A). 
These are plain aggregates, so C++20 designated
+ * initializers read cleanly at the call site:
+ *
+ *     client.builder()
+ *         .serviceUrl("pulsar://localhost:6650")
+ *         .connectionPolicy({.connectionsPerBroker = 4, .connectionTimeout = 10s})
+ *         .tlsPolicy({.enabled = true, .trustCertsFilePath = "/etc/ca.pem"})
+ *         .build();
+ *
+ * Durations are stored as std::chrono types (not raw counts); any coarser unit
+ * converts implicitly (e.g. `30s` into a milliseconds field). The `10s` / `30s`
+ * literals used here require `using namespace std::chrono_literals;` at the call
+ * site.
+ */
+
+namespace pulsar::st {
+
+/**
+ * A quantity of bytes (mirrors Java `MemorySize`).
+ *
+ * A plain aggregate wrapping a byte count, used wherever the configuration takes
+ * a memory budget (e.g. PulsarClientBuilder::memoryLimit). Prefer the `of*`
+ * factory helpers over writing a raw byte literal, as they make the unit explicit
+ * at the call site.
+ *
+ * The helpers are `constexpr` and perform plain unsigned 64-bit arithmetic with no
+ * overflow check: a count too large for the unit wraps modulo 2^64 instead of
+ * being rejected.
+ */
+struct MemorySize {
+    /** The size, expressed in bytes. Defaults to 0. */
+    std::uint64_t bytes = 0;
+
+    /**
+     * Construct a MemorySize from a count of bytes.
+     *
+     * @param b the size in bytes
+     * @return a MemorySize of @p b bytes
+     */
+    static constexpr MemorySize ofBytes(std::uint64_t b) { return {b}; }
+
+    /**
+     * Construct a MemorySize from a count of kibibytes (1 KiB = 1024 bytes).
+     *
+     * @param k the size in kibibytes; values of 2^54 or more wrap around
+     * @return a MemorySize of @p k * 1024 bytes
+     */
+    static constexpr MemorySize ofKiB(std::uint64_t k) { return {k * 1024}; }
+
+    /**
+     * Construct a MemorySize from a count of mebibytes (1 MiB = 1024 * 1024 bytes).
+     *
+     * @param m the size in mebibytes; values of 2^44 or more wrap around
+     * @return a MemorySize of @p m * 1024 * 1024 bytes
+     */
+    static constexpr MemorySize ofMiB(std::uint64_t m) { return {m * 1024 * 1024}; }
+};
+
+/**
+ * Connection-pool, lookup, and request-timeout tuning.
+ *
+ * Every field is optional: when left unset the client applies its built-in
+ * default for that setting. Populate only the fields you wish to override, using
+ * C++20 designated initializers.
+ */ +struct ConnectionPolicy { + /** Number of physical connections opened to each broker. Unset uses the client default. */ + std::optional connectionsPerBroker; + /** Maximum time to wait for a TCP/TLS connection to be established, in milliseconds. Unset uses the client default. */ + std::optional connectionTimeout; + /** Maximum time to wait for a broker request (e.g. produce/consume control ops) to complete, in milliseconds. Unset uses the client default. */ + std::optional operationTimeout; + /** Interval between keep-alive pings sent on an idle connection, in seconds. Unset uses the client default. */ + std::optional keepAliveInterval; + /** Maximum number of concurrent topic-lookup requests in flight. Unset uses the client default. */ + std::optional maxLookupRequests; + /** Maximum number of lookup redirects to follow before failing a lookup. Unset uses the client default. */ + std::optional maxLookupRedirects; + /** Time an idle pooled connection may stay open before being closed, in milliseconds. Unset uses the client default. */ + std::optional maxConnectionIdleTime; +}; + +/** + * Reconnection backoff (mirrors Java `BackoffPolicy`). + * + * Controls the exponential delay applied between automatic reconnection attempts + * after a connection is lost. Both fields are optional; when unset the client + * applies its built-in default for that bound. + */ +struct BackoffPolicy { + /** Delay before the first reconnection attempt, in milliseconds. Unset uses the client default. */ + std::optional initialBackoff; + /** Upper bound on the backoff delay as it grows across retries, in milliseconds. Unset uses the client default. */ + std::optional maxBackoff; +}; + +/** + * Transport security (mirrors Java `TlsPolicy`). + * + * Configures TLS for connections to the broker, including the trust store and an + * optional client certificate for mutual TLS (mTLS). TLS is off unless #enabled + * is set to true. 
+ */ +struct TlsPolicy { + /** Whether TLS is used for broker connections. Defaults to false (plaintext). */ + bool enabled = false; + /** Path to the PEM file of trusted CA certificates used to verify the broker. Unset uses the system trust store. */ + std::optional trustCertsFilePath; + /** Path to the client certificate PEM file, for mutual TLS. Unset disables client-certificate authentication. */ + std::optional certificateFilePath; + /** Path to the client private key PEM file, for mutual TLS. Unset disables client-certificate authentication. */ + std::optional privateKeyFilePath; + /** Whether to accept the broker's certificate without validating it against the trust store. Defaults to false (validation enforced). */ + bool allowInsecureConnection = false; + /** Whether to verify that the broker's certificate hostname matches the endpoint. Defaults to true. */ + bool validateHostname = true; +}; + +/** + * Client-wide transaction settings (spec §9). + * + * Transactions are always available; this policy only tunes the default + * transaction timeout applied to transactions opened by the client. The field is + * optional and the client supplies a built-in default when it is unset. + */ +struct TransactionPolicy { + /** Default lifetime of a transaction before it is automatically aborted, in milliseconds. Unset uses the client default. */ + std::optional timeout; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h new file mode 100644 index 00000000..0fe4fc10 --- /dev/null +++ b/include/pulsar/st/Producer.h @@ -0,0 +1,478 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + +/** + * How a producer claims write access to a topic (spec Appendix A / §4.1). + * + * The access mode is fixed at creation time via `ProducerBuilder::accessMode` + * and controls how the broker arbitrates between multiple producers on the same + * topic. + */ +enum class ProducerAccessMode { + /** Multiple producers may publish to the topic concurrently. The default. */ + Shared, + /** Only one producer may be active at a time; another producer requesting + * `Exclusive` access is rejected while one is already attached. */ + Exclusive, + /** Like `Exclusive`, but a new producer fences out (evicts) any currently + * attached producer rather than being rejected. */ + ExclusiveWithFencing, + /** Like `Exclusive`, but instead of failing when another producer holds the + * topic, this producer waits in line and becomes active once that one + * detaches. */ + WaitForExclusive +}; + +/** + * Producer configuration accumulated by `ProducerBuilder`. + * + * Applications do not populate this directly; the builder fills it in from its + * fluent setters and hands the completed config to the client when creating a + * producer. Field semantics, units and defaults mirror the corresponding builder + * setters. + */ +struct ProducerConfig { + /** Fully-qualified topic to produce to. REQUIRED; no default. 
*/ + std::string topic; + /** Optional producer name. Unset (the default) lets the broker assign one. */ + std::optional producerName; + /** Write-access arbitration mode. Defaults to `ProducerAccessMode::Shared`. */ + ProducerAccessMode accessMode = ProducerAccessMode::Shared; + /** Per-message send timeout in milliseconds. Unset uses the SDK default. */ + std::optional sendTimeoutMs; + /** When true (the default), block the caller while the send queue is full + * instead of failing fast with `ResultProducerQueueIsFull`. */ + bool blockIfQueueFull = true; + /** Sequence id to assign to the first published message. Unset starts from + * the broker-tracked value (0 for a fresh producer). */ + std::optional initialSequenceId; + /** Arbitrary user metadata attached to the producer. Empty by default. */ + Properties properties; + /** Schema descriptor sent to the broker for compatibility checking. Filled in + * by the builder from `Schema::info()`. */ + SchemaInfo schema; +}; + +/** + * A byte-oriented outgoing message assembled by `MessageBuilder`. + * + * This is the encoded, schema-agnostic form of a message: the typed value has + * already been serialized to `payload` bytes. The builder fills these fields from + * its fluent setters and hands the result to the producer core for publishing. + */ +struct OutgoingMessage { + /** Encoded message payload (the value serialized through `Schema`). */ + std::string payload; + /** Whether a routing/ordering key is set. `false` (the default) means no key. */ + bool hasKey = false; + /** Partition/ordering key; meaningful only when `hasKey` is true. */ + std::string key; + /** Per-message user metadata. Empty by default. */ + Properties properties; + int64_t eventTimeMs = 0; ///< Application event time, epoch ms; 0 = unset. + int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. + int64_t deliverAtMs = 0; ///< Absolute delivery time, epoch ms; 0 = deliver immediately. 
+ /** Target clusters for geo-replication; empty applies the topic's default. */ + std::vector replicationClusters; + std::optional transaction; ///< Enlisting transaction; unset = non-transactional. +}; + +template +class Producer; + +/** + * Fluent builder for a single message, obtained from `Producer::newMessage()`. + * + * Each setter mutates the in-progress message and returns `*this`, so calls can + * be chained. The typed value is encoded through `Schema` on `value()`; the + * terminal `send()` / `sendAsync()` hand the encoded message to the producer. A + * builder describes a single message and is consumed by its terminal call (the + * message is moved out), so it should not be reused afterwards. + */ +template +class MessageBuilder { + public: + /** + * Set the message key, used for per-key ordering and key-affinity routing. + * + * @param k the key; taken by value and moved into the message. + * @return `*this`, for chaining. + */ + MessageBuilder& key(std::string k) { + message_.hasKey = true; + message_.key = std::move(k); + return *this; + } + /** + * Set the message value, encoding it to bytes through this producer's + * `Schema`. The encode runs right here, not at `send()`: whatever `encode` throws propagates out of `value()` and is not reported through the result of `send()`. + * + * @param v the typed value to publish. + * @return `*this`, for chaining. + */ + MessageBuilder& value(const T& v) { + message_.payload = schema_.encode(v); + return *this; + } + /** + * Add or overwrite a single user property on the message. + * + * @param k property key. + * @param v property value. + * @return `*this`, for chaining. + */ + MessageBuilder& property(const std::string& k, const std::string& v) { + message_.properties[k] = v; + return *this; + } + /** + * Replace all user properties on the message. + * + * @param p the full property map; taken by value and moved in. + * @return `*this`, for chaining. + */ + MessageBuilder& properties(Properties p) { + message_.properties = std::move(p); + return *this; + } + /** + * Set the application-defined event time of the message.
+ * + * @param t the event time as a wall-clock `Timestamp`; stored as epoch + * milliseconds. Unset by default (event time absent). + * @return `*this`, for chaining. + */ + MessageBuilder& eventTime(Timestamp t) { + message_.eventTimeMs = toEpochMs(t); + return *this; + } + /** + * Set an explicit sequence id for this message, overriding auto-assignment. + * + * @param s the sequence id. By default (-1) the producer assigns one + * automatically. + * @return `*this`, for chaining. + */ + MessageBuilder& sequenceId(int64_t s) { + message_.sequenceId = s; + return *this; + } + /** + * Request delayed delivery: deliver the message after `delay` has elapsed from + * now (spec §4 delayed delivery). + * + * @param delay delay relative to the current time, in milliseconds. Computed + * into an absolute delivery time. Mutually exclusive with + * `deliverAt`; the last of the two called wins. + * @return `*this`, for chaining. + */ + MessageBuilder& deliverAfter(std::chrono::milliseconds delay) { + message_.deliverAtMs = toEpochMs(std::chrono::system_clock::now()) + delay.count(); + return *this; + } + /** + * Request delayed delivery at a specific wall-clock time (spec §4 delayed + * delivery). + * + * @param t absolute delivery time; stored as epoch milliseconds. A time in the + * past delivers immediately. Mutually exclusive with `deliverAfter`; + * the last of the two called wins. + * @return `*this`, for chaining. + */ + MessageBuilder& deliverAt(Timestamp t) { + message_.deliverAtMs = toEpochMs(t); + return *this; + } + /** + * Enlist this publish in a transaction so it becomes visible only on commit + * (spec §9). + * + * @param txn the open transaction to enlist the publish in. + * @return `*this`, for chaining. + */ + MessageBuilder& transaction(const Transaction& txn) { + message_.transaction = txn; + return *this; + } + + /** + * Publish the message and block until the broker acknowledges it. 
+ * + * @return the assigned `MessageId` on success, or the `Error` on failure. Call + * `.value()` on the result to throw `ClientException` instead. + */ + Expected send() { return sendAsync().get(); } + /** + * Publish the message asynchronously without blocking. + * + * @return a `Future` that completes with the assigned id on success + * or the failure. The future may be ignored for fire-and-forget sends. + */ + Future sendAsync() { return core_.sendAsync(std::move(message_)); } + + private: + friend class Producer; + MessageBuilder(detail::ProducerCore core, Schema schema) + : core_(std::move(core)), schema_(std::move(schema)) {} + + static int64_t toEpochMs(Timestamp t) { + return std::chrono::duration_cast(t.time_since_epoch()).count(); + } + + detail::ProducerCore core_; + Schema schema_; + OutgoingMessage message_; +}; + +/** + * A typed producer for a single scalable topic. + * + * Publishes values of type `T`, encoding each through its `Schema`. A producer + * is a lightweight, copyable handle over shared state; a default-constructed + * producer is empty (see `operator bool`) and only a producer obtained from + * `ProducerBuilder::create()` / `createAsync()` is live. All publish methods + * are thread-safe. + */ +template +class Producer { + public: + /** Construct an empty producer; `operator bool` is false until assigned a + * live producer from `ProducerBuilder`. */ + Producer() = default; + + /** + * Begin building a single message with per-message options (key, properties, + * event time, delayed delivery, transaction, ...). + * + * @return a fresh `MessageBuilder` bound to this producer. + */ + MessageBuilder newMessage() { return MessageBuilder(core_, schema_); } + + /** + * Publish a value and block until the broker acknowledges it. Convenience for + * `newMessage().value(value).send()`. + * + * @param value the value to publish. + * @return the assigned `MessageId` on success, or the `Error` on failure. 
Call + * `.value()` on the result to throw `ClientException` instead. + */ + Expected send(const T& value) { return newMessage().value(value).send(); } + /** + * Publish a value asynchronously without blocking. Convenience for + * `newMessage().value(value).sendAsync()`. + * + * @param value the value to publish. + * @return a `Future` that completes with the assigned id or the + * failure. May be ignored for fire-and-forget sends. + */ + Future sendAsync(const T& value) { return newMessage().value(value).sendAsync(); } + + /** @return the topic this producer publishes to. */ + const std::string& topic() const { return core_.topic(); } + /** @return the producer's name (broker-assigned when none was configured). */ + const std::string& name() const { return core_.name(); } + /** @return the sequence id of the most recently published message, or -1 if + * none has been published yet. */ + int64_t lastSequenceId() const { return core_.lastSequenceId(); } + + /** + * Block until all sends issued before this call have completed. Takes a + * snapshot of in-flight sends at the time of the call; sends issued afterwards + * are not awaited. + * + * @return success, or the first `Error` among the awaited sends. Call + * `.value()` to throw `ClientException` instead. + */ + Expected flush() { return core_.flushAsync().get(); } + /** + * Asynchronously await all sends issued before this call (a snapshot of + * in-flight sends). + * + * @return a `Future` that completes once those sends finish. + */ + Future flushAsync() { return core_.flushAsync(); } + /** + * Block until pending sends complete, then release the producer. Idempotent: + * closing an already-closed or empty producer succeeds. + * + * @return success, or the `Error` if closing failed. Call `.value()` to throw + * `ClientException` instead. + */ + Expected close() { return core_.closeAsync().get(); } + /** + * Asynchronously complete pending sends and release the producer. Idempotent. 
+ * + * @return a `Future` that completes once the producer is closed. + */ + Future closeAsync() { return core_.closeAsync(); } + + /** @return true if this is a live producer handle; false if empty (default + * constructed or moved-from). */ + explicit operator bool() const { return static_cast(core_); } + + private: + template + friend class ProducerBuilder; + Producer(detail::ProducerCore core, Schema schema) : core_(std::move(core)), schema_(std::move(schema)) {} + + detail::ProducerCore core_; + Schema schema_; +}; + +/** + * Builder for a `Producer`, obtained from `PulsarClient::newProducer`. + * + * Each setter returns `*this` for chaining. `topic` is the only required setting; + * the terminal `create()` / `createAsync()` produce the `Producer`. + */ +template +class ProducerBuilder { + public: + /** + * Set the topic to produce to. REQUIRED; there is no default. + * + * @param t the fully-qualified topic name; taken by value and moved in. + * @return `*this`, for chaining. + */ + ProducerBuilder& topic(std::string t) { + config_.topic = std::move(t); + return *this; + } + /** + * Set an explicit producer name. + * + * @param n the producer name; taken by value and moved in. Optional; when + * unset (the default) the broker assigns a name. + * @return `*this`, for chaining. + */ + ProducerBuilder& producerName(std::string n) { + config_.producerName = std::move(n); + return *this; + } + /** + * Set the write-access arbitration mode (spec Appendix A / §4.1). + * + * @param m the access mode. Defaults to `ProducerAccessMode::Shared`. + * @return `*this`, for chaining. + */ + ProducerBuilder& accessMode(ProducerAccessMode m) { + config_.accessMode = m; + return *this; + } + /** + * Set the per-message send timeout: how long a send may stay unacknowledged + * before failing. + * + * @param d the timeout, in milliseconds. Optional; when unset the SDK default + * applies. + * @return `*this`, for chaining. 
+ */ + ProducerBuilder& sendTimeout(std::chrono::milliseconds d) { + config_.sendTimeoutMs = d.count(); + return *this; + } + + /** + * Control behavior when the producer's send queue is full. + * + * @param b when true (the DEFAULT), block the caller until the queue drains; + * when false, fail fast with `ResultProducerQueueIsFull`. + * @return `*this`, for chaining. + */ + ProducerBuilder& blockIfQueueFull(bool b) { + config_.blockIfQueueFull = b; + return *this; + } + /** + * Set the sequence id assigned to the first published message. + * + * @param s the initial sequence id. Optional; when unset the producer starts + * from the broker-tracked value (0 for a fresh producer). + * @return `*this`, for chaining. + */ + ProducerBuilder& initialSequenceId(int64_t s) { + config_.initialSequenceId = s; + return *this; + } + /** + * Add or overwrite a single user property on the producer. + * + * @param k property key. + * @param v property value. + * @return `*this`, for chaining. + */ + ProducerBuilder& property(const std::string& k, const std::string& v) { + config_.properties[k] = v; + return *this; + } + + /** + * Create the producer, blocking until it is ready. + * + * @return the live `Producer` on success, or the `Error` on failure. Call + * `.value()` on the result to throw `ClientException` instead. + */ + Expected> create() { return createAsync().get(); } + /** + * Create the producer asynchronously without blocking. + * + * @return a `Future>` that completes with the live producer on + * success or the failure. 
+ */ + Future> createAsync() { + Schema schema = schema_; + ProducerConfig config = config_; + config.schema = schema.info(); + return client_.createProducerAsync(std::move(config)).thenApply([schema](const detail::ProducerCore& core) { + return Producer(core, schema); + }); + } + + private: + friend class PulsarClient; + ProducerBuilder(detail::ClientCore client, Schema schema) + : client_(std::move(client)), schema_(std::move(schema)) {} + + detail::ClientCore client_; + Schema schema_; + ProducerConfig config_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/ProtobufNativeSchema.h b/include/pulsar/st/ProtobufNativeSchema.h new file mode 100644 index 00000000..348ad051 --- /dev/null +++ b/include/pulsar/st/ProtobufNativeSchema.h @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace pulsar::st { + +/// @cond INTERNAL +/// Internal: the protobuf-backed SerDe used by protobufNativeSchema(). Not part +/// of the public API. 
+namespace detail { +template +struct ProtobufNativeSerDe { + static_assert(std::is_base_of_v, + "protobufNativeSchema requires T to be a generated protobuf Message"); + SchemaInfo info() const { return pulsar::createProtobufNativeSchema(T::descriptor()); } + std::string encode(const T& value) const { return value.SerializeAsString(); } + T decode(const char* data, std::size_t size) const { + T message; + message.ParseFromArray(data, static_cast(size)); /* NOTE(review): the value returned by ParseFromArray is discarded, so a payload that fails to parse is not reported to the caller - confirm this is intended. */ + return message; + } +}; +} // namespace detail +/// @endcond + +/** + * @brief Creates a schema for a generated protobuf message type `T`. + * + * Unlike JSON/Avro, the SerDe is **fully automatic**: protobuf itself provides the + * serialization, and the broker schema is derived from the message descriptor, so + * no per-type mapping or reflection library is needed. + * + * @code + * auto producer = client.newProducer(protobufNativeSchema()).topic(t).create(); + * @endcode + * + * @tparam T the message type; must derive from `google::protobuf::Message` (a + * generated protobuf class). This is enforced at compile time. + * @return a `Schema` whose `encode`/`decode` use protobuf's native wire format. Note that `decode` does not check whether parsing succeeded. + */ +template +Schema protobufNativeSchema() { + return Schema(detail::ProtobufNativeSerDe{}); +} + +} // namespace pulsar::st diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h new file mode 100644 index 00000000..c82f2256 --- /dev/null +++ b/include/pulsar/st/QueueConsumer.h @@ -0,0 +1,372 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + + +/** + * Plain configuration struct accumulated by `QueueConsumerBuilder`. + * + * Each field mirrors a builder setter. Prefer building through + * `QueueConsumerBuilder` rather than populating this struct directly: its `topic()` / + * `inNamespace()` setters keep `useNamespace` in step with the target chosen last. + * NOTE(review): the builder code itself does not check that `subscriptionName` is set; presumably the client validates it when subscribing - confirm. + */ +struct QueueConsumerConfig { + /// Selects namespace mode over single-topic mode. When `false` (the default), + /// `topic` is used; when `true`, `namespaceName` (and `propertyFilters`) apply. + bool useNamespace = false; + /// Fully-qualified topic name. Used only when `useNamespace == false`. Mutually + /// exclusive with `namespaceName`. + std::string topic; // when !useNamespace + /// Namespace name (`tenant/namespace`). Used only when `useNamespace == true`. + /// Subscribes to all scalable topics in the namespace with live membership. + std::string namespaceName; // when useNamespace + /// Namespace mode only: AND filters matched against topic properties to select + /// which topics in the namespace are included. Empty means no filtering (all + /// topics). Ignored in single-topic mode. + Properties propertyFilters; // namespace mode: AND filters over topic properties + /// REQUIRED. Subscription name shared by all consumers of this subscription.
+ std::string subscriptionName; // REQUIRED + /// Where the subscription starts when it is first created. Default + /// `SubscriptionInitialPosition::Latest` (skip the backlog). Has no effect once + /// the subscription already exists. + SubscriptionInitialPosition initialPosition = SubscriptionInitialPosition::Latest; + /// Optional consumer name (useful for diagnostics and metrics). Default unset, in + /// which case the broker assigns one. + std::optional consumerName; + /// Acknowledgment tuning (e.g. the ack-grouping/batching window and negative-ack + /// redelivery delay). Default-constructed `AckPolicy` when unset. + AckPolicy ackPolicy; + /// Optional dead-letter policy: route messages to a dead-letter topic after + /// repeated redelivery. Default unset (no dead-lettering). + std::optional deadLetterPolicy; + /// Arbitrary client-side consumer properties (reported in topic stats). Default empty. + Properties properties; + /// Schema descriptor for the value type `T`. Populated automatically by the builder + /// from the `Schema` it was constructed with. + SchemaInfo schema; +}; + + + + +template +class QueueConsumerBuilder; + +/** + * Parallel, broker-managed consumer with **individual ack + nack + dead-letter**. + * + * This is the analog of a classic Shared subscription, attached to all segments of + * a scalable topic (spec §7.2): work is distributed across all consumers on the + * subscription with no ordering or key affinity. Each message is acknowledged + * individually with `acknowledge`, can be negatively acknowledged with + * `negativeAcknowledge` to schedule redelivery, and can be routed to a dead-letter + * topic after repeated redelivery (see `QueueConsumerBuilder::deadLetterPolicy`). + * For ordered, cumulative-ack consumption use `StreamConsumer` instead. + * + * Obtain an instance from `QueueConsumerBuilder`. The default-constructed + * consumer is an empty handle (`operator bool` is `false`) until assigned one.
+ * + * @tparam T the decoded message value type, determined by the `Schema` used to + * build this consumer. + */ +template +class QueueConsumer { + public: + /** Construct an empty, non-live handle. `operator bool` returns `false` until a + * subscribed consumer is move-assigned into it. */ + QueueConsumer() = default; + + /** + * Block until the next message arrives and return it. + * + * Returns `Expected` because a receive can fail *without* yielding a message — + * the consumer was closed, the connection dropped, or the payload failed to + * decode. On such failures the result holds an `Error` instead of a message; + * call `.value()` on the result if you would rather throw a `ClientException`. + * + * @return the next `Message`, or an `Error` describing why no message could + * be delivered. + */ + Expected> receive() { return toTyped(core_.receiveAsync().get()); } + /** + * Block for the next message, but for no longer than `timeout`. + * + * @param timeout maximum time to wait for a message. + * @return the next `Message`; if no message arrives within `timeout`, an + * `Error{ResultTimeout}`; or another `Error` on close/disconnect/decode + * failure. + */ + Expected> receive(std::chrono::milliseconds timeout) { + return toTyped(core_.receiveAsync(timeout.count()).get()); + } + /** + * Request the next message without blocking. + * + * @return a `Future>` completed with the message when one is + * available, or completed with an `Error` (via the future's `Expected` + * result) on close/disconnect/decode failure. + */ + Future> receiveAsync() { + Schema schema = schema_; + return core_.receiveAsync().thenApply( + [schema](const detail::MessageCore& core) { return Message(core, schema); }); + } + + /** + * Individually acknowledge one message. + * + * Fire-and-forget: it does not block and does not report an error. Acks are + * buffered and delivered best-effort; a lost ack simply causes redelivery. + * + * @param id the id of the message to acknowledge. 
+ */ + void acknowledge(const MessageId& id) { core_.acknowledge(id); } + /** + * Transactional individual acknowledge: enlist the ack of `id` in `txn`. + * + * The acknowledgment becomes effective only when `txn` commits; its outcome (and + * any error) surfaces at `Transaction::commit()`, not here. + * + * @param id the id of the message to acknowledge. + * @param txn the transaction the acknowledgment is enlisted in. + */ + void acknowledge(const MessageId& id, const Transaction& txn) { core_.acknowledge(id, txn); } + + /** + * Negatively acknowledge a message, scheduling it for redelivery. + * + * Fire-and-forget `void`: it does not block and does not report an error. The + * redelivery delay is governed by the configured `AckPolicy`. After enough + * redeliveries the message may be sent to the dead-letter topic if a + * `DeadLetterPolicy` was configured. + * + * @param id the id of the message to redeliver. + */ + void negativeAcknowledge(const MessageId& id) { core_.negativeAcknowledge(id); } + + /** + * Close the consumer, releasing its broker-side resources. Blocking. + * + * @return an empty `Expected` on success, or an `Error` if the close + * failed. Call `.value()` to throw instead. + */ + Expected close() { return core_.closeAsync().get(); } + /** + * Close the consumer without blocking. + * + * @return a `Future` completed when the close finishes (or with an `Error` + * on failure). + */ + Future closeAsync() { return core_.closeAsync(); } + + /** @return the topic this consumer is subscribed to. In namespace mode this is the + * namespace-derived subscription target. */ + const std::string& topic() const { return core_.topic(); } + /** @return the subscription name. */ + const std::string& subscription() const { return core_.subscription(); } + /** @return the consumer name (broker-assigned if none was set on the builder). 
*/ + const std::string& consumerName() const { return core_.consumerName(); } + + /** @return `true` if this is a live, subscribed consumer; `false` if it is an empty + * (default-constructed or closed/moved-from) handle. */ + explicit operator bool() const { return static_cast(core_); } + + private: + template + friend class QueueConsumerBuilder; + QueueConsumer(detail::QueueConsumerCore core, Schema schema) + : core_(std::move(core)), schema_(std::move(schema)) {} + /** Wrap a blocking receive result: on success pair the received message with this consumer's schema; on failure forward the error unchanged. */ + Expected> toTyped(Expected r) const { + if (r) return Message(*r, schema_); + return Expected>(r.error()); + } + + detail::QueueConsumerCore core_; + Schema schema_; +}; + +/** + * Fluent builder for a `QueueConsumer`. + * + * Obtain one from `PulsarClient`. Set **exactly one** of `topic()` or + * `inNamespace()` to choose the subscription target, set the REQUIRED + * `subscriptionName()`, then call `subscribe()` / `subscribeAsync()` to create the + * consumer. All setters return `*this` for chaining. + * + * @tparam T the decoded message value type, fixed by the `Schema` the builder + * was created with. + */ +template +class QueueConsumerBuilder { + public: + /** + * Subscribe to a single scalable topic. Mutually exclusive with `inNamespace()`; + * set exactly one. Calling this switches the config back to single-topic mode; a namespace or filters set earlier stay stored in the config but are no longer selected. + * + * @param t the fully-qualified topic name. + * @return `*this` for chaining. + */ + QueueConsumerBuilder& topic(std::string t) { + config_.useNamespace = false; + config_.topic = std::move(t); + return *this; + } + /** + * Subscribe to all scalable topics in a namespace with live membership — topics + * created or removed later are joined/dropped automatically (spec §7.2). + * Mutually exclusive with `topic()`; set exactly one. + * + * Named `inNamespace` because `namespace` is a C++ keyword. + * + * @param ns the namespace (`tenant/namespace`) to subscribe across.
+ * @param propertyFilters optional AND filters matched against topic properties; + * only topics matching all entries are included. Default empty (no + * filtering — every topic in the namespace). + * @return `*this` for chaining. + */ + QueueConsumerBuilder& inNamespace(std::string ns, Properties propertyFilters = {}) { + config_.useNamespace = true; + config_.namespaceName = std::move(ns); + config_.propertyFilters = std::move(propertyFilters); + return *this; + } + /** + * REQUIRED. Set the subscription name shared by all consumers of this + * subscription. + * + * @param s the subscription name. + * @return `*this` for chaining. + */ + QueueConsumerBuilder& subscriptionName(std::string s) { + config_.subscriptionName = std::move(s); + return *this; + } + /** + * Set where the subscription starts when first created. + * + * @param p the initial position. Default `SubscriptionInitialPosition::Latest`. + * Has no effect if the subscription already exists. + * @return `*this` for chaining. + */ + QueueConsumerBuilder& subscriptionInitialPosition(SubscriptionInitialPosition p) { + config_.initialPosition = p; + return *this; + } + /** + * Set an explicit consumer name (useful for diagnostics and metrics). + * + * @param n the consumer name. Default unset, in which case the broker assigns one. + * @return `*this` for chaining. + */ + QueueConsumerBuilder& consumerName(std::string n) { + config_.consumerName = std::move(n); + return *this; + } + /** + * Tune acknowledgment behavior (e.g. the ack-grouping/batching window and the + * negative-ack redelivery delay). + * + * @param policy the ack policy. Default-constructed `AckPolicy` when unset. + * @return `*this` for chaining. + */ + QueueConsumerBuilder& ackPolicy(AckPolicy policy) { + config_.ackPolicy = std::move(policy); + return *this; + } + /** + * Route messages to a dead-letter topic after repeated redelivery (spec §7.2). + * QueueConsumer only. 
+ * + * @param policy the dead-letter policy (max redeliveries, DLQ topic name, etc.). + * Default unset (no dead-lettering). + * @return `*this` for chaining. + */ + QueueConsumerBuilder& deadLetterPolicy(DeadLetterPolicy policy) { + config_.deadLetterPolicy = std::move(policy); + return *this; + } + /** + * Add a single client-side consumer property (reported in topic stats). Call + * repeatedly to add multiple; a repeated key overwrites the previous value. + * + * @param k property key. + * @param v property value. + * @return `*this` for chaining. + */ + QueueConsumerBuilder& property(const std::string& k, const std::string& v) { + config_.properties[k] = v; + return *this; + } + + /** + * Create the consumer and subscribe. Blocking. + * + * @return the live `QueueConsumer` on success, or an `Error` if the + * subscription failed (e.g. missing `subscriptionName`, both/neither of + * topic and namespace set, or a broker error). Call `.value()` to throw + * instead. + */ + Expected> subscribe() { return subscribeAsync().get(); } + /** + * Create the consumer and subscribe without blocking. + * + * @return a `Future>` completed with the live consumer, or with + * an `Error` on failure. 
+ */ + Future> subscribeAsync() { + Schema schema = schema_; + QueueConsumerConfig config = config_; + config.schema = schema.info(); + return client_.subscribeQueueAsync(std::move(config)) + .thenApply([schema](const detail::QueueConsumerCore& core) { return QueueConsumer(core, schema); }); + } + + private: + friend class PulsarClient; + QueueConsumerBuilder(detail::ClientCore client, Schema schema) + : client_(std::move(client)), schema_(std::move(schema)) {} + + detail::ClientCore client_; + Schema schema_; + QueueConsumerConfig config_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h new file mode 100644 index 00000000..1ad5d649 --- /dev/null +++ b/include/pulsar/st/Schema.h @@ -0,0 +1,275 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + +using pulsar::SchemaInfo; +using pulsar::SchemaType; + +/** + * @brief The default value type: a raw, uninterpreted byte payload. + * + * Alias for `std::vector`. 
A `Schema` (the default schema) passes the + * payload through verbatim in both directions, applying no encoding or schema + * declaration to the broker beyond `SchemaType::BYTES`. + */ +using Bytes = std::vector; + +/** + * @brief Constraint identifying a *SerDe* for `T`: a type that can describe `T` to + * the broker and convert it to and from bytes. + * + * A SerDe is any copyable type providing the three const members required below. + * `Schema` is constructible from any value satisfying `SerDeFor`, + * which is how the same `T` can be carried by different encodings (JSON, Avro, + * protobuf, or a fully custom codec). + * + * The required members are: + * - `SchemaInfo info() const` — the schema description sent to the broker for + * compatibility checking. + * - `std::string encode(const T&) const` — serializes a value of `T` to bytes. + * - `T decode(const char*, std::size_t) const` — deserializes bytes back to `T`. + * + * @tparam S the candidate SerDe type. + * @tparam T the value type the SerDe handles. + */ +template +concept SerDeFor = requires(const S& serde, const T& value, const char* data, std::size_t size) { + { serde.info() } -> std::convertible_to; + { serde.encode(value) } -> std::convertible_to; + { serde.decode(data, size) } -> std::convertible_to; +}; + +/** + * `Schema` is the typed seam of the API: `Producer`, `Consumer` and + * `Message` are thin facades that only ever call `Schema::encode` / `decode` + * (and `info`, sent to the broker for compatibility). It is a lightweight, + * copyable **value** that holds a type-erased *SerDe* — so the same `T` can be + * carried by different encodings, and a producer can be handed any schema. 
+ * + * A SerDe is any copyable type providing three const members: + * SchemaInfo info() const; // describes T to the broker + * std::string encode(const T& value) const; // T -> bytes + * T decode(const char* data, size_t) const; // bytes -> T + * + * Construct a `Schema` from a SerDe directly, or use a factory: + * - primitives: `Schema{}`, `Schema{}`, `Schema{}` (default) + * - `jsonSchema()` — (reflect-cpp; no trait) + * - `avroSchema()` — (reflect-cpp; no trait) + * - `protobufNativeSchema()` — (automatic) + * - or a custom SerDe: `Schema(mySerDe)` for full control. + * + * JSON/Avro for a user struct are derived automatically from the struct's fields + * by reflect-cpp; protobuf uses the generated message's own reflection. + */ +template +class Schema { + public: + /** @brief The value type carried by this schema. */ + using value_type = T; + + /** + * @brief Constructs the default schema for `T`. + * + * For a primitive `T` this installs the built-in codec: `Bytes` (the default), + * `std::string`, `std::int32_t`, `std::int64_t`, or `double`. Integers and + * `double` are encoded as fixed-width big-endian, matching the Pulsar wire + * format for primitive schemas. + * + * For any other (non-primitive) `T` this installs an "unset" schema: it reports + * `SchemaType::BYTES` to the broker, but its `encode` and `decode` throw + * `ClientException` on use. Supply a real schema (`jsonSchema()`, + * `avroSchema()`, `protobufNativeSchema()`, or a custom SerDe) before + * producing or consuming such a `T`. + */ + Schema(); + + /** + * @brief Constructs a schema from any SerDe value. + * + * Wraps and type-erases @p serde so that this `Schema` forwards `info`, + * `encode` and `decode` to it. Use this to plug in a custom encoding for `T`. + * + * @tparam SerDe a copyable type satisfying `SerDeFor`. + * @param serde the SerDe to adopt; taken by value and stored. 
+ */ + template + requires(!std::is_same_v, Schema> && SerDeFor, T>) + Schema(SerDe serde) : self_(std::make_shared>>(std::move(serde))) {} + + /** + * @brief Returns the schema description sent to the broker for compatibility. + * @return the `SchemaInfo` (type, name and schema definition) describing `T`. + */ + SchemaInfo info() const { return self_->info(); } + + /** + * @brief Serializes a value to its wire bytes. + * @param value the value to encode. + * @return the encoded payload as a byte string. + * @throws ClientException if this is an unset schema (non-primitive `T` with no + * SerDe supplied). A custom SerDe may throw on its own encoding errors. + */ + std::string encode(const T& value) const { return self_->encode(value); } + + /** + * @brief Deserializes wire bytes back into a value of `T`. + * @param data pointer to the payload bytes. + * @param size number of bytes available at @p data. + * @return the decoded value. + * @throws ClientException if this is an unset schema (non-primitive `T` with no + * SerDe supplied). A SerDe may also throw on malformed or incompatible + * bytes. + */ + T decode(const char* data, std::size_t size) const { return self_->decode(data, size); } + + private: + struct Concept { + virtual ~Concept() = default; + virtual SchemaInfo info() const = 0; + virtual std::string encode(const T&) const = 0; + virtual T decode(const char*, std::size_t) const = 0; + }; + template + struct Model final : Concept { + SerDe serde; + explicit Model(SerDe s) : serde(std::move(s)) {} + SchemaInfo info() const override { return serde.info(); } + std::string encode(const T& v) const override { return serde.encode(v); } + T decode(const char* d, std::size_t n) const override { return serde.decode(d, n); } + }; + + std::shared_ptr self_; +}; + +/// @cond INTERNAL +/// Internal implementation details: built-in primitive codecs and helpers. Not +/// part of the public API. 
+namespace detail {
+
+/**
+ * Serialize an integral value as fixed-width big-endian bytes, the layout Pulsar
+ * uses for numeric schemas.
+ *
+ * @tparam U an integral type (enforced by the `static_assert`).
+ * @param value the value to serialize.
+ * @return a string of exactly `sizeof(U)` bytes, most-significant byte first.
+ */
+template <typename U>
+inline std::string encodeBigEndian(U value) {
+    static_assert(std::is_integral_v<U>, "integral only");
+    std::string out(sizeof(U), '\0');
+    // Do the shifting on the unsigned counterpart of U so that the bit pattern of
+    // a negative value is emitted as-is.
+    auto u = static_cast<std::make_unsigned_t<U>>(value);
+    for (std::size_t i = 0; i < sizeof(U); ++i) {
+        out[i] = static_cast<char>((u >> (8 * (sizeof(U) - 1 - i))) & 0xFF);
+    }
+    return out;
+}
+
+/**
+ * Rebuild an integral value from big-endian bytes (the inverse of
+ * `encodeBigEndian`).
+ *
+ * Reads at most `sizeof(U)` bytes and never more than `size`; any bytes beyond
+ * `sizeof(U)` are ignored. When `size` is smaller than `sizeof(U)` no error is
+ * raised: the bytes that are present become the low-order bytes of the result.
+ * NOTE(review): a truncated payload is therefore decoded silently into a
+ * different value; confirm whether callers should reject it instead.
+ *
+ * @tparam U an integral type (enforced by the `static_assert`).
+ * @param data pointer to the encoded bytes.
+ * @param size number of bytes readable at @p data.
+ * @return the decoded value.
+ */
+template <typename U>
+inline U decodeBigEndian(const char* data, std::size_t size) {
+    static_assert(std::is_integral_v<U>, "integral only");
+    std::make_unsigned_t<U> u = 0;
+    for (std::size_t i = 0; i < sizeof(U) && i < size; ++i) {
+        // Go through an unsigned byte so a set high bit in `char` is not
+        // sign-extended into the accumulated value.
+        u = (u << 8) | static_cast<unsigned char>(data[i]);
+    }
+    return static_cast<U>(u);
+}
+
+/**
+ * Report that a value type has no usable schema. Never returns: throws
+ * `ClientException` with `ResultInvalidConfiguration` when exceptions are
+ * enabled, otherwise calls `std::abort()`.
+ */
+[[noreturn]] inline void throwNoSchema() {
+#if defined(__cpp_exceptions) || defined(_CPPUNWIND)
+    throw ClientException(pulsar::ResultInvalidConfiguration,
+                          "no schema configured for this value type — pass an explicit Schema "
+                          "(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)");
+#else
+    std::abort();
+#endif
+}
+
+// Built-in SerDe codecs.
+struct BytesCodec { + SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } + std::string encode(const Bytes& v) const { return std::string(v.begin(), v.end()); } + Bytes decode(const char* d, std::size_t n) const { return Bytes(d, d + n); } +}; +struct StringCodec { + SchemaInfo info() const { return SchemaInfo(SchemaType::STRING, "String", ""); } + std::string encode(const std::string& v) const { return v; } + std::string decode(const char* d, std::size_t n) const { return std::string(d, n); } +}; +struct Int32Codec { + SchemaInfo info() const { return SchemaInfo(SchemaType::INT32, "INT32", ""); } + std::string encode(std::int32_t v) const { return encodeBigEndian(v); } + std::int32_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } +}; +struct Int64Codec { + SchemaInfo info() const { return SchemaInfo(SchemaType::INT64, "INT64", ""); } + std::string encode(std::int64_t v) const { return encodeBigEndian(v); } + std::int64_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } +}; +struct DoubleCodec { + SchemaInfo info() const { return SchemaInfo(SchemaType::DOUBLE, "Double", ""); } + std::string encode(double v) const { + std::uint64_t bits; + std::memcpy(&bits, &v, sizeof(bits)); + return encodeBigEndian(static_cast(bits)); + } + double decode(const char* d, std::size_t n) const { + auto bits = static_cast(decodeBigEndian(d, n)); + double v; + std::memcpy(&v, &bits, sizeof(v)); + return v; + } +}; +template +struct UnsetCodec { + SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } + std::string encode(const T&) const { throwNoSchema(); } + T decode(const char*, std::size_t) const { throwNoSchema(); } +}; + +} // namespace detail +/// @endcond + +template +Schema::Schema() { + if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::BytesCodec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::StringCodec{}); + } else 
if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::Int32Codec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::Int64Codec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::DoubleCodec{}); + } else { + self_ = std::make_shared>>(detail::UnsetCodec{}); + } +} + +} // namespace pulsar::st diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h new file mode 100644 index 00000000..a0f7c198 --- /dev/null +++ b/include/pulsar/st/StreamConsumer.h @@ -0,0 +1,416 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + + +/** + * Plain-old-data configuration accumulated by `StreamConsumerBuilder`. + * + * Each field mirrors a builder setter. Prefer building through + * `StreamConsumerBuilder` rather than populating this struct directly; the + * builder enforces the invariants (notably that exactly one of topic vs. + * namespace mode is selected and that `subscriptionName` is set). 
+ */ +struct StreamConsumerConfig { + /// Selects namespace mode over single-topic mode. When `false` (the default), + /// `topic` is used; when `true`, `namespaceName` (and `propertyFilters`) apply. + bool useNamespace = false; + /// Fully-qualified topic name. Used only when `useNamespace == false`. Mutually + /// exclusive with `namespaceName`. + std::string topic; // when !useNamespace + /// Namespace name (`tenant/namespace`). Used only when `useNamespace == true`. + /// Subscribes to all scalable topics in the namespace with live membership. + std::string namespaceName; // when useNamespace + /// Namespace mode only: AND filters matched against topic properties to select + /// which topics in the namespace are included. Empty means no filtering (all + /// topics). Ignored in single-topic mode. + Properties propertyFilters; // namespace mode: AND filters over topic properties + /// REQUIRED. Subscription name shared by all consumers of this subscription. + std::string subscriptionName; // REQUIRED + /// Where the subscription starts when it is first created. Default + /// `SubscriptionInitialPosition::Latest` (skip the backlog). Has no effect once + /// the subscription already exists. + SubscriptionInitialPosition initialPosition = SubscriptionInitialPosition::Latest; + /// Optional key/value properties attached to the subscription itself (persisted + /// broker-side). Default empty. + Properties subscriptionProperties; + /// Optional consumer name (useful for diagnostics and metrics). Default unset, in + /// which case the broker assigns one. + std::optional consumerName; + /// Acknowledgment tuning (e.g. the ack-grouping/batching window). Default-constructed + /// `AckPolicy` when unset. + AckPolicy ackPolicy; + /// When set to `true`, read from the topic's compacted view (latest value per key) + /// instead of the full log. Default unset (broker default, i.e. uncompacted). 
+ std::optional readCompacted; + /// When set to `true`, replicate the subscription's acknowledged position across + /// geo-replication clusters. Default unset (broker default, i.e. disabled). + std::optional replicateSubscriptionState; + /// Arbitrary client-side consumer properties (reported in topic stats). Default empty. + Properties properties; + /// Schema descriptor for the value type `T`. Populated automatically by the builder + /// from the `Schema` it was constructed with. + SchemaInfo schema; +}; + + + + +template +class StreamConsumerBuilder; + +/** + * Ordered (per-key), broker-managed consumer with **cumulative ack only**. + * + * This is the closest analog to a classic Failover subscription, but spanning all + * segments of a scalable topic (spec §7.1): messages are delivered in order + * (per-key) and the broker manages segment assignment for you. A single + * `acknowledgeCumulative` advances every segment up to the delivered message's + * position; there is no individual ack, no negative ack, and no dead-letter + * support. For parallel, unordered consumption with per-message ack use + * `QueueConsumer` instead. + * + * Obtain an instance from `StreamConsumerBuilder`. The default-constructed + * consumer is an empty handle (`operator bool` is `false`) until assigned one. + * + * @tparam T the decoded message value type, determined by the `Schema` used to + * build this consumer. + */ +template +class StreamConsumer { + public: + /** Construct an empty, non-live handle. `operator bool` returns `false` until a + * subscribed consumer is move-assigned into it. */ + StreamConsumer() = default; + + /** + * Block until the next message arrives and return it. + * + * Returns `Expected` because a receive can fail *without* yielding a message — + * the consumer was closed, the connection dropped, or the payload failed to + * decode. 
On such failures the result holds an `Error` instead of a message; + * call `.value()` on the result if you would rather throw a `ClientException`. + * + * @return the next `Message`, or an `Error` describing why no message could + * be delivered. + */ + Expected> receive() { return toTyped(core_.receiveAsync().get()); } + /** + * Block for the next message, but for no longer than `timeout`. + * + * @param timeout maximum time to wait for a message. + * @return the next `Message`; if no message arrives within `timeout`, an + * `Error{ResultTimeout}`; or another `Error` on close/disconnect/decode + * failure. + */ + Expected> receive(std::chrono::milliseconds timeout) { + return toTyped(core_.receiveAsync(timeout.count()).get()); + } + /** + * Request the next message without blocking. + * + * @return a `Future>` completed with the message when one is + * available, or completed with an `Error` (via the future's + * `Expected` result) on close/disconnect/decode failure. + */ + Future> receiveAsync() { + Schema schema = schema_; + return core_.receiveAsync().thenApply( + [schema](const detail::MessageCore& core) { return Message(core, schema); }); + } + + /** + * Block for a batch of up to `maxMessages`, bounded by `timeout`. + * + * Returns as soon as `maxMessages` have been collected or `timeout` elapses, + * whichever comes first, so the returned batch may contain fewer than + * `maxMessages` (including zero on timeout). + * + * @param maxMessages the maximum number of messages to return in the batch. + * @param timeout maximum time to wait while accumulating the batch. + * @return the collected `Messages`, or an `Error` on close/disconnect/decode + * failure. + */ + Expected> receiveMulti(int maxMessages, std::chrono::milliseconds timeout) { + return toTypedBatch(core_.receiveMultiAsync(maxMessages, timeout.count()).get()); + } + + /** + * Cumulatively acknowledge every message up to and including `id`, advancing all + * segments up to that position (spec §7.1). 
+ * + * Fire-and-forget: it does not block and does not report an error. Acks are + * buffered and delivered best-effort; a lost ack simply causes redelivery. + * + * @param id the message position up to which (inclusive) to acknowledge. + */ + void acknowledgeCumulative(const MessageId& id) { core_.acknowledgeCumulative(id); } + /** + * Transactional cumulative acknowledge: enlist the cumulative ack up to `id` in + * `txn`. + * + * The acknowledgment becomes effective only when `txn` commits; its outcome (and + * any error) surfaces at `Transaction::commit()`, not here. + * + * @param id the message position up to which (inclusive) to acknowledge. + * @param txn the transaction the acknowledgment is enlisted in. + */ + void acknowledgeCumulative(const MessageId& id, const Transaction& txn) { + core_.acknowledgeCumulative(id, txn); + } + + /** + * Close the consumer, releasing its broker-side resources. Blocking. + * + * @return an empty `Expected` on success, or an `Error` if the close + * failed. Call `.value()` to throw instead. + */ + Expected close() { return core_.closeAsync().get(); } + /** + * Close the consumer without blocking. + * + * @return a `Future` completed when the close finishes (or with an `Error` + * on failure). + */ + Future closeAsync() { return core_.closeAsync(); } + + /** @return the topic this consumer is subscribed to. In namespace mode this is the + * namespace-derived subscription target. */ + const std::string& topic() const { return core_.topic(); } + /** @return the subscription name. */ + const std::string& subscription() const { return core_.subscription(); } + /** @return the consumer name (broker-assigned if none was set on the builder). */ + const std::string& consumerName() const { return core_.consumerName(); } + + /** @return `true` if this is a live, subscribed consumer; `false` if it is an empty + * (default-constructed or closed/moved-from) handle. 
*/ + explicit operator bool() const { return static_cast(core_); } + + private: + template + friend class StreamConsumerBuilder; + StreamConsumer(detail::StreamConsumerCore core, Schema schema) + : core_(std::move(core)), schema_(std::move(schema)) {} + + Expected> toTyped(Expected r) const { + if (r) return Message(*r, schema_); + return Expected>(r.error()); + } + Expected> toTypedBatch(Expected> r) const { + if (!r) return Expected>(r.error()); + std::vector> out; + out.reserve(r->size()); + for (auto& core : *r) out.emplace_back(core, schema_); + return Messages(std::move(out)); + } + + detail::StreamConsumerCore core_; + Schema schema_; +}; + +/** + * Fluent builder for a `StreamConsumer`. + * + * Obtain one from `PulsarClient`. Set **exactly one** of `topic()` or + * `inNamespace()` to choose the subscription target, set the REQUIRED + * `subscriptionName()`, then call `subscribe()` / `subscribeAsync()` to create the + * consumer. All setters return `*this` for chaining. + * + * @tparam T the decoded message value type, fixed by the `Schema` the builder + * was created with. + */ +template +class StreamConsumerBuilder { + public: + /** + * Subscribe to a single scalable topic. Mutually exclusive with `inNamespace()`; + * set exactly one. Calling this clears any namespace selection. + * + * @param t the fully-qualified topic name. + * @return `*this` for chaining. + */ + StreamConsumerBuilder& topic(std::string t) { + config_.useNamespace = false; + config_.topic = std::move(t); + return *this; + } + + + /** + * Subscribe to all scalable topics in a namespace with live membership — topics + * created or removed later are joined/dropped automatically (spec §7.1). + * Mutually exclusive with `topic()`; set exactly one. + * + * Named `inNamespace` because `namespace` is a C++ keyword. + * + * @param ns the namespace (`tenant/namespace`) to subscribe across. 
+ * @param propertyFilters optional AND filters matched against topic properties; + * only topics matching all entries are included. Default empty (no + * filtering — every topic in the namespace). + * @return `*this` for chaining. + */ + StreamConsumerBuilder& inNamespace(std::string ns, Properties propertyFilters = {}) { + config_.useNamespace = true; + config_.namespaceName = std::move(ns); + config_.propertyFilters = std::move(propertyFilters); + return *this; + } + /** + * REQUIRED. Set the subscription name shared by all consumers of this + * subscription. + * + * @param s the subscription name. + * @return `*this` for chaining. + */ + StreamConsumerBuilder& subscriptionName(std::string s) { + config_.subscriptionName = std::move(s); + return *this; + } + /** + * Set where the subscription starts when first created. + * + * @param p the initial position. Default `SubscriptionInitialPosition::Latest`. + * Has no effect if the subscription already exists. + * @return `*this` for chaining. + */ + StreamConsumerBuilder& subscriptionInitialPosition(SubscriptionInitialPosition p) { + config_.initialPosition = p; + return *this; + } + /** + * Attach key/value properties to the subscription itself (persisted broker-side). + * StreamConsumer only. + * + * @param p the subscription properties. Default empty. + * @return `*this` for chaining. + */ + StreamConsumerBuilder& subscriptionProperties(Properties p) { + config_.subscriptionProperties = std::move(p); + return *this; + } + /** + * Set an explicit consumer name (useful for diagnostics and metrics). + * + * @param n the consumer name. Default unset, in which case the broker assigns one. + * @return `*this` for chaining. + */ + StreamConsumerBuilder& consumerName(std::string n) { + config_.consumerName = std::move(n); + return *this; + } + + /** + * Tune acknowledgment behavior (e.g. the ack-grouping/batching window). + * + * @param policy the ack policy. Default-constructed `AckPolicy` when unset. 
+ * @return `*this` for chaining. + */ + StreamConsumerBuilder& ackPolicy(AckPolicy policy) { + config_.ackPolicy = std::move(policy); + return *this; + } + /** + * Read the topic's compacted view (latest value per key) instead of the full log. + * StreamConsumer only. + * + * @param b `true` to read compacted. Default unset (broker default: uncompacted). + * @return `*this` for chaining. + */ + StreamConsumerBuilder& readCompacted(bool b) { + config_.readCompacted = b; + return *this; + } + /** + * Replicate the subscription's acknowledged position across geo-replication + * clusters. StreamConsumer only. + * + * @param b `true` to enable. Default unset (broker default: disabled). + * @return `*this` for chaining. + */ + StreamConsumerBuilder& replicateSubscriptionState(bool b) { + config_.replicateSubscriptionState = b; + return *this; + } + /** + * Add a single client-side consumer property (reported in topic stats). Call + * repeatedly to add multiple; a repeated key overwrites the previous value. + * + * @param k property key. + * @param v property value. + * @return `*this` for chaining. + */ + StreamConsumerBuilder& property(const std::string& k, const std::string& v) { + config_.properties[k] = v; + return *this; + } + + /** + * Create the consumer and subscribe. Blocking. + * + * @return the live `StreamConsumer` on success, or an `Error` if the + * subscription failed (e.g. missing `subscriptionName`, both/neither of + * topic and namespace set, or a broker error). Call `.value()` to throw + * instead. + */ + Expected> subscribe() { return subscribeAsync().get(); } + /** + * Create the consumer and subscribe without blocking. + * + * @return a `Future>` completed with the live consumer, or with + * an `Error` on failure. 
+ */ + Future> subscribeAsync() { + Schema schema = schema_; + StreamConsumerConfig config = config_; + config.schema = schema.info(); + return client_.subscribeStreamAsync(std::move(config)) + .thenApply([schema](const detail::StreamConsumerCore& core) { return StreamConsumer(core, schema); }); + } + + private: + friend class PulsarClient; + StreamConsumerBuilder(detail::ClientCore client, Schema schema) + : client_(std::move(client)), schema_(std::move(schema)) {} + + detail::ClientCore client_; + Schema schema_; + StreamConsumerConfig config_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/Transaction.h b/include/pulsar/st/Transaction.h new file mode 100644 index 00000000..0b69ba5b --- /dev/null +++ b/include/pulsar/st/Transaction.h @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace pulsar::st { + +class TransactionImpl; +using TransactionImplPtr = std::shared_ptr; +namespace detail { +class ClientCore; +class ProducerCore; +class StreamConsumerCore; +class QueueConsumerCore; +} // namespace detail + +/** + * @brief Lifecycle states of a transaction (spec §9). 
+ * + * A transaction starts `Open`, transitions through a transient `Committing` or + * `Aborting` phase while the outcome is being made durable, and ends in one of the + * terminal states `Committed`, `Aborted`, `Error`, or `TimedOut`. Query the + * current state with `Transaction::state()`. + */ +enum class TransactionState { + Open, ///< Active: messages and acks may still be enlisted; not yet committed or aborted. + Committing, ///< Transient: a `commit()`/`commitAsync()` is in progress but not yet durable. + Aborting, ///< Transient: an `abort()`/`abortAsync()` is in progress but not yet finalized. + Committed, ///< Terminal: committed successfully; produced messages are visible and acks durable. + Aborted, ///< Terminal: aborted; produced messages are discarded and acks rolled back. + Error, ///< Terminal: a failure left the transaction in an unrecoverable state. + TimedOut, ///< Terminal: the transaction timeout elapsed before commit, so it was aborted. +}; + +/** + * @brief A transaction providing exactly-once semantics across multiple scalable + * topics and subscriptions (spec §9). + * + * Messages produced and acknowledgments made within a single transaction are + * applied atomically: on commit they all take effect, and on abort none of them + * do. This lets an application consume, transform, and produce across topics with + * no duplicates and no lost work. + * + * Typical usage: + * -# obtain a transaction from `PulsarClient::newTransaction()`; + * -# enlist publishes with `MessageBuilder::transaction(txn)` and acknowledgments + * with `consumer.acknowledge*(id, txn)`; + * -# call `commit()` to make the produced messages visible and the acks durable, + * or `abort()` to discard everything done within the transaction. + * + * A default-constructed `Transaction` is empty (falsy under `operator bool`) and + * must not be used to enlist or commit work. + * + * Holds the hidden `TransactionImpl`; its operations are defined in lib/st. 
+ */ +class PULSAR_PUBLIC Transaction { + public: + /** @brief Construct an empty, unusable transaction (falsy under `operator bool`). */ + Transaction() = default; + + /** + * @brief Return the current lifecycle state of this transaction. + * + * @return The current `TransactionState`. + */ + TransactionState state() const; + + /** + * @brief Commit the transaction: make produced messages visible and acks durable. + * + * Atomically applies every publish and acknowledgment enlisted in this + * transaction. Blocks until the outcome is durable. + * + * @return `Expected` holding success, or an `Error` if the commit failed. + */ + Expected commit() { return commitAsync().get(); } + + /** + * @brief Asynchronously commit the transaction. + * + * Non-blocking counterpart of `commit()`; the returned future completes when the + * commit is durable or fails. + * + * @return `Future` resolving to success, or an `Error` on failure. + */ + Future commitAsync() const; + + /** + * @brief Abort the transaction: discard produced messages and roll back acks. + * + * Atomically discards every publish and acknowledgment enlisted in this + * transaction, as if none had happened. Blocks until the abort is finalized. + * + * @return `Expected` holding success, or an `Error` if the abort failed. + */ + Expected abort() { return abortAsync().get(); } + + /** + * @brief Asynchronously abort the transaction. + * + * Non-blocking counterpart of `abort()`; the returned future completes when the + * abort is finalized or fails. + * + * @return `Future` resolving to success, or an `Error` on failure. + */ + Future abortAsync() const; + + /** + * @brief Test whether this object wraps a live transaction. + * + * @return `true` for a transaction obtained from `PulsarClient::newTransaction()`; + * `false` for a default-constructed (empty) one. 
+ */ + explicit operator bool() const { return static_cast(impl_); } + + private: + // The client constructs a Transaction; the producer/consumer cores read its + // impl to enlist sends and acks. All of these live in lib/st. + friend class detail::ClientCore; + friend class detail::ProducerCore; + friend class detail::StreamConsumerCore; + friend class detail::QueueConsumerCore; + explicit Transaction(TransactionImplPtr impl) : impl_(std::move(impl)) {} + + TransactionImplPtr impl_; +}; + +} // namespace pulsar::st diff --git a/include/pulsar/st/detail/CheckpointConsumerCore.h b/include/pulsar/st/detail/CheckpointConsumerCore.h new file mode 100644 index 00000000..9b3a67c7 --- /dev/null +++ b/include/pulsar/st/detail/CheckpointConsumerCore.h @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace pulsar::st { + +class CheckpointConsumerImpl; +using CheckpointConsumerImplPtr = std::shared_ptr; + +namespace detail { + +class ClientCore; + +/** + * INTERNAL — not part of the public API. Non-templated checkpoint-consumer + * operations over the hidden impl (lib/st). 
`CheckpointConsumer` wraps it. + */ +class PULSAR_PUBLIC CheckpointConsumerCore { + public: + CheckpointConsumerCore() = default; + + Future receiveAsync() const; + Future receiveAsync(int64_t timeoutMs) const; + Future> receiveMultiAsync(int maxMessages, int64_t timeoutMs) const; + Checkpoint checkpoint() const; + Future closeAsync() const; + const std::string& topic() const; + + explicit operator bool() const { return static_cast(impl_); } + + private: + friend class ClientCore; + explicit CheckpointConsumerCore(CheckpointConsumerImplPtr impl) : impl_(std::move(impl)) {} + + CheckpointConsumerImplPtr impl_; +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/include/pulsar/st/detail/ClientCore.h b/include/pulsar/st/detail/ClientCore.h new file mode 100644 index 00000000..e7c957ab --- /dev/null +++ b/include/pulsar/st/detail/ClientCore.h @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include + +namespace pulsar::st { + +class ClientImpl; +using ClientImplPtr = std::shared_ptr; +class Transaction; +class PulsarClientBuilder; +struct ProducerConfig; +struct StreamConsumerConfig; +struct QueueConsumerConfig; +struct CheckpointConsumerConfig; + +namespace detail { + +class ProducerCore; +class StreamConsumerCore; +class QueueConsumerCore; +class CheckpointConsumerCore; + +/** + * INTERNAL — not part of the public API. The non-templated client operations that + * the templated builders call across to reach the core in lib/st (the typed-API + * equivalent of the methods today's non-templated `Client` defines out-of-line). + * Holds the hidden `ClientImpl`; applications use `PulsarClient`, not this. + */ +class PULSAR_PUBLIC ClientCore { + public: + ClientCore() = default; /* Default-constructed core: impl_ is left unset. */ + + Future createProducerAsync(ProducerConfig config) const; /* Config is taken by value; the ProducerBuilder code in this series passes std::move(config) and wraps the resulting ProducerCore. */ + Future subscribeStreamAsync(StreamConsumerConfig config) const; + Future subscribeQueueAsync(QueueConsumerConfig config) const; + Future createCheckpointAsync(CheckpointConsumerConfig config) const; + Future newTransactionAsync() const; /* NOTE(review): every return type above reads as a bare `Future`; the template arguments appear to have been stripped from this copy - restore from upstream. */ + Future closeAsync() const; + void shutdown() const; /* Returns void, unlike closeAsync() which hands back a Future. */ + + explicit operator bool() const { return static_cast(impl_); } /* Tests whether impl_ is set. NOTE(review): the cast target type is missing in this copy (presumably bool) - confirm. */ + + private: + friend class pulsar::st::PulsarClientBuilder; + explicit ClientCore(ClientImplPtr impl) : impl_(std::move(impl)) {} /* Private: only the friend PulsarClientBuilder can bind a core to a ClientImpl. */ + + ClientImplPtr impl_; /* Handle to the hidden ClientImpl. */ +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/include/pulsar/st/detail/Cxx20.h b/include/pulsar/st/detail/Cxx20.h new file mode 100644 index 00000000..1fae2385 --- /dev/null +++ b/include/pulsar/st/detail/Cxx20.h @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +// The pulsar::st (scalable topics) API targets C++20. The rest of the Pulsar C++ +// client remains C++17 — only this new API requires C++20 (for concepts, +// coroutine-awaitable Future, `using enum`, reflection-based schemas, etc.). +#if (defined(_MSVC_LANG) ? _MSVC_LANG : __cplusplus) < 202002L /* _MSVC_LANG is preferred when defined because MSVC keeps __cplusplus at 199711L unless /Zc:__cplusplus is passed; other compilers fall through to __cplusplus. */ +#error "pulsar::st (scalable topics) requires C++20. Build this translation unit with -std=c++20 (or /std:c++20)." +#endif diff --git a/include/pulsar/st/detail/MessageCore.h b/include/pulsar/st/detail/MessageCore.h new file mode 100644 index 00000000..3c32ccdd --- /dev/null +++ b/include/pulsar/st/detail/MessageCore.h @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace pulsar::st { + +class MessageImpl; +using MessageImplPtr = std::shared_ptr; + +using Timestamp = std::chrono::system_clock::time_point; +using Properties = std::map; + +namespace detail { + +class StreamConsumerCore; +class QueueConsumerCore; +class CheckpointConsumerCore; + +/** + * INTERNAL — not part of the public API. Non-templated, byte-oriented view of a + * received message; its accessors are defined in lib/st. `Message` wraps this + * and decodes the payload through `Schema`. + */ +class PULSAR_PUBLIC MessageCore { + public: + MessageCore() = default; /* Default-constructed core: impl_ is left unset. */ + + const char* data() const; /* data() and size() together expose the payload as a raw byte range; decoding is left to the wrapping Message (see class comment). */ + std::size_t size() const; + MessageId id() const; + bool hasKey() const; /* Presence test paired with key(); what key() yields when this is false is not visible from this header. */ + const std::string& key() const; + const Properties& properties() const; + int64_t publishTimeMs() const; /* Raw int64_t; the Ms suffix suggests milliseconds - presumably since the epoch, confirm in lib/st. The Timestamp alias declared above is not used by this class. */ + int64_t eventTimeMs() const; // 0 if unset + int64_t sequenceId() const; + bool hasProducerName() const; + const std::string& producerName() const; + const std::string& topic() const; + int redeliveryCount() const; + bool hasReplicatedFrom() const; + const std::string& replicatedFrom() const; + + explicit operator bool() const { return static_cast(impl_); } /* Tests whether impl_ is set. NOTE(review): the cast target type is missing in this copy (presumably bool) - confirm. */ + + private: + friend class StreamConsumerCore; + friend class QueueConsumerCore; + friend class CheckpointConsumerCore; + explicit MessageCore(MessageImplPtr impl) : impl_(std::move(impl)) {} /* Private: only the three friend consumer cores can wrap a MessageImpl. */ + + MessageImplPtr impl_; /* Handle to the hidden MessageImpl. */ +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/include/pulsar/st/detail/ProducerCore.h b/include/pulsar/st/detail/ProducerCore.h new file mode 100644 index 00000000..543eb750 --- /dev/null +++ b/include/pulsar/st/detail/ProducerCore.h @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace pulsar::st { + +class ProducerImplBase; +using ProducerImplPtr = std::shared_ptr; +struct OutgoingMessage; + +namespace detail { + +class ClientCore; + +/** + * INTERNAL — not part of the public API. Non-templated producer operations over + * the hidden `ProducerImpl` (defined in lib/st). `Producer` / `MessageBuilder` + * are thin wrappers over it. 
+ */ +class PULSAR_PUBLIC ProducerCore { + public: + ProducerCore() = default; /* Default-constructed core: impl_ is left unset. */ + + Future sendAsync(OutgoingMessage message) const; /* Takes the OutgoingMessage by value. NOTE(review): the return type reads as a bare `Future`; its template argument appears to have been stripped from this copy - restore from upstream. */ + const std::string& topic() const; + const std::string& name() const; + int64_t lastSequenceId() const; + Future flushAsync() const; + Future closeAsync() const; + + explicit operator bool() const { return static_cast(impl_); } /* Tests whether impl_ is set. NOTE(review): the cast target type is missing in this copy (presumably bool) - confirm. */ + + private: + friend class ClientCore; + explicit ProducerCore(ProducerImplPtr impl) : impl_(std::move(impl)) {} /* Private: only the friend ClientCore can bind a core to an impl. */ + + ProducerImplPtr impl_; /* NOTE(review): ProducerImplPtr is declared next to a forward declaration of ProducerImplBase, while the class comment names the hidden type `ProducerImpl` - confirm which name is intended. */ +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/include/pulsar/st/detail/QueueConsumerCore.h b/include/pulsar/st/detail/QueueConsumerCore.h new file mode 100644 index 00000000..ef8e0035 --- /dev/null +++ b/include/pulsar/st/detail/QueueConsumerCore.h @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace pulsar::st { + +class QueueConsumerImpl; +using QueueConsumerImplPtr = std::shared_ptr; +class Transaction; + +namespace detail { + +class ClientCore; + +/** + * INTERNAL — not part of the public API. Non-templated queue-consumer operations + * over the hidden impl (lib/st).
`QueueConsumer` is a thin wrapper over it. + */ +class PULSAR_PUBLIC QueueConsumerCore { + public: + QueueConsumerCore() = default; /* Default-constructed core: impl_ is left unset. */ + + Future receiveAsync() const; /* NOTE(review): return types read as a bare `Future`; the template arguments appear to have been stripped from this copy - restore from upstream. */ + Future receiveAsync(int64_t timeoutMs) const; /* Overload taking timeoutMs - presumably a wait limit in milliseconds; confirm against the lib/st definition. */ + void acknowledge(const MessageId& id) const; /* Returns void rather than a Future; the queue-consumer sample in this series describes acknowledge as fire-and-forget. */ + void acknowledge(const MessageId& id, const Transaction& txn) const; /* Transactional overload; Transaction befriends this class so the core can read its impl to enlist the ack. */ + void negativeAcknowledge(const MessageId& id) const; /* The sample in this series uses this to schedule redelivery. */ + Future closeAsync() const; + const std::string& topic() const; + const std::string& subscription() const; + const std::string& consumerName() const; + + explicit operator bool() const { return static_cast(impl_); } /* Tests whether impl_ is set. NOTE(review): the cast target type is missing in this copy (presumably bool) - confirm. */ + + private: + friend class ClientCore; + explicit QueueConsumerCore(QueueConsumerImplPtr impl) : impl_(std::move(impl)) {} /* Private: only the friend ClientCore can bind a core to an impl. */ + + QueueConsumerImplPtr impl_; /* Handle to the hidden QueueConsumerImpl. */ +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/include/pulsar/st/detail/SharedState.h b/include/pulsar/st/detail/SharedState.h new file mode 100644 index 00000000..ccbfe4a7 --- /dev/null +++ b/include/pulsar/st/detail/SharedState.h @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +// INTERNAL.
The set-once completion state behind Future/Promise. It lives +// in pulsar::st::detail and is not part of the public API; applications use +// Future only. It must sit in a header because Future/Promise are templates. + +namespace pulsar::st::detail { + +template +class SharedState { /* NOTE(review): the template parameter list and every angle-bracket argument in this class (Listener, Expected, optional, vector, duration) appear to have been stripped from this copy; the text is not compilable as shown - restore from upstream rather than guessing. */ + public: + using Listener = std::function&)>; /* Callback type invoked with the stored result. */ + + bool complete(Expected result) { /* Stores result only if none is stored yet, wakes blocked get() callers, then runs the queued listeners. Returns false (and does nothing) when a result was already stored, true otherwise. */ + std::unique_lock lock(mutex_); + if (result_.has_value()) { + return false; + } + result_.emplace(std::move(result)); + cond_.notify_all(); + std::vector listeners = std::move(listeners_); /* Take the queue under the lock so the callbacks can run without it. */ + listeners_.clear(); /* A moved-from vector is valid but unspecified; clear() makes it definitely empty. */ + lock.unlock(); /* Listeners run unlocked, so a listener may call back into this object without self-deadlock. */ + for (auto& listener : listeners) { + listener(*result_); /* Unlocked read: the only write to result_ is the emplace above, guarded by the has_value() check, so it is not modified again. */ + } + return true; + } + + void addListener(Listener listener) { /* If a result is already stored, invokes listener immediately on the calling thread; otherwise queues it for complete(). */ + std::unique_lock lock(mutex_); + if (result_.has_value()) { + Expected snapshot = *result_; /* Copy taken under the lock; the callback then runs on the copy with the lock released. */ + lock.unlock(); + listener(snapshot); + } else { + listeners_.push_back(std::move(listener)); + } + } + + Expected get() { /* Blocks until a result is stored, then returns a copy of it. */ + std::unique_lock lock(mutex_); + cond_.wait(lock, [this] { return result_.has_value(); }); /* Predicate form: re-checks after every wakeup, so spurious wakeups are harmless. */ + return *result_; + } + + template + std::optional> get(std::chrono::duration timeout) { /* Timed variant: waits at most timeout; returns std::nullopt if no result was stored in that time, otherwise a copy of the result. */ + std::unique_lock lock(mutex_); + if (!cond_.wait_for(lock, timeout, [this] { return result_.has_value(); })) { + return std::nullopt; + } + return *result_; + } + + bool isReady() const { /* Non-blocking check for a stored result; takes the mutex, which is why mutex_ is declared mutable. */ + std::lock_guard lock(mutex_); + return result_.has_value(); + } + + private: + mutable std::mutex mutex_; /* Guards result_ and listeners_. */ + std::condition_variable cond_; /* Signalled once by complete(). */ + std::optional> result_; /* Empty until complete() succeeds; written at most once. */ + std::vector listeners_; /* Callbacks queued before completion; drained by complete(). */ +}; + +} // namespace pulsar::st::detail diff --git a/include/pulsar/st/detail/StreamConsumerCore.h b/include/pulsar/st/detail/StreamConsumerCore.h new file mode 100644 index 00000000..4952d7ba --- /dev/null +++ b/include/pulsar/st/detail/StreamConsumerCore.h @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace pulsar::st { + +class StreamConsumerImpl; +using StreamConsumerImplPtr = std::shared_ptr; +class Transaction; + +namespace detail { + +class ClientCore; + +/** + * INTERNAL — not part of the public API. Non-templated stream-consumer operations + * over the hidden impl (lib/st). `StreamConsumer` is a thin wrapper over it. 
+ */ +class PULSAR_PUBLIC StreamConsumerCore { + public: + StreamConsumerCore() = default; /* Default-constructed core: impl_ is left unset. */ + + Future receiveAsync() const; /* NOTE(review): return types read as `Future` and `Future>`; the template arguments appear to have been stripped from this copy - restore from upstream. */ + Future receiveAsync(int64_t timeoutMs) const; /* Overload taking timeoutMs - presumably a wait limit in milliseconds; confirm against the lib/st definition. */ + Future> receiveMultiAsync(int maxMessages, int64_t timeoutMs) const; + void acknowledgeCumulative(const MessageId& id) const; /* Returns void rather than a Future. This class declares only the cumulative form of acknowledgment. */ + void acknowledgeCumulative(const MessageId& id, const Transaction& txn) const; /* Transactional overload; Transaction befriends this class so the core can read its impl to enlist the ack. */ + Future closeAsync() const; + const std::string& topic() const; + const std::string& subscription() const; + const std::string& consumerName() const; + + explicit operator bool() const { return static_cast(impl_); } /* Tests whether impl_ is set. NOTE(review): the cast target type is missing in this copy (presumably bool) - confirm. */ + + private: + friend class ClientCore; + explicit StreamConsumerCore(StreamConsumerImplPtr impl) : impl_(std::move(impl)) {} /* Private: only the friend ClientCore can bind a core to an impl. */ + + StreamConsumerImplPtr impl_; /* Handle to the hidden StreamConsumerImpl. */ +}; + +} // namespace detail +} // namespace pulsar::st diff --git a/vcpkg.json b/vcpkg.json index 3452492d..721371da 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -43,6 +43,10 @@ "name": "protobuf", "version>=": "6.33.4#1" }, + { + "name": "reflectcpp", + "version>=": "0.24.0" + }, { "name": "snappy", "version>=": "1.2.2" From fa4def33d7d427983052e3553b6c254f5d8ac77d Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 23 Jun 2026 12:28:38 -0700 Subject: [PATCH 02/39] Fix CI: clang-format the st sources; make reflect-cpp optional - Apply clang-format-11 to the new pulsar::st headers and examples (the Formatting Check uses clang-format 11; local 18 formats differently). - examples/CMakeLists.txt: build the four dependency-free st samples unconditionally and add the reflect-cpp JSON sample only when reflectcpp is found (find_package CONFIG QUIET instead of REQUIRED), so configure no longer fails where reflect-cpp is absent (e.g. the CodeQL/Analyze job). - vcpkg.json: drop the reflectcpp dependency for now; it returns with the lib/st implementation that actually exercises the JSON/Avro schemas.
Signed-off-by: Matteo Merli --- examples/CMakeLists.txt | 25 ++++++++++++------- examples/st/SampleStProducer.cc | 7 ++++-- examples/st/SampleStQueueConsumer.cc | 2 +- include/pulsar/st/AvroSchema.h | 3 +-- include/pulsar/st/CheckpointConsumer.h | 21 ++++++++-------- include/pulsar/st/Client.h | 2 +- include/pulsar/st/Consumer.h | 15 ++++++++---- include/pulsar/st/Error.h | 3 ++- include/pulsar/st/Expected.h | 3 +-- include/pulsar/st/Future.h | 2 +- include/pulsar/st/JsonSchema.h | 3 +-- include/pulsar/st/Message.h | 3 ++- include/pulsar/st/Policies.h | 33 +++++++++++++++++--------- include/pulsar/st/Producer.h | 21 ++++++++-------- include/pulsar/st/QueueConsumer.h | 11 ++++----- include/pulsar/st/Schema.h | 14 +++++++---- include/pulsar/st/StreamConsumer.h | 12 ++++------ include/pulsar/st/Transaction.h | 3 ++- include/pulsar/st/detail/Cxx20.h | 3 ++- vcpkg.json | 4 ---- 20 files changed, 106 insertions(+), 84 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 81796c09..f0c0db2f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -116,22 +116,31 @@ target_link_libraries(SampleCustomLoggerCApi ${CLIENT_LIBS} pulsarShar # TODO(scalable-topics): once lib/st lands, replace this with one # add_executable + target_link_libraries(... pulsarShared) per file, exactly like # the samples above. +# The core samples are header-only previews of the pulsar::st API and build with +# no extra dependency. set(SAMPLE_ST_SOURCES st/SampleStProducer.cc st/SampleStStreamConsumer.cc st/SampleStQueueConsumer.cc st/SampleStCheckpointConsumer.cc - st/SampleStJsonSchema.cc ) -# reflect-cpp powers jsonSchema() (reflection-based JSON SerDe + schema) and is -# a required dependency of the scalable-topics API. -find_package(reflectcpp CONFIG REQUIRED) +# reflect-cpp powers jsonSchema() (reflection-based JSON SerDe + schema). 
It is +# optional for this API-only PR: when the package is present the JSON sample is +# added and linked against it; when absent, only that one sample is skipped. (The +# reflectcpp vcpkg port does not yet ship an Avro backend, so it is not yet wired +# into the manifest; it will be added with the lib/st implementation.) +find_package(reflectcpp CONFIG QUIET) +if (reflectcpp_FOUND) + list(APPEND SAMPLE_ST_SOURCES st/SampleStJsonSchema.cc) +endif () add_library(StExamples OBJECT ${SAMPLE_ST_SOURCES}) # The scalable-topics (pulsar::st) API targets C++20; the rest of the client stays # C++17. Set the standard per-target so only this code requires C++20. set_target_properties(StExamples PROPERTIES CXX_STANDARD 20 CXX_STANDARD_REQUIRED ON) -# PRIVATE link gives the object sources pulsarShared's and reflect-cpp's include -# directories; an OBJECT library is not itself linked, so the missing lib/st -# symbols are fine. -target_link_libraries(StExamples PRIVATE ${CLIENT_LIBS} pulsarShared reflectcpp::reflectcpp) +# PRIVATE link gives the object sources pulsarShared's include directories; an +# OBJECT library is not itself linked, so the missing lib/st symbols are fine. +target_link_libraries(StExamples PRIVATE ${CLIENT_LIBS} pulsarShared) +if (reflectcpp_FOUND) + target_link_libraries(StExamples PRIVATE reflectcpp::reflectcpp) +endif () diff --git a/examples/st/SampleStProducer.cc b/examples/st/SampleStProducer.cc index 2252c3dd..8f7431a1 100644 --- a/examples/st/SampleStProducer.cc +++ b/examples/st/SampleStProducer.cc @@ -59,8 +59,11 @@ int main() { } // Asynchronous send: react on completion without blocking. 
- producer.newMessage().key("order-async").value("async-payload").sendAsync().addListener( - [](const Expected& result) { + producer.newMessage() + .key("order-async") + .value("async-payload") + .sendAsync() + .addListener([](const Expected& result) { if (result) { std::cout << "async sent " << *result << "\n"; } else { diff --git a/examples/st/SampleStQueueConsumer.cc b/examples/st/SampleStQueueConsumer.cc index f8171440..1efac129 100644 --- a/examples/st/SampleStQueueConsumer.cc +++ b/examples/st/SampleStQueueConsumer.cc @@ -56,7 +56,7 @@ int main() { const bool processed = !msg->value().empty(); if (processed) { - consumer.acknowledge(msg->id()); // fire-and-forget; never blocks or errors + consumer.acknowledge(msg->id()); // fire-and-forget; never blocks or errors } else { consumer.negativeAcknowledge(msg->id()); // schedule redelivery } diff --git a/include/pulsar/st/AvroSchema.h b/include/pulsar/st/AvroSchema.h index a0ac13eb..ccf88656 100644 --- a/include/pulsar/st/AvroSchema.h +++ b/include/pulsar/st/AvroSchema.h @@ -20,10 +20,9 @@ #include +#include #include #include - -#include #include // avroSchema() is the Avro counterpart of jsonSchema(): reflect-cpp derives diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index e5bb80d7..8f063706 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -20,12 +20,12 @@ #include #include -#include -#include #include #include #include #include +#include +#include #include #include @@ -36,7 +36,6 @@ namespace pulsar::st { - /** * @brief Configuration accumulated by `CheckpointConsumerBuilder`. * @@ -46,16 +45,16 @@ namespace pulsar::st { */ struct CheckpointConsumerConfig { std::string topic; ///< Scalable topic to read. REQUIRED; no default. - Checkpoint startPosition = Checkpoint::latest(); ///< Position to start from. Default `Checkpoint::latest()`. - std::optional consumerGroup; ///< Consumer group to join. 
Unset (default) => ungrouped, reads every segment. - std::optional consumerName; ///< Human-readable consumer name. Unset (default) => auto-generated. - Properties properties; ///< Free-form key/value metadata attached to the consumer. Default empty. - SchemaInfo schema; ///< Schema descriptor; filled in from `Schema` by the builder. + Checkpoint startPosition = + Checkpoint::latest(); ///< Position to start from. Default `Checkpoint::latest()`. + std::optional + consumerGroup; ///< Consumer group to join. Unset (default) => ungrouped, reads every segment. + std::optional + consumerName; ///< Human-readable consumer name. Unset (default) => auto-generated. + Properties properties; ///< Free-form key/value metadata attached to the consumer. Default empty. + SchemaInfo schema; ///< Schema descriptor; filled in from `Schema` by the builder. }; - - - template class CheckpointConsumerBuilder; diff --git a/include/pulsar/st/Client.h b/include/pulsar/st/Client.h index 3cd82529..be0eedc4 100644 --- a/include/pulsar/st/Client.h +++ b/include/pulsar/st/Client.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/include/pulsar/st/Consumer.h b/include/pulsar/st/Consumer.h index 81b430b9..8c346656 100644 --- a/include/pulsar/st/Consumer.h +++ b/include/pulsar/st/Consumer.h @@ -38,7 +38,8 @@ namespace pulsar::st { * created. It is ignored once the subscription exists and has a durable cursor: * an already-established subscription always resumes from its stored position. */ -enum class SubscriptionInitialPosition { +enum class SubscriptionInitialPosition +{ Earliest, ///< Start from the oldest available message on the topic. Latest ///< Start from the newest message, skipping anything published before subscribing. }; @@ -51,9 +52,11 @@ enum class SubscriptionInitialPosition { * optional and fall back to the client default when unset. 
*/ struct AckPolicy { - /** Time window over which acknowledgments are batched before being sent, in milliseconds; 0 acks immediately. Unset uses the client default. */ + /** Time window over which acknowledgments are batched before being sent, in milliseconds; 0 acks + * immediately. Unset uses the client default. */ std::optional groupTime; - /** Delay before a negatively-acknowledged message is redelivered, in milliseconds. QueueConsumer only. Unset uses the client default. */ + /** Delay before a negatively-acknowledged message is redelivered, in milliseconds. QueueConsumer only. + * Unset uses the client default. */ std::optional negativeAckRedeliveryDelay; }; @@ -66,11 +69,13 @@ struct AckPolicy { * to a positive value. */ struct DeadLetterPolicy { - /** Maximum number of redeliveries before a message is routed to the dead-letter topic. Defaults to 0, which disables dead-lettering. */ + /** Maximum number of redeliveries before a message is routed to the dead-letter topic. Defaults to 0, + * which disables dead-lettering. */ int maxRedeliverCount = 0; /** Name of the dead-letter topic. Unset defaults to "<topic>-<subscription>-DLQ". */ std::optional deadLetterTopic; - /** If set, creates this subscription on the dead-letter topic up front so no messages are missed before a consumer attaches. Unset creates no initial subscription. */ + /** If set, creates this subscription on the dead-letter topic up front so no messages are missed before a + * consumer attaches. Unset creates no initial subscription. */ std::optional initialSubscriptionName; }; diff --git a/include/pulsar/st/Error.h b/include/pulsar/st/Error.h index 9e90e016..bc9bd97a 100644 --- a/include/pulsar/st/Error.h +++ b/include/pulsar/st/Error.h @@ -38,7 +38,8 @@ namespace pulsar::st { using pulsar::Error; /** Re-export of `pulsar::Result`: the enumeration of machine-readable result codes. 
*/ using pulsar::Result; -/** Re-export the `Result` enumerators into `pulsar::st` so they are usable unqualified (e.g. `ResultTimeout`). */ +/** Re-export the `Result` enumerators into `pulsar::st` so they are usable unqualified (e.g. + * `ResultTimeout`). */ using enum pulsar::Result; /** diff --git a/include/pulsar/st/Expected.h b/include/pulsar/st/Expected.h index 05871493..3ea2f5cb 100644 --- a/include/pulsar/st/Expected.h +++ b/include/pulsar/st/Expected.h @@ -216,8 +216,7 @@ class [[nodiscard]] Expected { template auto transform(F&& f) const& { using U = std::remove_cv_t>>; - return has_value() ? Expected(std::forward(f)(std::get<0>(storage_))) - : Expected(error()); + return has_value() ? Expected(std::forward(f)(std::get<0>(storage_))) : Expected(error()); } /** diff --git a/include/pulsar/st/Future.h b/include/pulsar/st/Future.h index 000f06f9..8e2f6c26 100644 --- a/include/pulsar/st/Future.h +++ b/include/pulsar/st/Future.h @@ -23,9 +23,9 @@ #include #include +#include #include #include -#include #include #include diff --git a/include/pulsar/st/JsonSchema.h b/include/pulsar/st/JsonSchema.h index 03af1a22..15fa6987 100644 --- a/include/pulsar/st/JsonSchema.h +++ b/include/pulsar/st/JsonSchema.h @@ -20,10 +20,9 @@ #include +#include #include #include - -#include #include // jsonSchema() derives BOTH the JSON SerDe and the declared schema from T's diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index 055ba7a7..f029521c 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -55,7 +55,8 @@ class Message { * @param core the raw received message (payload and metadata). * @param schema the schema used to decode the payload in `value()`. 
*/ - Message(detail::MessageCore core, Schema schema) : core_(std::move(core)), schema_(std::move(schema)) {} + Message(detail::MessageCore core, Schema schema) + : core_(std::move(core)), schema_(std::move(schema)) {} /** * Decode the payload through `Schema` and return the typed value. diff --git a/include/pulsar/st/Policies.h b/include/pulsar/st/Policies.h index 5ffb44ba..29c0d739 100644 --- a/include/pulsar/st/Policies.h +++ b/include/pulsar/st/Policies.h @@ -90,17 +90,22 @@ struct MemorySize { struct ConnectionPolicy { /** Number of physical connections opened to each broker. Unset uses the client default. */ std::optional connectionsPerBroker; - /** Maximum time to wait for a TCP/TLS connection to be established, in milliseconds. Unset uses the client default. */ + /** Maximum time to wait for a TCP/TLS connection to be established, in milliseconds. Unset uses the + * client default. */ std::optional connectionTimeout; - /** Maximum time to wait for a broker request (e.g. produce/consume control ops) to complete, in milliseconds. Unset uses the client default. */ + /** Maximum time to wait for a broker request (e.g. produce/consume control ops) to complete, in + * milliseconds. Unset uses the client default. */ std::optional operationTimeout; - /** Interval between keep-alive pings sent on an idle connection, in seconds. Unset uses the client default. */ + /** Interval between keep-alive pings sent on an idle connection, in seconds. Unset uses the client + * default. */ std::optional keepAliveInterval; /** Maximum number of concurrent topic-lookup requests in flight. Unset uses the client default. */ std::optional maxLookupRequests; - /** Maximum number of lookup redirects to follow before failing a lookup. Unset uses the client default. */ + /** Maximum number of lookup redirects to follow before failing a lookup. Unset uses the client default. 
+ */ std::optional maxLookupRedirects; - /** Time an idle pooled connection may stay open before being closed, in milliseconds. Unset uses the client default. */ + /** Time an idle pooled connection may stay open before being closed, in milliseconds. Unset uses the + * client default. */ std::optional maxConnectionIdleTime; }; @@ -114,7 +119,8 @@ struct ConnectionPolicy { struct BackoffPolicy { /** Delay before the first reconnection attempt, in milliseconds. Unset uses the client default. */ std::optional initialBackoff; - /** Upper bound on the backoff delay as it grows across retries, in milliseconds. Unset uses the client default. */ + /** Upper bound on the backoff delay as it grows across retries, in milliseconds. Unset uses the client + * default. */ std::optional maxBackoff; }; @@ -128,13 +134,17 @@ struct BackoffPolicy { struct TlsPolicy { /** Whether TLS is used for broker connections. Defaults to false (plaintext). */ bool enabled = false; - /** Path to the PEM file of trusted CA certificates used to verify the broker. Unset uses the system trust store. */ + /** Path to the PEM file of trusted CA certificates used to verify the broker. Unset uses the system trust + * store. */ std::optional trustCertsFilePath; - /** Path to the client certificate PEM file, for mutual TLS. Unset disables client-certificate authentication. */ + /** Path to the client certificate PEM file, for mutual TLS. Unset disables client-certificate + * authentication. */ std::optional certificateFilePath; - /** Path to the client private key PEM file, for mutual TLS. Unset disables client-certificate authentication. */ + /** Path to the client private key PEM file, for mutual TLS. Unset disables client-certificate + * authentication. */ std::optional privateKeyFilePath; - /** Whether to accept the broker's certificate without validating it against the trust store. Defaults to false (validation enforced). 
*/ + /** Whether to accept the broker's certificate without validating it against the trust store. Defaults to + * false (validation enforced). */ bool allowInsecureConnection = false; /** Whether to verify that the broker's certificate hostname matches the endpoint. Defaults to true. */ bool validateHostname = true; @@ -148,7 +158,8 @@ struct TlsPolicy { * optional and the client supplies a built-in default when it is unset. */ struct TransactionPolicy { - /** Default lifetime of a transaction before it is automatically aborted, in milliseconds. Unset uses the client default. */ + /** Default lifetime of a transaction before it is automatically aborted, in milliseconds. Unset uses the + * client default. */ std::optional timeout; }; diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 0fe4fc10..be1774e6 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -19,8 +19,6 @@ #pragma once #include -#include -#include #include #include #include @@ -28,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -45,7 +45,8 @@ namespace pulsar::st { * and controls how the broker arbitrates between multiple producers on the same * topic. */ -enum class ProducerAccessMode { +enum class ProducerAccessMode +{ /** Multiple producers may publish to the topic concurrently. The default. */ Shared, /** Only one producer may be active at a time; another producer requesting @@ -106,9 +107,9 @@ struct OutgoingMessage { std::string key; /** Per-message user metadata. Empty by default. */ Properties properties; - int64_t eventTimeMs = 0; ///< Application event time, epoch ms; 0 = unset. - int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. - int64_t deliverAtMs = 0; ///< Absolute delivery time, epoch ms; 0 = deliver immediately. + int64_t eventTimeMs = 0; ///< Application event time, epoch ms; 0 = unset. + int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. 
+ int64_t deliverAtMs = 0; ///< Absolute delivery time, epoch ms; 0 = deliver immediately. /** Target clusters for geo-replication; empty applies the topic's default. */ std::vector replicationClusters; std::optional transaction; ///< Enlisting transaction; unset = non-transactional. @@ -350,7 +351,8 @@ class Producer { private: template friend class ProducerBuilder; - Producer(detail::ProducerCore core, Schema schema) : core_(std::move(core)), schema_(std::move(schema)) {} + Producer(detail::ProducerCore core, Schema schema) + : core_(std::move(core)), schema_(std::move(schema)) {} detail::ProducerCore core_; Schema schema_; @@ -460,9 +462,8 @@ class ProducerBuilder { Schema schema = schema_; ProducerConfig config = config_; config.schema = schema.info(); - return client_.createProducerAsync(std::move(config)).thenApply([schema](const detail::ProducerCore& core) { - return Producer(core, schema); - }); + return client_.createProducerAsync(std::move(config)) + .thenApply([schema](const detail::ProducerCore& core) { return Producer(core, schema); }); } private: diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index c82f2256..42aae798 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -19,8 +19,6 @@ #pragma once #include -#include -#include #include #include #include @@ -28,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -38,7 +38,6 @@ namespace pulsar::st { - /** * Plain-old-data configuration accumulated by `QueueConsumerBuilder`. 
* @@ -83,9 +82,6 @@ struct QueueConsumerConfig { SchemaInfo schema; }; - - - template class QueueConsumerBuilder; @@ -356,7 +352,8 @@ class QueueConsumerBuilder { QueueConsumerConfig config = config_; config.schema = schema.info(); return client_.subscribeQueueAsync(std::move(config)) - .thenApply([schema](const detail::QueueConsumerCore& core) { return QueueConsumer(core, schema); }); + .thenApply( + [schema](const detail::QueueConsumerCore& core) { return QueueConsumer(core, schema); }); } private: diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index 1ad5d649..856bb021 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -67,9 +67,12 @@ using Bytes = std::vector; */ template concept SerDeFor = requires(const S& serde, const T& value, const char* data, std::size_t size) { - { serde.info() } -> std::convertible_to; - { serde.encode(value) } -> std::convertible_to; - { serde.decode(data, size) } -> std::convertible_to; + { serde.info() } + ->std::convertible_to; + { serde.encode(value) } + ->std::convertible_to; + { serde.decode(data, size) } + ->std::convertible_to; }; /** @@ -126,8 +129,9 @@ class Schema { * @param serde the SerDe to adopt; taken by value and stored. */ template - requires(!std::is_same_v, Schema> && SerDeFor, T>) - Schema(SerDe serde) : self_(std::make_shared>>(std::move(serde))) {} + requires(!std::is_same_v, Schema> && SerDeFor, T>) + Schema(SerDe serde) + : self_(std::make_shared>>(std::move(serde))) {} /** * @brief Returns the schema description sent to the broker for compatibility. 
diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index a0f7c198..8775f890 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -19,8 +19,6 @@ #pragma once #include -#include -#include #include #include #include @@ -28,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -38,7 +38,6 @@ namespace pulsar::st { - /** * Plain-old-data configuration accumulated by `StreamConsumerBuilder`. * @@ -89,9 +88,6 @@ struct StreamConsumerConfig { SchemaInfo schema; }; - - - template class StreamConsumerBuilder; @@ -271,7 +267,6 @@ class StreamConsumerBuilder { return *this; } - /** * Subscribe to all scalable topics in a namespace with live membership — topics * created or removed later are joined/dropped automatically (spec §7.1). @@ -400,7 +395,8 @@ class StreamConsumerBuilder { StreamConsumerConfig config = config_; config.schema = schema.info(); return client_.subscribeStreamAsync(std::move(config)) - .thenApply([schema](const detail::StreamConsumerCore& core) { return StreamConsumer(core, schema); }); + .thenApply( + [schema](const detail::StreamConsumerCore& core) { return StreamConsumer(core, schema); }); } private: diff --git a/include/pulsar/st/Transaction.h b/include/pulsar/st/Transaction.h index 0b69ba5b..a5e7fe01 100644 --- a/include/pulsar/st/Transaction.h +++ b/include/pulsar/st/Transaction.h @@ -43,7 +43,8 @@ class QueueConsumerCore; * terminal states `Committed`, `Aborted`, `Error`, or `TimedOut`. Query the * current state with `Transaction::state()`. */ -enum class TransactionState { +enum class TransactionState +{ Open, ///< Active: messages and acks may still be enlisted; not yet committed or aborted. Committing, ///< Transient: a `commit()`/`commitAsync()` is in progress but not yet durable. Aborting, ///< Transient: an `abort()`/`abortAsync()` is in progress but not yet finalized. 
diff --git a/include/pulsar/st/detail/Cxx20.h b/include/pulsar/st/detail/Cxx20.h index 1fae2385..698a129d 100644 --- a/include/pulsar/st/detail/Cxx20.h +++ b/include/pulsar/st/detail/Cxx20.h @@ -22,5 +22,6 @@ // client remains C++17 — only this new API requires C++20 (for concepts, // coroutine-awaitable Future, `using enum`, reflection-based schemas, etc.). #if (defined(_MSVC_LANG) ? _MSVC_LANG : __cplusplus) < 202002L -#error "pulsar::st (scalable topics) requires C++20. Build this translation unit with -std=c++20 (or /std:c++20)." +#error \ + "pulsar::st (scalable topics) requires C++20. Build this translation unit with -std=c++20 (or /std:c++20)." #endif diff --git a/vcpkg.json b/vcpkg.json index 721371da..3452492d 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -43,10 +43,6 @@ "name": "protobuf", "version>=": "6.33.4#1" }, - { - "name": "reflectcpp", - "version>=": "0.24.0" - }, { "name": "snappy", "version>=": "1.2.2" From afda5578d93aa4cc4c8461ec32365ed5e80e5bf4 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 23 Jun 2026 14:41:19 -0700 Subject: [PATCH 03/39] Fix CI: give st config-struct fields default member initializers GCC's -Wmissing-field-initializers (-Wextra, and the build is -Werror) fires on a partial designated-initializer such as .deadLetterPolicy({.maxRedeliverCount = 5}) for every omitted member that lacks a default member initializer. clang does not warn, so this was missed locally. Give every optional field in the user-facing policy/ack/DLQ structs an '= std::nullopt' NSDMI so designated-init of any subset is warning-clean. Verified with gcc:13 -Wextra -Werror against all four st examples. 
Signed-off-by: Matteo Merli --- include/pulsar/st/Consumer.h | 8 ++++---- include/pulsar/st/Policies.h | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/pulsar/st/Consumer.h b/include/pulsar/st/Consumer.h index 8c346656..a5598126 100644 --- a/include/pulsar/st/Consumer.h +++ b/include/pulsar/st/Consumer.h @@ -54,10 +54,10 @@ enum class SubscriptionInitialPosition struct AckPolicy { /** Time window over which acknowledgments are batched before being sent, in milliseconds; 0 acks * immediately. Unset uses the client default. */ - std::optional groupTime; + std::optional groupTime = std::nullopt; /** Delay before a negatively-acknowledged message is redelivered, in milliseconds. QueueConsumer only. * Unset uses the client default. */ - std::optional negativeAckRedeliveryDelay; + std::optional negativeAckRedeliveryDelay = std::nullopt; }; /** @@ -73,10 +73,10 @@ struct DeadLetterPolicy { * which disables dead-lettering. */ int maxRedeliverCount = 0; /** Name of the dead-letter topic. Unset defaults to "<topic>-<subscription>-DLQ". */ - std::optional deadLetterTopic; + std::optional deadLetterTopic = std::nullopt; /** If set, creates this subscription on the dead-letter topic up front so no messages are missed before a * consumer attaches. Unset creates no initial subscription. */ - std::optional initialSubscriptionName; + std::optional initialSubscriptionName = std::nullopt; }; } // namespace pulsar::st diff --git a/include/pulsar/st/Policies.h b/include/pulsar/st/Policies.h index 29c0d739..dd2b6a0d 100644 --- a/include/pulsar/st/Policies.h +++ b/include/pulsar/st/Policies.h @@ -89,24 +89,24 @@ struct MemorySize { */ struct ConnectionPolicy { /** Number of physical connections opened to each broker. Unset uses the client default. */ - std::optional connectionsPerBroker; + std::optional connectionsPerBroker = std::nullopt; /** Maximum time to wait for a TCP/TLS connection to be established, in milliseconds. 
Unset uses the * client default. */ - std::optional connectionTimeout; + std::optional connectionTimeout = std::nullopt; /** Maximum time to wait for a broker request (e.g. produce/consume control ops) to complete, in * milliseconds. Unset uses the client default. */ - std::optional operationTimeout; + std::optional operationTimeout = std::nullopt; /** Interval between keep-alive pings sent on an idle connection, in seconds. Unset uses the client * default. */ - std::optional keepAliveInterval; + std::optional keepAliveInterval = std::nullopt; /** Maximum number of concurrent topic-lookup requests in flight. Unset uses the client default. */ - std::optional maxLookupRequests; + std::optional maxLookupRequests = std::nullopt; /** Maximum number of lookup redirects to follow before failing a lookup. Unset uses the client default. */ - std::optional maxLookupRedirects; + std::optional maxLookupRedirects = std::nullopt; /** Time an idle pooled connection may stay open before being closed, in milliseconds. Unset uses the * client default. */ - std::optional maxConnectionIdleTime; + std::optional maxConnectionIdleTime = std::nullopt; }; /** @@ -118,10 +118,10 @@ struct ConnectionPolicy { */ struct BackoffPolicy { /** Delay before the first reconnection attempt, in milliseconds. Unset uses the client default. */ - std::optional initialBackoff; + std::optional initialBackoff = std::nullopt; /** Upper bound on the backoff delay as it grows across retries, in milliseconds. Unset uses the client * default. */ - std::optional maxBackoff; + std::optional maxBackoff = std::nullopt; }; /** @@ -136,13 +136,13 @@ struct TlsPolicy { bool enabled = false; /** Path to the PEM file of trusted CA certificates used to verify the broker. Unset uses the system trust * store. */ - std::optional trustCertsFilePath; + std::optional trustCertsFilePath = std::nullopt; /** Path to the client certificate PEM file, for mutual TLS. Unset disables client-certificate * authentication. 
*/ - std::optional certificateFilePath; + std::optional certificateFilePath = std::nullopt; /** Path to the client private key PEM file, for mutual TLS. Unset disables client-certificate * authentication. */ - std::optional privateKeyFilePath; + std::optional privateKeyFilePath = std::nullopt; /** Whether to accept the broker's certificate without validating it against the trust store. Defaults to * false (validation enforced). */ bool allowInsecureConnection = false; @@ -160,7 +160,7 @@ struct TlsPolicy { struct TransactionPolicy { /** Default lifetime of a transaction before it is automatically aborted, in milliseconds. Unset uses the * client default. */ - std::optional timeout; + std::optional timeout = std::nullopt; }; } // namespace pulsar::st From 46d3f6d1fcbd63c031e8ce7ca1eb8ed48719c44c Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 07:51:42 -0700 Subject: [PATCH 04/39] st: decode returns Expected and takes std::span Addresses PR review feedback on the Schema decode signature. The SerDe seam now takes a std::span instead of (const char*, size_t), and returns Expected instead of T -- so malformed bytes or an unset schema are error values rather than a non-opt-in throw, consistent with the rest of the API. Message::value() returns Expected accordingly. - built-in numeric codecs report a short payload as ResultInvalidMessage; - the reflect-cpp JSON/Avro SerDes map a parse failure to an Error instead of letting rfl's .value() throw; - the protobuf SerDe now checks ParseFromArray's result; - a custom SerDe may still return a plain T (infallible) -- it converts implicitly to Expected. encode keeps throwing on an unset schema (a configuration error). Examples updated to check the decoded value. Verified with clang + gcc:13 (-Wextra -Werror) and clang-format-11. 
Signed-off-by: Matteo Merli --- examples/st/SampleStCheckpointConsumer.cc | 6 +- examples/st/SampleStJsonSchema.cc | 9 ++- examples/st/SampleStQueueConsumer.cc | 8 ++- examples/st/SampleStStreamConsumer.cc | 7 ++- include/pulsar/st/AvroSchema.h | 15 +++-- include/pulsar/st/JsonSchema.h | 15 +++-- include/pulsar/st/Message.h | 12 ++-- include/pulsar/st/ProtobufNativeSchema.h | 9 ++- include/pulsar/st/Schema.h | 72 ++++++++++++++--------- 9 files changed, 102 insertions(+), 51 deletions(-) diff --git a/examples/st/SampleStCheckpointConsumer.cc b/examples/st/SampleStCheckpointConsumer.cc index 7baa34c8..79c2d30e 100644 --- a/examples/st/SampleStCheckpointConsumer.cc +++ b/examples/st/SampleStCheckpointConsumer.cc @@ -54,7 +54,11 @@ int main() { std::cerr << "receive failed: " << msg.error() << "\n"; break; } - std::cout << "read: " << msg->value() << "\n"; + if (auto value = msg->value()) { + std::cout << "read: " << *value << "\n"; + } else { + std::cerr << "decode failed: " << value.error() << "\n"; + } } // Atomic position snapshot across all segments. 
Store the bytes yourself diff --git a/examples/st/SampleStJsonSchema.cc b/examples/st/SampleStJsonSchema.cc index d647042d..7b3c91c9 100644 --- a/examples/st/SampleStJsonSchema.cc +++ b/examples/st/SampleStJsonSchema.cc @@ -72,9 +72,12 @@ int main() { if (consumerResult) { StreamConsumer consumer = std::move(consumerResult).value(); if (auto msg = consumer.receive(std::chrono::seconds(5))) { - Order received = msg->value(); // decoded straight back into the struct - std::cout << received.orderId << " -> " << received.shipTo.city << "\n"; - consumer.acknowledgeCumulative(msg->id()); + if (auto received = msg->value()) { // decoded straight back into the struct + std::cout << received->orderId << " -> " << received->shipTo.city << "\n"; + consumer.acknowledgeCumulative(msg->id()); + } else { + std::cerr << "decode failed: " << received.error() << "\n"; + } } (void)consumer.close(); } diff --git a/examples/st/SampleStQueueConsumer.cc b/examples/st/SampleStQueueConsumer.cc index 1efac129..110dbb3d 100644 --- a/examples/st/SampleStQueueConsumer.cc +++ b/examples/st/SampleStQueueConsumer.cc @@ -54,7 +54,13 @@ int main() { break; } - const bool processed = !msg->value().empty(); + auto value = msg->value(); + if (!value) { + std::cerr << "decode failed: " << value.error() << "\n"; + consumer.negativeAcknowledge(msg->id()); // bad payload; redeliver + continue; + } + const bool processed = !value->empty(); if (processed) { consumer.acknowledge(msg->id()); // fire-and-forget; never blocks or errors } else { diff --git a/examples/st/SampleStStreamConsumer.cc b/examples/st/SampleStStreamConsumer.cc index 57d9f355..3bcbc671 100644 --- a/examples/st/SampleStStreamConsumer.cc +++ b/examples/st/SampleStStreamConsumer.cc @@ -53,7 +53,12 @@ int main() { std::cerr << "receive failed: " << msg.error() << "\n"; break; } - std::cout << "key=" << msg->key().value_or("") << " value=" << msg->value() << "\n"; + auto value = msg->value(); + if (!value) { + std::cerr << "decode failed: " << 
value.error() << "\n"; + continue; + } + std::cout << "key=" << msg->key().value_or("") << " value=" << *value << "\n"; consumer.acknowledgeCumulative(msg->id()); // fire-and-forget; never blocks or errors } diff --git a/include/pulsar/st/AvroSchema.h b/include/pulsar/st/AvroSchema.h index ccf88656..46ba7d9e 100644 --- a/include/pulsar/st/AvroSchema.h +++ b/include/pulsar/st/AvroSchema.h @@ -21,8 +21,10 @@ #include #include +#include #include #include +#include #include // avroSchema() is the Avro counterpart of jsonSchema(): reflect-cpp derives @@ -44,8 +46,12 @@ template struct AvroSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::AVRO, "AVRO", rfl::avro::to_schema()); } std::string encode(const T& value) const { return rfl::avro::write(value); } - T decode(const char* data, std::size_t size) const { - return rfl::avro::read(std::string(data, size)).value(); + Expected decode(std::span data) const { + try { + return rfl::avro::read(std::string(data.data(), data.size())).value(); + } catch (const std::exception& e) { + return unexpected(pulsar::ResultInvalidMessage, e.what()); + } } }; } // namespace detail @@ -63,9 +69,8 @@ struct AvroSerDe { * * @tparam T the struct type to serialize as Avro; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use Avro. - * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes - * are not a valid Avro encoding for `T`. + * @return a `Schema` whose `encode`/`decode` use Avro. `decode` reports input + * that is not a valid Avro encoding for `T` as an `Error` rather than throwing. 
*/ template Schema avroSchema() { diff --git a/include/pulsar/st/JsonSchema.h b/include/pulsar/st/JsonSchema.h index 15fa6987..bf27a5e2 100644 --- a/include/pulsar/st/JsonSchema.h +++ b/include/pulsar/st/JsonSchema.h @@ -21,8 +21,10 @@ #include #include +#include #include #include +#include #include // jsonSchema() derives BOTH the JSON SerDe and the declared schema from T's @@ -41,8 +43,12 @@ template struct JsonSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::JSON, "JSON", rfl::json::to_schema()); } std::string encode(const T& value) const { return rfl::json::write(value); } - T decode(const char* data, std::size_t size) const { - return rfl::json::read(std::string(data, size)).value(); + Expected decode(std::span data) const { + try { + return rfl::json::read(std::string(data.data(), data.size())).value(); + } catch (const std::exception& e) { + return unexpected(pulsar::ResultInvalidMessage, e.what()); + } } }; } // namespace detail @@ -63,9 +69,8 @@ struct JsonSerDe { * * @tparam T the struct type to serialize as JSON; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use JSON. - * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes - * are not valid JSON for `T`. + * @return a `Schema` whose `encode`/`decode` use JSON. `decode` reports input + * that is not valid JSON for `T` as an `Error` rather than throwing. */ template Schema jsonSchema() { diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index f029521c..99a1049d 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -19,6 +19,7 @@ #pragma once #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include #include +#include #include #include @@ -61,12 +63,14 @@ class Message { /** * Decode the payload through `Schema` and return the typed value. * - * Decoding happens on every call (the result is not cached). 
May throw if the - * payload bytes are malformed for the schema. + * Decoding happens on every call (the result is not cached). Returns an `Error` + * rather than throwing if the payload bytes are malformed for the schema, or if no + * schema was configured for `T`; the raw bytes remain available via `data()` / + * `size()`. Call `.value()` on the result to opt into throwing instead. * - * @return the decoded value of type `T`. + * @return the decoded value of type `T`, or an `Error` if decoding fails. */ - T value() const { return schema_.decode(core_.data(), core_.size()); } + Expected value() const { return schema_.decode(std::span(core_.data(), core_.size())); } /** * Pointer to the raw, undecoded payload bytes. diff --git a/include/pulsar/st/ProtobufNativeSchema.h b/include/pulsar/st/ProtobufNativeSchema.h index 348ad051..06812767 100644 --- a/include/pulsar/st/ProtobufNativeSchema.h +++ b/include/pulsar/st/ProtobufNativeSchema.h @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -39,10 +40,12 @@ struct ProtobufNativeSerDe { "protobufNativeSchema requires T to be a generated protobuf Message"); SchemaInfo info() const { return pulsar::createProtobufNativeSchema(T::descriptor()); } std::string encode(const T& value) const { return value.SerializeAsString(); } - T decode(const char* data, std::size_t size) const { + Expected decode(std::span data) const { T message; - message.ParseFromArray(data, static_cast(size)); - return message; + if (message.ParseFromArray(data.data(), static_cast(data.size()))) { + return message; + } + return unexpected(pulsar::ResultInvalidMessage, "failed to parse protobuf message"); } }; } // namespace detail diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index 856bb021..d81e3c38 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include 
#include @@ -60,19 +62,21 @@ using Bytes = std::vector; * - `SchemaInfo info() const` — the schema description sent to the broker for * compatibility checking. * - `std::string encode(const T&) const` — serializes a value of `T` to bytes. - * - `T decode(const char*, std::size_t) const` — deserializes bytes back to `T`. + * - `Expected decode(std::span) const` — deserializes bytes back to + * `T`, reporting malformed input as an error value. (A SerDe may also return a + * plain `T` for an infallible decode; it converts implicitly to `Expected`.) * * @tparam S the candidate SerDe type. * @tparam T the value type the SerDe handles. */ template -concept SerDeFor = requires(const S& serde, const T& value, const char* data, std::size_t size) { +concept SerDeFor = requires(const S& serde, const T& value, std::span data) { { serde.info() } ->std::convertible_to; { serde.encode(value) } ->std::convertible_to; - { serde.decode(data, size) } - ->std::convertible_to; + { serde.decode(data) } + ->std::convertible_to>; }; /** @@ -85,7 +89,7 @@ concept SerDeFor = requires(const S& serde, const T& value, const char* data, st * A SerDe is any copyable type providing three const members: * SchemaInfo info() const; // describes T to the broker * std::string encode(const T& value) const; // T -> bytes - * T decode(const char* data, size_t) const; // bytes -> T + * Expected decode(std::span) const; // bytes -> T (or error) * * Construct a `Schema` from a SerDe directly, or use a factory: * - primitives: `Schema{}`, `Schema{}`, `Schema{}` (default) @@ -112,10 +116,10 @@ class Schema { * format for primitive schemas. * * For any other (non-primitive) `T` this installs an "unset" schema: it reports - * `SchemaType::BYTES` to the broker, but its `encode` and `decode` throw - * `ClientException` on use. Supply a real schema (`jsonSchema()`, - * `avroSchema()`, `protobufNativeSchema()`, or a custom SerDe) before - * producing or consuming such a `T`. 
+ * `SchemaType::BYTES` to the broker, but `encode` throws `ClientException` and + * `decode` returns an `Error` until you supply a real schema (`jsonSchema()`, + * `avroSchema()`, `protobufNativeSchema()`, or a custom SerDe) for such a + * `T`. */ Schema(); @@ -150,21 +154,19 @@ class Schema { /** * @brief Deserializes wire bytes back into a value of `T`. - * @param data pointer to the payload bytes. - * @param size number of bytes available at @p data. - * @return the decoded value. - * @throws ClientException if this is an unset schema (non-primitive `T` with no - * SerDe supplied). A SerDe may also throw on malformed or incompatible - * bytes. + * @param data a view over the payload bytes. + * @return the decoded value, or an `Error` if decoding fails — including an unset + * schema (non-primitive `T` with no SerDe supplied) or bytes that are + * malformed or incompatible with the schema. */ - T decode(const char* data, std::size_t size) const { return self_->decode(data, size); } + Expected decode(std::span data) const { return self_->decode(data); } private: struct Concept { virtual ~Concept() = default; virtual SchemaInfo info() const = 0; virtual std::string encode(const T&) const = 0; - virtual T decode(const char*, std::size_t) const = 0; + virtual Expected decode(std::span) const = 0; }; template struct Model final : Concept { @@ -172,7 +174,7 @@ class Schema { explicit Model(SerDe s) : serde(std::move(s)) {} SchemaInfo info() const override { return serde.info(); } std::string encode(const T& v) const override { return serde.encode(v); } - T decode(const char* d, std::size_t n) const override { return serde.decode(d, n); } + Expected decode(std::span d) const override { return serde.decode(d); } }; std::shared_ptr self_; @@ -204,11 +206,13 @@ inline U decodeBigEndian(const char* data, std::size_t size) { return static_cast(u); } +inline constexpr const char* kNoSchemaMsg = + "no schema configured for this value type — pass an explicit Schema " + 
"(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)"; + [[noreturn]] inline void throwNoSchema() { #if defined(__cpp_exceptions) || defined(_CPPUNWIND) - throw ClientException(pulsar::ResultInvalidConfiguration, - "no schema configured for this value type — pass an explicit Schema " - "(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)"); + throw ClientException(pulsar::ResultInvalidConfiguration, kNoSchemaMsg); #else std::abort(); #endif @@ -218,22 +222,30 @@ inline U decodeBigEndian(const char* data, std::size_t size) { struct BytesCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } std::string encode(const Bytes& v) const { return std::string(v.begin(), v.end()); } - Bytes decode(const char* d, std::size_t n) const { return Bytes(d, d + n); } + Expected decode(std::span d) const { return Bytes(d.begin(), d.end()); } }; struct StringCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::STRING, "String", ""); } std::string encode(const std::string& v) const { return v; } - std::string decode(const char* d, std::size_t n) const { return std::string(d, n); } + Expected decode(std::span d) const { return std::string(d.data(), d.size()); } }; struct Int32Codec { SchemaInfo info() const { return SchemaInfo(SchemaType::INT32, "INT32", ""); } std::string encode(std::int32_t v) const { return encodeBigEndian(v); } - std::int32_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } + Expected decode(std::span d) const { + if (d.size() < sizeof(std::int32_t)) + return unexpected(pulsar::ResultInvalidMessage, "INT32 payload too short"); + return decodeBigEndian(d.data(), d.size()); + } }; struct Int64Codec { SchemaInfo info() const { return SchemaInfo(SchemaType::INT64, "INT64", ""); } std::string encode(std::int64_t v) const { return encodeBigEndian(v); } - std::int64_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } + Expected decode(std::span d) 
const { + if (d.size() < sizeof(std::int64_t)) + return unexpected(pulsar::ResultInvalidMessage, "INT64 payload too short"); + return decodeBigEndian(d.data(), d.size()); + } }; struct DoubleCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::DOUBLE, "Double", ""); } @@ -242,8 +254,10 @@ struct DoubleCodec { std::memcpy(&bits, &v, sizeof(bits)); return encodeBigEndian(static_cast(bits)); } - double decode(const char* d, std::size_t n) const { - auto bits = static_cast(decodeBigEndian(d, n)); + Expected decode(std::span d) const { + if (d.size() < sizeof(double)) + return unexpected(pulsar::ResultInvalidMessage, "DOUBLE payload too short"); + auto bits = static_cast(decodeBigEndian(d.data(), d.size())); double v; std::memcpy(&v, &bits, sizeof(v)); return v; @@ -253,7 +267,9 @@ template struct UnsetCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } std::string encode(const T&) const { throwNoSchema(); } - T decode(const char*, std::size_t) const { throwNoSchema(); } + Expected decode(std::span) const { + return unexpected(pulsar::ResultInvalidConfiguration, kNoSchemaMsg); + } }; } // namespace detail From 967a09d698016da247a1dbf5f1b84c847f9a099a Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 08:24:55 -0700 Subject: [PATCH 05/39] Revert "st: decode returns Expected and takes std::span" This reverts commit 46d3f6d1fcbd63c031e8ce7ca1eb8ed48719c44c. 
--- examples/st/SampleStCheckpointConsumer.cc | 6 +- examples/st/SampleStJsonSchema.cc | 9 +-- examples/st/SampleStQueueConsumer.cc | 8 +-- examples/st/SampleStStreamConsumer.cc | 7 +-- include/pulsar/st/AvroSchema.h | 15 ++--- include/pulsar/st/JsonSchema.h | 15 ++--- include/pulsar/st/Message.h | 12 ++-- include/pulsar/st/ProtobufNativeSchema.h | 9 +-- include/pulsar/st/Schema.h | 72 +++++++++-------------- 9 files changed, 51 insertions(+), 102 deletions(-) diff --git a/examples/st/SampleStCheckpointConsumer.cc b/examples/st/SampleStCheckpointConsumer.cc index 79c2d30e..7baa34c8 100644 --- a/examples/st/SampleStCheckpointConsumer.cc +++ b/examples/st/SampleStCheckpointConsumer.cc @@ -54,11 +54,7 @@ int main() { std::cerr << "receive failed: " << msg.error() << "\n"; break; } - if (auto value = msg->value()) { - std::cout << "read: " << *value << "\n"; - } else { - std::cerr << "decode failed: " << value.error() << "\n"; - } + std::cout << "read: " << msg->value() << "\n"; } // Atomic position snapshot across all segments. 
Store the bytes yourself diff --git a/examples/st/SampleStJsonSchema.cc b/examples/st/SampleStJsonSchema.cc index 7b3c91c9..d647042d 100644 --- a/examples/st/SampleStJsonSchema.cc +++ b/examples/st/SampleStJsonSchema.cc @@ -72,12 +72,9 @@ int main() { if (consumerResult) { StreamConsumer consumer = std::move(consumerResult).value(); if (auto msg = consumer.receive(std::chrono::seconds(5))) { - if (auto received = msg->value()) { // decoded straight back into the struct - std::cout << received->orderId << " -> " << received->shipTo.city << "\n"; - consumer.acknowledgeCumulative(msg->id()); - } else { - std::cerr << "decode failed: " << received.error() << "\n"; - } + Order received = msg->value(); // decoded straight back into the struct + std::cout << received.orderId << " -> " << received.shipTo.city << "\n"; + consumer.acknowledgeCumulative(msg->id()); } (void)consumer.close(); } diff --git a/examples/st/SampleStQueueConsumer.cc b/examples/st/SampleStQueueConsumer.cc index 110dbb3d..1efac129 100644 --- a/examples/st/SampleStQueueConsumer.cc +++ b/examples/st/SampleStQueueConsumer.cc @@ -54,13 +54,7 @@ int main() { break; } - auto value = msg->value(); - if (!value) { - std::cerr << "decode failed: " << value.error() << "\n"; - consumer.negativeAcknowledge(msg->id()); // bad payload; redeliver - continue; - } - const bool processed = !value->empty(); + const bool processed = !msg->value().empty(); if (processed) { consumer.acknowledge(msg->id()); // fire-and-forget; never blocks or errors } else { diff --git a/examples/st/SampleStStreamConsumer.cc b/examples/st/SampleStStreamConsumer.cc index 3bcbc671..57d9f355 100644 --- a/examples/st/SampleStStreamConsumer.cc +++ b/examples/st/SampleStStreamConsumer.cc @@ -53,12 +53,7 @@ int main() { std::cerr << "receive failed: " << msg.error() << "\n"; break; } - auto value = msg->value(); - if (!value) { - std::cerr << "decode failed: " << value.error() << "\n"; - continue; - } - std::cout << "key=" << 
msg->key().value_or("") << " value=" << *value << "\n"; + std::cout << "key=" << msg->key().value_or("") << " value=" << msg->value() << "\n"; consumer.acknowledgeCumulative(msg->id()); // fire-and-forget; never blocks or errors } diff --git a/include/pulsar/st/AvroSchema.h b/include/pulsar/st/AvroSchema.h index 46ba7d9e..ccf88656 100644 --- a/include/pulsar/st/AvroSchema.h +++ b/include/pulsar/st/AvroSchema.h @@ -21,10 +21,8 @@ #include #include -#include #include #include -#include #include // avroSchema() is the Avro counterpart of jsonSchema(): reflect-cpp derives @@ -46,12 +44,8 @@ template struct AvroSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::AVRO, "AVRO", rfl::avro::to_schema()); } std::string encode(const T& value) const { return rfl::avro::write(value); } - Expected decode(std::span data) const { - try { - return rfl::avro::read(std::string(data.data(), data.size())).value(); - } catch (const std::exception& e) { - return unexpected(pulsar::ResultInvalidMessage, e.what()); - } + T decode(const char* data, std::size_t size) const { + return rfl::avro::read(std::string(data, size)).value(); } }; } // namespace detail @@ -69,8 +63,9 @@ struct AvroSerDe { * * @tparam T the struct type to serialize as Avro; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use Avro. `decode` reports input - * that is not a valid Avro encoding for `T` as an `Error` rather than throwing. + * @return a `Schema` whose `encode`/`decode` use Avro. + * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes + * are not a valid Avro encoding for `T`. 
*/ template Schema avroSchema() { diff --git a/include/pulsar/st/JsonSchema.h b/include/pulsar/st/JsonSchema.h index bf27a5e2..15fa6987 100644 --- a/include/pulsar/st/JsonSchema.h +++ b/include/pulsar/st/JsonSchema.h @@ -21,10 +21,8 @@ #include #include -#include #include #include -#include #include // jsonSchema() derives BOTH the JSON SerDe and the declared schema from T's @@ -43,12 +41,8 @@ template struct JsonSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::JSON, "JSON", rfl::json::to_schema()); } std::string encode(const T& value) const { return rfl::json::write(value); } - Expected decode(std::span data) const { - try { - return rfl::json::read(std::string(data.data(), data.size())).value(); - } catch (const std::exception& e) { - return unexpected(pulsar::ResultInvalidMessage, e.what()); - } + T decode(const char* data, std::size_t size) const { + return rfl::json::read(std::string(data, size)).value(); } }; } // namespace detail @@ -69,8 +63,9 @@ struct JsonSerDe { * * @tparam T the struct type to serialize as JSON; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use JSON. `decode` reports input - * that is not valid JSON for `T` as an `Error` rather than throwing. + * @return a `Schema` whose `encode`/`decode` use JSON. + * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes + * are not valid JSON for `T`. */ template Schema jsonSchema() { diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index 99a1049d..f029521c 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -19,7 +19,6 @@ #pragma once #include -#include #include #include #include @@ -28,7 +27,6 @@ #include #include #include -#include #include #include @@ -63,14 +61,12 @@ class Message { /** * Decode the payload through `Schema` and return the typed value. * - * Decoding happens on every call (the result is not cached). 
Returns an `Error` - * rather than throwing if the payload bytes are malformed for the schema, or if no - * schema was configured for `T`; the raw bytes remain available via `data()` / - * `size()`. Call `.value()` on the result to opt into throwing instead. + * Decoding happens on every call (the result is not cached). May throw if the + * payload bytes are malformed for the schema. * - * @return the decoded value of type `T`, or an `Error` if decoding fails. + * @return the decoded value of type `T`. */ - Expected value() const { return schema_.decode(std::span(core_.data(), core_.size())); } + T value() const { return schema_.decode(core_.data(), core_.size()); } /** * Pointer to the raw, undecoded payload bytes. diff --git a/include/pulsar/st/ProtobufNativeSchema.h b/include/pulsar/st/ProtobufNativeSchema.h index 06812767..348ad051 100644 --- a/include/pulsar/st/ProtobufNativeSchema.h +++ b/include/pulsar/st/ProtobufNativeSchema.h @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -40,12 +39,10 @@ struct ProtobufNativeSerDe { "protobufNativeSchema requires T to be a generated protobuf Message"); SchemaInfo info() const { return pulsar::createProtobufNativeSchema(T::descriptor()); } std::string encode(const T& value) const { return value.SerializeAsString(); } - Expected decode(std::span data) const { + T decode(const char* data, std::size_t size) const { T message; - if (message.ParseFromArray(data.data(), static_cast(data.size()))) { - return message; - } - return unexpected(pulsar::ResultInvalidMessage, "failed to parse protobuf message"); + message.ParseFromArray(data, static_cast(size)); + return message; } }; } // namespace detail diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index d81e3c38..856bb021 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -30,7 +29,6 @@ #include #include #include -#include #include #include 
#include @@ -62,21 +60,19 @@ using Bytes = std::vector; * - `SchemaInfo info() const` — the schema description sent to the broker for * compatibility checking. * - `std::string encode(const T&) const` — serializes a value of `T` to bytes. - * - `Expected decode(std::span) const` — deserializes bytes back to - * `T`, reporting malformed input as an error value. (A SerDe may also return a - * plain `T` for an infallible decode; it converts implicitly to `Expected`.) + * - `T decode(const char*, std::size_t) const` — deserializes bytes back to `T`. * * @tparam S the candidate SerDe type. * @tparam T the value type the SerDe handles. */ template -concept SerDeFor = requires(const S& serde, const T& value, std::span data) { +concept SerDeFor = requires(const S& serde, const T& value, const char* data, std::size_t size) { { serde.info() } ->std::convertible_to; { serde.encode(value) } ->std::convertible_to; - { serde.decode(data) } - ->std::convertible_to>; + { serde.decode(data, size) } + ->std::convertible_to; }; /** @@ -89,7 +85,7 @@ concept SerDeFor = requires(const S& serde, const T& value, std::span bytes - * Expected decode(std::span) const; // bytes -> T (or error) + * T decode(const char* data, size_t) const; // bytes -> T * * Construct a `Schema` from a SerDe directly, or use a factory: * - primitives: `Schema{}`, `Schema{}`, `Schema{}` (default) @@ -116,10 +112,10 @@ class Schema { * format for primitive schemas. * * For any other (non-primitive) `T` this installs an "unset" schema: it reports - * `SchemaType::BYTES` to the broker, but `encode` throws `ClientException` and - * `decode` returns an `Error` until you supply a real schema (`jsonSchema()`, - * `avroSchema()`, `protobufNativeSchema()`, or a custom SerDe) for such a - * `T`. + * `SchemaType::BYTES` to the broker, but its `encode` and `decode` throw + * `ClientException` on use. 
Supply a real schema (`jsonSchema()`, + * `avroSchema()`, `protobufNativeSchema()`, or a custom SerDe) before + * producing or consuming such a `T`. */ Schema(); @@ -154,19 +150,21 @@ class Schema { /** * @brief Deserializes wire bytes back into a value of `T`. - * @param data a view over the payload bytes. - * @return the decoded value, or an `Error` if decoding fails — including an unset - * schema (non-primitive `T` with no SerDe supplied) or bytes that are - * malformed or incompatible with the schema. + * @param data pointer to the payload bytes. + * @param size number of bytes available at @p data. + * @return the decoded value. + * @throws ClientException if this is an unset schema (non-primitive `T` with no + * SerDe supplied). A SerDe may also throw on malformed or incompatible + * bytes. */ - Expected decode(std::span data) const { return self_->decode(data); } + T decode(const char* data, std::size_t size) const { return self_->decode(data, size); } private: struct Concept { virtual ~Concept() = default; virtual SchemaInfo info() const = 0; virtual std::string encode(const T&) const = 0; - virtual Expected decode(std::span) const = 0; + virtual T decode(const char*, std::size_t) const = 0; }; template struct Model final : Concept { @@ -174,7 +172,7 @@ class Schema { explicit Model(SerDe s) : serde(std::move(s)) {} SchemaInfo info() const override { return serde.info(); } std::string encode(const T& v) const override { return serde.encode(v); } - Expected decode(std::span d) const override { return serde.decode(d); } + T decode(const char* d, std::size_t n) const override { return serde.decode(d, n); } }; std::shared_ptr self_; @@ -206,13 +204,11 @@ inline U decodeBigEndian(const char* data, std::size_t size) { return static_cast(u); } -inline constexpr const char* kNoSchemaMsg = - "no schema configured for this value type — pass an explicit Schema " - "(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)"; - [[noreturn]] inline void 
throwNoSchema() { #if defined(__cpp_exceptions) || defined(_CPPUNWIND) - throw ClientException(pulsar::ResultInvalidConfiguration, kNoSchemaMsg); + throw ClientException(pulsar::ResultInvalidConfiguration, + "no schema configured for this value type — pass an explicit Schema " + "(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)"); #else std::abort(); #endif @@ -222,30 +218,22 @@ inline constexpr const char* kNoSchemaMsg = struct BytesCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } std::string encode(const Bytes& v) const { return std::string(v.begin(), v.end()); } - Expected decode(std::span d) const { return Bytes(d.begin(), d.end()); } + Bytes decode(const char* d, std::size_t n) const { return Bytes(d, d + n); } }; struct StringCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::STRING, "String", ""); } std::string encode(const std::string& v) const { return v; } - Expected decode(std::span d) const { return std::string(d.data(), d.size()); } + std::string decode(const char* d, std::size_t n) const { return std::string(d, n); } }; struct Int32Codec { SchemaInfo info() const { return SchemaInfo(SchemaType::INT32, "INT32", ""); } std::string encode(std::int32_t v) const { return encodeBigEndian(v); } - Expected decode(std::span d) const { - if (d.size() < sizeof(std::int32_t)) - return unexpected(pulsar::ResultInvalidMessage, "INT32 payload too short"); - return decodeBigEndian(d.data(), d.size()); - } + std::int32_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } }; struct Int64Codec { SchemaInfo info() const { return SchemaInfo(SchemaType::INT64, "INT64", ""); } std::string encode(std::int64_t v) const { return encodeBigEndian(v); } - Expected decode(std::span d) const { - if (d.size() < sizeof(std::int64_t)) - return unexpected(pulsar::ResultInvalidMessage, "INT64 payload too short"); - return decodeBigEndian(d.data(), d.size()); - } + std::int64_t decode(const char* 
d, std::size_t n) const { return decodeBigEndian(d, n); } }; struct DoubleCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::DOUBLE, "Double", ""); } @@ -254,10 +242,8 @@ struct DoubleCodec { std::memcpy(&bits, &v, sizeof(bits)); return encodeBigEndian(static_cast(bits)); } - Expected decode(std::span d) const { - if (d.size() < sizeof(double)) - return unexpected(pulsar::ResultInvalidMessage, "DOUBLE payload too short"); - auto bits = static_cast(decodeBigEndian(d.data(), d.size())); + double decode(const char* d, std::size_t n) const { + auto bits = static_cast(decodeBigEndian(d, n)); double v; std::memcpy(&v, &bits, sizeof(v)); return v; @@ -267,9 +253,7 @@ template struct UnsetCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } std::string encode(const T&) const { throwNoSchema(); } - Expected decode(std::span) const { - return unexpected(pulsar::ResultInvalidConfiguration, kNoSchemaMsg); - } + T decode(const char*, std::size_t) const { throwNoSchema(); } }; } // namespace detail From e470720381c523fbd5297869f1994bf87f55aecc Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 10:24:25 -0700 Subject: [PATCH 06/39] st: byte-buffer SerDe seam + zero-copy BytesView Per PR review on the Schema encode/decode signatures. SerDe seam (Schema + JSON/Avro/protobuf factories): - encode writes into a caller-provided, reusable std::vector& (no per-message allocation) and returns Expected; - decode takes std::span and returns Expected, so malformed input / an unset schema are error values rather than throws; - Bytes is now std::vector. Client-facing API unchanged: Message::value() still returns T (decode failures are handled inside the SDK), Producer::send(const T&) and the examples are as before; a rare encode error is stashed in the builder and surfaces from send()/sendAsync(). Zero-copy bytes: new BytesView = std::span. 
Schema is the zero-copy counterpart of Schema -- Producer publishes the caller's bytes without copying (the caller keeps them valid until the send completes) and Message::value() returns a view into the message buffer. OutgoingMessage carries an optional non-owning view. Verified with clang + gcc:13 (-Wextra -Werror), clang-format-11, and a runtime check that decode returns a view at the same address. Signed-off-by: Matteo Merli --- include/pulsar/st/AvroSchema.h | 24 ++- include/pulsar/st/JsonSchema.h | 24 ++- include/pulsar/st/Message.h | 12 +- include/pulsar/st/Producer.h | 36 ++++- include/pulsar/st/ProtobufNativeSchema.h | 15 +- include/pulsar/st/Schema.h | 191 +++++++++++++++-------- 6 files changed, 214 insertions(+), 88 deletions(-) diff --git a/include/pulsar/st/AvroSchema.h b/include/pulsar/st/AvroSchema.h index ccf88656..ff00b8a2 100644 --- a/include/pulsar/st/AvroSchema.h +++ b/include/pulsar/st/AvroSchema.h @@ -21,9 +21,12 @@ #include #include +#include #include #include +#include #include +#include // avroSchema() is the Avro counterpart of jsonSchema(): reflect-cpp derives // the SerDe and the Avro schema from T's fields — no per-type serializer. 
The @@ -43,9 +46,19 @@ namespace detail { template struct AvroSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::AVRO, "AVRO", rfl::avro::to_schema()); } - std::string encode(const T& value) const { return rfl::avro::write(value); } - T decode(const char* data, std::size_t size) const { - return rfl::avro::read(std::string(data, size)).value(); + Expected encode(const T& value, std::vector& out) const { + const std::string s = rfl::avro::write(value); + const auto* p = reinterpret_cast(s.data()); + out.assign(p, p + s.size()); + return {}; + } + Expected decode(std::span data) const { + try { + return rfl::avro::read(std::string(reinterpret_cast(data.data()), data.size())) + .value(); + } catch (const std::exception& e) { + return unexpected(pulsar::ResultInvalidMessage, e.what()); + } } }; } // namespace detail @@ -63,9 +76,8 @@ struct AvroSerDe { * * @tparam T the struct type to serialize as Avro; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use Avro. - * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes - * are not a valid Avro encoding for `T`. + * @return a `Schema` whose `encode`/`decode` use Avro. `decode` reports input + * that is not a valid Avro encoding for `T` as an `Error` rather than throwing. 
*/ template Schema avroSchema() { diff --git a/include/pulsar/st/JsonSchema.h b/include/pulsar/st/JsonSchema.h index 15fa6987..d3644541 100644 --- a/include/pulsar/st/JsonSchema.h +++ b/include/pulsar/st/JsonSchema.h @@ -21,9 +21,12 @@ #include #include +#include #include #include +#include #include +#include // jsonSchema() derives BOTH the JSON SerDe and the declared schema from T's // fields via reflect-cpp (https://github.com/getml/reflect-cpp) — no per-type @@ -40,9 +43,19 @@ namespace detail { template struct JsonSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::JSON, "JSON", rfl::json::to_schema()); } - std::string encode(const T& value) const { return rfl::json::write(value); } - T decode(const char* data, std::size_t size) const { - return rfl::json::read(std::string(data, size)).value(); + Expected encode(const T& value, std::vector& out) const { + const std::string s = rfl::json::write(value); + const auto* p = reinterpret_cast(s.data()); + out.assign(p, p + s.size()); + return {}; + } + Expected decode(std::span data) const { + try { + return rfl::json::read(std::string(reinterpret_cast(data.data()), data.size())) + .value(); + } catch (const std::exception& e) { + return unexpected(pulsar::ResultInvalidMessage, e.what()); + } } }; } // namespace detail @@ -63,9 +76,8 @@ struct JsonSerDe { * * @tparam T the struct type to serialize as JSON; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use JSON. - * @throws std::runtime_error (from reflect-cpp) at decode time if the input bytes - * are not valid JSON for `T`. + * @return a `Schema` whose `encode`/`decode` use JSON. `decode` reports input + * that is not valid JSON for `T` as an `Error` rather than throwing. 
*/ template Schema jsonSchema() { diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index f029521c..d81ff9c0 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -61,12 +62,17 @@ class Message { /** * Decode the payload through `Schema` and return the typed value. * - * Decoding happens on every call (the result is not cached). May throw if the - * payload bytes are malformed for the schema. + * Decoding happens on every call (the result is not cached). The SDK handles a + * payload that cannot be decoded internally — such a message is not delivered to + * the application — so this does not surface decode failures to the caller. The + * raw bytes remain available via `data()` / `size()`. * * @return the decoded value of type `T`. */ - T value() const { return schema_.decode(core_.data(), core_.size()); } + T value() const { + const auto* bytes = reinterpret_cast(core_.data()); + return schema_.decode(std::span(bytes, core_.size())).value(); + } /** * Pointer to the raw, undecoded payload bytes. diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index be1774e6..b09e001d 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -30,10 +30,13 @@ #include #include +#include #include #include #include +#include #include +#include #include namespace pulsar::st { @@ -99,8 +102,14 @@ struct ProducerConfig { * its fluent setters and hands the result to the producer core for publishing. */ struct OutgoingMessage { - /** Encoded message payload (the value serialized through `Schema`). */ - std::string payload; + /** Encoded message payload — the value serialized to bytes through `Schema`. + * Published unless `usesView` is set. */ + std::vector payload; + /** Non-owning view of already-encoded bytes for zero-copy publishing + * (`Schema`); the caller keeps them valid until the send completes. 
*/ + std::span payloadView; + /** When true, publish `payloadView` directly without copying; otherwise `payload`. */ + bool usesView = false; /** Whether a routing/ordering key is set. `false` (the default) means no key. */ bool hasKey = false; /** Partition/ordering key; meaningful only when `hasKey` is true. */ @@ -145,11 +154,22 @@ class MessageBuilder { * Set the message value, encoding it to bytes through this producer's * `Schema`. * + * For a zero-copy `Schema` producer the bytes are not copied — the + * view is published directly, so the caller must keep them valid until the send + * completes. A rare encoding failure (e.g. an unset schema) is not reported here, + * so the fluent chain stays unbroken; it surfaces from the terminal `send()` / + * `sendAsync()` instead. + * * @param v the typed value to publish. * @return `*this`, for chaining. */ MessageBuilder& value(const T& v) { - message_.payload = schema_.encode(v); + if constexpr (std::is_same_v) { + message_.payloadView = v; + message_.usesView = true; + } else { + if (auto r = schema_.encode(v, message_.payload); !r) encodeError_ = r.error(); + } return *this; } /** @@ -246,7 +266,14 @@ class MessageBuilder { * @return a `Future` that completes with the assigned id on success * or the failure. The future may be ignored for fire-and-forget sends. 
*/ - Future sendAsync() { return core_.sendAsync(std::move(message_)); } + Future sendAsync() { + if (encodeError_) { + detail::Promise promise; + promise.setError(*encodeError_); + return promise.getFuture(); + } + return core_.sendAsync(std::move(message_)); + } private: friend class Producer; @@ -260,6 +287,7 @@ class MessageBuilder { detail::ProducerCore core_; Schema schema_; OutgoingMessage message_; + std::optional encodeError_; }; /** diff --git a/include/pulsar/st/ProtobufNativeSchema.h b/include/pulsar/st/ProtobufNativeSchema.h index 348ad051..5fadc197 100644 --- a/include/pulsar/st/ProtobufNativeSchema.h +++ b/include/pulsar/st/ProtobufNativeSchema.h @@ -24,8 +24,10 @@ #include #include +#include #include #include +#include namespace pulsar::st { @@ -38,11 +40,16 @@ struct ProtobufNativeSerDe { static_assert(std::is_base_of_v, "protobufNativeSchema requires T to be a generated protobuf Message"); SchemaInfo info() const { return pulsar::createProtobufNativeSchema(T::descriptor()); } - std::string encode(const T& value) const { return value.SerializeAsString(); } - T decode(const char* data, std::size_t size) const { + Expected encode(const T& value, std::vector& out) const { + out.resize(value.ByteSizeLong()); + if (!value.SerializeToArray(out.data(), static_cast(out.size()))) + return unexpected(pulsar::ResultInvalidMessage, "failed to serialize protobuf message"); + return {}; + } + Expected decode(std::span data) const { T message; - message.ParseFromArray(data, static_cast(size)); - return message; + if (message.ParseFromArray(data.data(), static_cast(data.size()))) return message; + return unexpected(pulsar::ResultInvalidMessage, "failed to parse protobuf message"); } }; } // namespace detail diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index 856bb021..a1b1c4e7 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -22,13 +22,14 @@ #include #include #include +#include #include #include #include -#include 
#include #include +#include #include #include #include @@ -41,11 +42,22 @@ using pulsar::SchemaType; /** * @brief The default value type: a raw, uninterpreted byte payload. * - * Alias for `std::vector`. A `Schema` (the default schema) passes the - * payload through verbatim in both directions, applying no encoding or schema + * Alias for `std::vector`. A `Schema` (the default schema) passes + * the payload through verbatim in both directions, applying no encoding or schema * declaration to the broker beyond `SchemaType::BYTES`. */ -using Bytes = std::vector; +using Bytes = std::vector; + +/** + * @brief A non-owning, zero-copy view over raw payload bytes. + * + * Alias for `std::span` — the zero-copy counterpart of `Bytes`. A + * `Schema` publishes the viewed bytes without copying them (the caller + * must keep them valid until the send completes), and on receive + * `Message::value()` returns a view into the message's own buffer (valid + * for that message's lifetime). Use `Bytes` when you want the SDK to own a copy. + */ +using BytesView = std::span; /** * @brief Constraint identifying a *SerDe* for `T`: a type that can describe `T` to @@ -59,20 +71,25 @@ using Bytes = std::vector; * The required members are: * - `SchemaInfo info() const` — the schema description sent to the broker for * compatibility checking. - * - `std::string encode(const T&) const` — serializes a value of `T` to bytes. - * - `T decode(const char*, std::size_t) const` — deserializes bytes back to `T`. + * - `Expected encode(const T&, std::vector& out) const` — + * serializes a value of `T` into @p out (replacing its contents), reporting any + * failure as an error value. The buffer is supplied by the caller so it can be + * reused across messages, avoiding a per-message allocation. + * - `Expected decode(std::span) const` — deserializes bytes + * back to `T`, reporting malformed input as an error value. * * @tparam S the candidate SerDe type. * @tparam T the value type the SerDe handles. 
*/ template -concept SerDeFor = requires(const S& serde, const T& value, const char* data, std::size_t size) { +concept SerDeFor = requires(const S& serde, const T& value, std::span data, + std::vector& out) { { serde.info() } ->std::convertible_to; - { serde.encode(value) } - ->std::convertible_to; - { serde.decode(data, size) } - ->std::convertible_to; + { serde.encode(value, out) } + ->std::convertible_to>; + { serde.decode(data) } + ->std::convertible_to>; }; /** @@ -83,9 +100,9 @@ concept SerDeFor = requires(const S& serde, const T& value, const char* data, st * carried by different encodings, and a producer can be handed any schema. * * A SerDe is any copyable type providing three const members: - * SchemaInfo info() const; // describes T to the broker - * std::string encode(const T& value) const; // T -> bytes - * T decode(const char* data, size_t) const; // bytes -> T + * SchemaInfo info() const; + * Expected encode(const T& value, std::vector& out) const; // T -> bytes + * Expected decode(std::span data) const; // bytes -> T * * Construct a `Schema` from a SerDe directly, or use a factory: * - primitives: `Schema{}`, `Schema{}`, `Schema{}` (default) @@ -112,10 +129,10 @@ class Schema { * format for primitive schemas. * * For any other (non-primitive) `T` this installs an "unset" schema: it reports - * `SchemaType::BYTES` to the broker, but its `encode` and `decode` throw - * `ClientException` on use. Supply a real schema (`jsonSchema()`, - * `avroSchema()`, `protobufNativeSchema()`, or a custom SerDe) before - * producing or consuming such a `T`. + * `SchemaType::BYTES` to the broker, but its `encode` and `decode` return an + * `Error` on use. Supply a real schema (`jsonSchema()`, `avroSchema()`, + * `protobufNativeSchema()`, or a custom SerDe) before producing or consuming + * such a `T`. */ Schema(); @@ -140,39 +157,45 @@ class Schema { SchemaInfo info() const { return self_->info(); } /** - * @brief Serializes a value to its wire bytes. 
+ * @brief Serializes a value into a caller-provided byte buffer. + * + * The destination buffer is supplied by the caller and reused across calls, so + * the producer hot path does not allocate a fresh buffer per message. + * * @param value the value to encode. - * @return the encoded payload as a byte string. - * @throws ClientException if this is an unset schema (non-primitive `T` with no - * SerDe supplied). A custom SerDe may throw on its own encoding errors. + * @param out destination buffer; its previous contents are replaced. + * @return success, or an `Error` — including an unset schema (non-primitive `T` + * with no SerDe supplied) or a SerDe-specific encoding failure. */ - std::string encode(const T& value) const { return self_->encode(value); } + Expected encode(const T& value, std::vector& out) const { + return self_->encode(value, out); + } /** * @brief Deserializes wire bytes back into a value of `T`. - * @param data pointer to the payload bytes. - * @param size number of bytes available at @p data. - * @return the decoded value. - * @throws ClientException if this is an unset schema (non-primitive `T` with no - * SerDe supplied). A SerDe may also throw on malformed or incompatible - * bytes. + * @param data a view over the payload bytes. + * @return the decoded value, or an `Error` if decoding fails — including an unset + * schema (non-primitive `T` with no SerDe supplied) or bytes that are + * malformed or incompatible with the schema. 
*/ - T decode(const char* data, std::size_t size) const { return self_->decode(data, size); } + Expected decode(std::span data) const { return self_->decode(data); } private: struct Concept { virtual ~Concept() = default; virtual SchemaInfo info() const = 0; - virtual std::string encode(const T&) const = 0; - virtual T decode(const char*, std::size_t) const = 0; + virtual Expected encode(const T&, std::vector&) const = 0; + virtual Expected decode(std::span) const = 0; }; template struct Model final : Concept { SerDe serde; explicit Model(SerDe s) : serde(std::move(s)) {} SchemaInfo info() const override { return serde.info(); } - std::string encode(const T& v) const override { return serde.encode(v); } - T decode(const char* d, std::size_t n) const override { return serde.decode(d, n); } + Expected encode(const T& v, std::vector& out) const override { + return serde.encode(v, out); + } + Expected decode(std::span d) const override { return serde.decode(d); } }; std::shared_ptr self_; @@ -183,67 +206,99 @@ class Schema { /// part of the public API. namespace detail { +inline constexpr const char* kNoSchemaMsg = + "no schema configured for this value type — pass an explicit Schema " + "(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)"; + // Pulsar encodes numeric schemas as fixed-width big-endian. 
template -inline std::string encodeBigEndian(U value) { +inline void encodeBigEndian(U value, std::vector& out) { static_assert(std::is_integral_v, "integral only"); - std::string out(sizeof(U), '\0'); auto u = static_cast>(value); for (std::size_t i = 0; i < sizeof(U); ++i) { - out[i] = static_cast((u >> (8 * (sizeof(U) - 1 - i))) & 0xFF); + out.push_back(static_cast((u >> (8 * (sizeof(U) - 1 - i))) & 0xFF)); } - return out; } template -inline U decodeBigEndian(const char* data, std::size_t size) { +inline U decodeBigEndian(std::span data) { static_assert(std::is_integral_v, "integral only"); std::make_unsigned_t u = 0; - for (std::size_t i = 0; i < sizeof(U) && i < size; ++i) { - u = (u << 8) | static_cast(data[i]); + for (std::size_t i = 0; i < sizeof(U) && i < data.size(); ++i) { + u = (u << 8) | std::to_integer(data[i]); } return static_cast(u); } -[[noreturn]] inline void throwNoSchema() { -#if defined(__cpp_exceptions) || defined(_CPPUNWIND) - throw ClientException(pulsar::ResultInvalidConfiguration, - "no schema configured for this value type — pass an explicit Schema " - "(jsonSchema/avroSchema/protobufNativeSchema, or a custom SerDe)"); -#else - std::abort(); -#endif -} - // Built-in SerDe codecs. struct BytesCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } - std::string encode(const Bytes& v) const { return std::string(v.begin(), v.end()); } - Bytes decode(const char* d, std::size_t n) const { return Bytes(d, d + n); } + Expected encode(const Bytes& v, std::vector& out) const { + out = v; + return {}; + } + Expected decode(std::span d) const { return Bytes(d.begin(), d.end()); } +}; +// Zero-copy raw bytes: decode hands back a view into the message buffer; the +// producer publishes the caller's span without copying (it bypasses encode, which +// is provided here only as an owning fallback). 
+struct SpanBytesCodec { + SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } + Expected encode(BytesView v, std::vector& out) const { + out.assign(v.begin(), v.end()); + return {}; + } + Expected decode(std::span d) const { return d; } }; struct StringCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::STRING, "String", ""); } - std::string encode(const std::string& v) const { return v; } - std::string decode(const char* d, std::size_t n) const { return std::string(d, n); } + Expected encode(const std::string& v, std::vector& out) const { + const auto* p = reinterpret_cast(v.data()); + out.assign(p, p + v.size()); + return {}; + } + Expected decode(std::span d) const { + return std::string(reinterpret_cast(d.data()), d.size()); + } }; struct Int32Codec { SchemaInfo info() const { return SchemaInfo(SchemaType::INT32, "INT32", ""); } - std::string encode(std::int32_t v) const { return encodeBigEndian(v); } - std::int32_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } + Expected encode(std::int32_t v, std::vector& out) const { + out.clear(); + encodeBigEndian(v, out); + return {}; + } + Expected decode(std::span d) const { + if (d.size() < sizeof(std::int32_t)) + return unexpected(pulsar::ResultInvalidMessage, "INT32 payload too short"); + return decodeBigEndian(d); + } }; struct Int64Codec { SchemaInfo info() const { return SchemaInfo(SchemaType::INT64, "INT64", ""); } - std::string encode(std::int64_t v) const { return encodeBigEndian(v); } - std::int64_t decode(const char* d, std::size_t n) const { return decodeBigEndian(d, n); } + Expected encode(std::int64_t v, std::vector& out) const { + out.clear(); + encodeBigEndian(v, out); + return {}; + } + Expected decode(std::span d) const { + if (d.size() < sizeof(std::int64_t)) + return unexpected(pulsar::ResultInvalidMessage, "INT64 payload too short"); + return decodeBigEndian(d); + } }; struct DoubleCodec { SchemaInfo info() const { return 
SchemaInfo(SchemaType::DOUBLE, "Double", ""); } - std::string encode(double v) const { + Expected encode(double v, std::vector& out) const { std::uint64_t bits; std::memcpy(&bits, &v, sizeof(bits)); - return encodeBigEndian(static_cast(bits)); + out.clear(); + encodeBigEndian(static_cast(bits), out); + return {}; } - double decode(const char* d, std::size_t n) const { - auto bits = static_cast(decodeBigEndian(d, n)); + Expected decode(std::span d) const { + if (d.size() < sizeof(double)) + return unexpected(pulsar::ResultInvalidMessage, "DOUBLE payload too short"); + auto bits = static_cast(decodeBigEndian(d)); double v; std::memcpy(&v, &bits, sizeof(v)); return v; @@ -252,8 +307,12 @@ struct DoubleCodec { template struct UnsetCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } - std::string encode(const T&) const { throwNoSchema(); } - T decode(const char*, std::size_t) const { throwNoSchema(); } + Expected encode(const T&, std::vector&) const { + return unexpected(pulsar::ResultInvalidConfiguration, kNoSchemaMsg); + } + Expected decode(std::span) const { + return unexpected(pulsar::ResultInvalidConfiguration, kNoSchemaMsg); + } }; } // namespace detail @@ -271,6 +330,8 @@ Schema::Schema() { self_ = std::make_shared>(detail::Int64Codec{}); } else if constexpr (std::is_same_v) { self_ = std::make_shared>(detail::DoubleCodec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::SpanBytesCodec{}); } else { self_ = std::make_shared>>(detail::UnsetCodec{}); } From ac616bb752463940339bc889f830654b9beb6fa6 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 11:00:00 -0700 Subject: [PATCH 07/39] st: return std::string_view from string accessors Per PR review: the string accessors return views instead of owning references/copies, so the lib/st impl is not forced to store a std::string per field -- it can return a view into whatever it already holds. 
- consumer/producer topic() / subscription() / consumerName() / name() and Message::topic() now return std::string_view (Message::topic() previously copied); the detail::*Core declarations they forward to return string_view too. - Message::key() / producerName() / replicatedFrom() now return std::optional. - Error::message() stays const std::string& (an Error is usually a temporary, so auto-capturing a const ref copies safely whereas a view would dangle). Returned views are valid while the source object (message / consumer / producer) is alive. All within pulsar::st; the old API is untouched. Verified with clang + gcc:13 (-Wextra -Werror), static_asserts on the return types, and clang-format-11. Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 5 ++-- include/pulsar/st/Message.h | 28 +++++++++++-------- include/pulsar/st/Producer.h | 10 ++++--- include/pulsar/st/QueueConsumer.h | 16 ++++++----- include/pulsar/st/StreamConsumer.h | 16 ++++++----- .../pulsar/st/detail/CheckpointConsumerCore.h | 3 +- include/pulsar/st/detail/MessageCore.h | 9 +++--- include/pulsar/st/detail/ProducerCore.h | 5 ++-- include/pulsar/st/detail/QueueConsumerCore.h | 7 +++-- include/pulsar/st/detail/StreamConsumerCore.h | 7 +++-- 10 files changed, 61 insertions(+), 45 deletions(-) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 8f063706..2a5dd493 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -32,6 +32,7 @@ #include #include #include +#include #include namespace pulsar::st { @@ -169,9 +170,9 @@ class CheckpointConsumer { /** * @brief Return the topic this consumer reads from. * - * @return Reference to the topic name. + * @return a view of the topic name, valid while this consumer is alive. */ - const std::string& topic() const { return core_.topic(); } + std::string_view topic() const { return core_.topic(); } /** * @brief Test whether this consumer is usable. 
diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index d81ff9c0..2263277f 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -29,6 +29,7 @@ #include #include #include +#include #include namespace pulsar::st { @@ -98,10 +99,11 @@ class Message { /** * The optional partition/routing key the message was published with. * - * @return the key, or `std::nullopt` if the message has none. + * @return a view of the key (valid while this message is alive), or `std::nullopt` + * if the message has none. */ - std::optional key() const { - return core_.hasKey() ? std::optional(core_.key()) : std::nullopt; + std::optional key() const { + return core_.hasKey() ? std::optional(core_.key()) : std::nullopt; } /** @@ -138,18 +140,19 @@ class Message { /** * The name of the producer that published the message, if available. * - * @return the producer name, or `std::nullopt` if not present. + * @return a view of the producer name (valid while this message is alive), or + * `std::nullopt` if not present. */ - std::optional producerName() const { - return core_.hasProducerName() ? std::optional(core_.producerName()) : std::nullopt; + std::optional producerName() const { + return core_.hasProducerName() ? std::optional(core_.producerName()) : std::nullopt; } /** * The resolved canonical topic the message was received from. * - * @return the fully-qualified topic name. + * @return a view of the fully-qualified topic name, valid while this message is alive. */ - std::string topic() const { return core_.topic(); } + std::string_view topic() const { return core_.topic(); } /** * How many times this message has been redelivered. @@ -161,11 +164,12 @@ class Message { /** * The source cluster this message was replicated from, if any. * - * @return the originating cluster name, or `std::nullopt` if the message was not - * replicated from another cluster. 
+ * @return a view of the originating cluster name (valid while this message is alive), + * or `std::nullopt` if the message was not replicated from another cluster. */ - std::optional replicatedFrom() const { - return core_.hasReplicatedFrom() ? std::optional(core_.replicatedFrom()) : std::nullopt; + std::optional replicatedFrom() const { + return core_.hasReplicatedFrom() ? std::optional(core_.replicatedFrom()) + : std::nullopt; } /** diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index b09e001d..9ea86a40 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -333,10 +334,11 @@ class Producer { */ Future sendAsync(const T& value) { return newMessage().value(value).sendAsync(); } - /** @return the topic this producer publishes to. */ - const std::string& topic() const { return core_.topic(); } - /** @return the producer's name (broker-assigned when none was configured). */ - const std::string& name() const { return core_.name(); } + /** @return a view of the topic this producer publishes to, valid while the producer is alive. */ + std::string_view topic() const { return core_.topic(); } + /** @return a view of the producer's name (broker-assigned when none was configured), valid + * while the producer is alive. */ + std::string_view name() const { return core_.name(); } /** @return the sequence id of the most recently published message, or -1 if * none has been published yet. 
*/ int64_t lastSequenceId() const { return core_.lastSequenceId(); } diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index 42aae798..aebd929d 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace pulsar::st { @@ -192,13 +193,14 @@ class QueueConsumer { */ Future closeAsync() { return core_.closeAsync(); } - /** @return the topic this consumer is subscribed to. In namespace mode this is the - * namespace-derived subscription target. */ - const std::string& topic() const { return core_.topic(); } - /** @return the subscription name. */ - const std::string& subscription() const { return core_.subscription(); } - /** @return the consumer name (broker-assigned if none was set on the builder). */ - const std::string& consumerName() const { return core_.consumerName(); } + /** @return a view of the topic this consumer is subscribed to (in namespace mode, the + * namespace-derived subscription target), valid while the consumer is alive. */ + std::string_view topic() const { return core_.topic(); } + /** @return a view of the subscription name, valid while the consumer is alive. */ + std::string_view subscription() const { return core_.subscription(); } + /** @return a view of the consumer name (broker-assigned if none was set on the builder), + * valid while the consumer is alive. */ + std::string_view consumerName() const { return core_.consumerName(); } /** @return `true` if this is a live, subscribed consumer; `false` if it is an empty * (default-constructed or closed/moved-from) handle. 
*/ diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index 8775f890..36021a14 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace pulsar::st { @@ -206,13 +207,14 @@ class StreamConsumer { */ Future closeAsync() { return core_.closeAsync(); } - /** @return the topic this consumer is subscribed to. In namespace mode this is the - * namespace-derived subscription target. */ - const std::string& topic() const { return core_.topic(); } - /** @return the subscription name. */ - const std::string& subscription() const { return core_.subscription(); } - /** @return the consumer name (broker-assigned if none was set on the builder). */ - const std::string& consumerName() const { return core_.consumerName(); } + /** @return a view of the topic this consumer is subscribed to (in namespace mode, the + * namespace-derived subscription target), valid while the consumer is alive. */ + std::string_view topic() const { return core_.topic(); } + /** @return a view of the subscription name, valid while the consumer is alive. */ + std::string_view subscription() const { return core_.subscription(); } + /** @return a view of the consumer name (broker-assigned if none was set on the builder), + * valid while the consumer is alive. */ + std::string_view consumerName() const { return core_.consumerName(); } /** @return `true` if this is a live, subscribed consumer; `false` if it is an empty * (default-constructed or closed/moved-from) handle. 
*/ diff --git a/include/pulsar/st/detail/CheckpointConsumerCore.h b/include/pulsar/st/detail/CheckpointConsumerCore.h index 9b3a67c7..22d74521 100644 --- a/include/pulsar/st/detail/CheckpointConsumerCore.h +++ b/include/pulsar/st/detail/CheckpointConsumerCore.h @@ -26,6 +26,7 @@ #include #include #include +#include #include namespace pulsar::st { @@ -50,7 +51,7 @@ class PULSAR_PUBLIC CheckpointConsumerCore { Future> receiveMultiAsync(int maxMessages, int64_t timeoutMs) const; Checkpoint checkpoint() const; Future closeAsync() const; - const std::string& topic() const; + std::string_view topic() const; explicit operator bool() const { return static_cast(impl_); } diff --git a/include/pulsar/st/detail/MessageCore.h b/include/pulsar/st/detail/MessageCore.h index 3c32ccdd..03c57741 100644 --- a/include/pulsar/st/detail/MessageCore.h +++ b/include/pulsar/st/detail/MessageCore.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace pulsar::st { @@ -55,17 +56,17 @@ class PULSAR_PUBLIC MessageCore { std::size_t size() const; MessageId id() const; bool hasKey() const; - const std::string& key() const; + std::string_view key() const; const Properties& properties() const; int64_t publishTimeMs() const; int64_t eventTimeMs() const; // 0 if unset int64_t sequenceId() const; bool hasProducerName() const; - const std::string& producerName() const; - const std::string& topic() const; + std::string_view producerName() const; + std::string_view topic() const; int redeliveryCount() const; bool hasReplicatedFrom() const; - const std::string& replicatedFrom() const; + std::string_view replicatedFrom() const; explicit operator bool() const { return static_cast(impl_); } diff --git a/include/pulsar/st/detail/ProducerCore.h b/include/pulsar/st/detail/ProducerCore.h index 543eb750..41fbdf45 100644 --- a/include/pulsar/st/detail/ProducerCore.h +++ b/include/pulsar/st/detail/ProducerCore.h @@ -25,6 +25,7 @@ #include #include #include +#include namespace pulsar::st { @@ -46,8 +47,8 
@@ class PULSAR_PUBLIC ProducerCore { ProducerCore() = default; Future sendAsync(OutgoingMessage message) const; - const std::string& topic() const; - const std::string& name() const; + std::string_view topic() const; + std::string_view name() const; int64_t lastSequenceId() const; Future flushAsync() const; Future closeAsync() const; diff --git a/include/pulsar/st/detail/QueueConsumerCore.h b/include/pulsar/st/detail/QueueConsumerCore.h index ef8e0035..d5214a88 100644 --- a/include/pulsar/st/detail/QueueConsumerCore.h +++ b/include/pulsar/st/detail/QueueConsumerCore.h @@ -26,6 +26,7 @@ #include #include #include +#include namespace pulsar::st { @@ -51,9 +52,9 @@ class PULSAR_PUBLIC QueueConsumerCore { void acknowledge(const MessageId& id, const Transaction& txn) const; void negativeAcknowledge(const MessageId& id) const; Future closeAsync() const; - const std::string& topic() const; - const std::string& subscription() const; - const std::string& consumerName() const; + std::string_view topic() const; + std::string_view subscription() const; + std::string_view consumerName() const; explicit operator bool() const { return static_cast(impl_); } diff --git a/include/pulsar/st/detail/StreamConsumerCore.h b/include/pulsar/st/detail/StreamConsumerCore.h index 4952d7ba..45865610 100644 --- a/include/pulsar/st/detail/StreamConsumerCore.h +++ b/include/pulsar/st/detail/StreamConsumerCore.h @@ -26,6 +26,7 @@ #include #include #include +#include #include namespace pulsar::st { @@ -52,9 +53,9 @@ class PULSAR_PUBLIC StreamConsumerCore { void acknowledgeCumulative(const MessageId& id) const; void acknowledgeCumulative(const MessageId& id, const Transaction& txn) const; Future closeAsync() const; - const std::string& topic() const; - const std::string& subscription() const; - const std::string& consumerName() const; + std::string_view topic() const; + std::string_view subscription() const; + std::string_view consumerName() const; explicit operator bool() const { return 
static_cast(impl_); } From 901b9eb8f14acc1376a59a52362bf20e26245e3e Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 11:27:45 -0700 Subject: [PATCH 08/39] st: serialize MessageId/Checkpoint as bytes, not std::string toByteArray() returns std::vector and fromByteArray() takes std::span, instead of std::string -- byte-correct and consistent with Bytes/BytesView. The round-trip stays implicit: a std::vector from toByteArray() converts to the span parameter. Example updated. All within pulsar::st. Verified with clang + gcc:13 (-Wextra -Werror) and clang-format-11. Signed-off-by: Matteo Merli --- examples/st/SampleStCheckpointConsumer.cc | 4 +++- include/pulsar/st/Checkpoint.h | 12 +++++++----- include/pulsar/st/MessageId.h | 8 +++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/examples/st/SampleStCheckpointConsumer.cc b/examples/st/SampleStCheckpointConsumer.cc index 7baa34c8..ff91d379 100644 --- a/examples/st/SampleStCheckpointConsumer.cc +++ b/examples/st/SampleStCheckpointConsumer.cc @@ -22,8 +22,10 @@ #include +#include #include #include +#include using namespace pulsar::st; @@ -60,7 +62,7 @@ int main() { // Atomic position snapshot across all segments. Store the bytes yourself // (Flink/Spark state backend, a file, etc.) — there is no broker-side cursor. 
Checkpoint checkpoint = consumer.checkpoint(); - std::string persisted = checkpoint.toByteArray(); + std::vector persisted = checkpoint.toByteArray(); // store these bytes yourself std::cout << "checkpoint is " << persisted.size() << " bytes\n"; (void)consumer.close(); diff --git a/include/pulsar/st/Checkpoint.h b/include/pulsar/st/Checkpoint.h index d3a32e34..0ee2938e 100644 --- a/include/pulsar/st/Checkpoint.h +++ b/include/pulsar/st/Checkpoint.h @@ -21,8 +21,10 @@ #include #include +#include #include -#include +#include +#include namespace pulsar::st { @@ -91,17 +93,17 @@ class PULSAR_PUBLIC Checkpoint { * The returned bytes are an opaque blob suitable for persisting in any state * backend; restore them later with `fromByteArray()`. * - * @return Byte string encoding the cross-segment position. + * @return a byte vector encoding the cross-segment position. */ - std::string toByteArray() const; + std::vector toByteArray() const; /** * @brief Restore a `Checkpoint` previously produced by `toByteArray()`. * - * @param data Bytes returned by an earlier `toByteArray()` call. + * @param data bytes returned by an earlier `toByteArray()` call. * @return The reconstructed `Checkpoint`. */ - static Checkpoint fromByteArray(const std::string& data); + static Checkpoint fromByteArray(std::span data); /** * @brief Test whether this checkpoint holds a valid position. diff --git a/include/pulsar/st/MessageId.h b/include/pulsar/st/MessageId.h index 52abacb8..b774ed70 100644 --- a/include/pulsar/st/MessageId.h +++ b/include/pulsar/st/MessageId.h @@ -22,9 +22,11 @@ #include #include +#include #include #include -#include +#include +#include namespace pulsar::st { @@ -55,10 +57,10 @@ class PULSAR_PUBLIC MessageId { static const MessageId& latest(); /** Serialize to a portable binary form for external storage. */ - std::string toByteArray() const; + std::vector toByteArray() const; /** Restore a `MessageId` previously produced by `toByteArray()`. 
*/ - static MessageId fromByteArray(const std::string& data); + static MessageId fromByteArray(std::span data); // Totally ordered within a topic; `<=>` and `==` synthesize <, <=, >, >=, !=. /** From 9dffe5a3455d6aa3573c697151e31eefeb52f0d4 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 13:33:50 -0700 Subject: [PATCH 09/39] st: make property() a by-value sink (review item K1) property(const std::string& k, const std::string& v) becomes property(std::string k, std::string v) with insert_or_assign(std::move(k), std::move(v)), across MessageBuilder, ProducerBuilder, and the three consumer builders -- consistent with the other by-value-sink setters (topic / subscriptionName / etc.). Verified with clang + gcc:13 (-Wextra -Werror) and clang-format-11. Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 4 ++-- include/pulsar/st/Producer.h | 8 ++++---- include/pulsar/st/QueueConsumer.h | 4 ++-- include/pulsar/st/StreamConsumer.h | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 2a5dd493..23a99ec1 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -274,8 +274,8 @@ class CheckpointConsumerBuilder { * @param v Property value. * @return `*this`, for call chaining. */ - CheckpointConsumerBuilder& property(const std::string& k, const std::string& v) { - config_.properties[k] = v; + CheckpointConsumerBuilder& property(std::string k, std::string v) { + config_.properties.insert_or_assign(std::move(k), std::move(v)); return *this; } diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 9ea86a40..af81d2c7 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -180,8 +180,8 @@ class MessageBuilder { * @param v property value. * @return `*this`, for chaining. 
*/ - MessageBuilder& property(const std::string& k, const std::string& v) { - message_.properties[k] = v; + MessageBuilder& property(std::string k, std::string v) { + message_.properties.insert_or_assign(std::move(k), std::move(v)); return *this; } /** @@ -470,8 +470,8 @@ class ProducerBuilder { * @param v property value. * @return `*this`, for chaining. */ - ProducerBuilder& property(const std::string& k, const std::string& v) { - config_.properties[k] = v; + ProducerBuilder& property(std::string k, std::string v) { + config_.properties.insert_or_assign(std::move(k), std::move(v)); return *this; } diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index aebd929d..73eec668 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -329,8 +329,8 @@ class QueueConsumerBuilder { * @param v property value. * @return `*this` for chaining. */ - QueueConsumerBuilder& property(const std::string& k, const std::string& v) { - config_.properties[k] = v; + QueueConsumerBuilder& property(std::string k, std::string v) { + config_.properties.insert_or_assign(std::move(k), std::move(v)); return *this; } diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index 36021a14..e592af90 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -372,8 +372,8 @@ class StreamConsumerBuilder { * @param v property value. * @return `*this` for chaining. 
*/ - StreamConsumerBuilder& property(const std::string& k, const std::string& v) { - config_.properties[k] = v; + StreamConsumerBuilder& property(std::string k, std::string v) { + config_.properties.insert_or_assign(std::move(k), std::move(v)); return *this; } From 4853693a7be1da15d6b71dddbd18a368599f308f Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 13:55:06 -0700 Subject: [PATCH 10/39] st: fix await_suspend coroutine resume race (review B1) await_suspend now returns bool and uses SharedState::addListenerOrReady, which atomically registers the resume continuation or reports the result is already available -- so the coroutine resumes via await_resume instead of being resumed from inside await_suspend (which could run/destroy the awaiter before it returns). Verified with a co_await runtime test on clang + gcc:13. Signed-off-by: Matteo Merli --- include/pulsar/st/Future.h | 13 +++++++++---- include/pulsar/st/detail/SharedState.h | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/include/pulsar/st/Future.h b/include/pulsar/st/Future.h index 8e2f6c26..e6107217 100644 --- a/include/pulsar/st/Future.h +++ b/include/pulsar/st/Future.h @@ -144,13 +144,18 @@ class Future { /** * Coroutine support: suspend the awaiting coroutine until completion. * - * Registers a listener that resumes @p handle when the operation completes. Part - * of the C++20 awaitable interface; not called directly. + * Atomically registers a continuation that resumes @p handle on completion, or — + * if the result is already available — returns `false` so the coroutine resumes + * immediately rather than being resumed from inside `await_suspend` (which could + * run and destroy this awaiter before it returns). Part of the C++20 awaitable + * interface; not called directly. * * @param handle the suspended coroutine to resume on completion. 
+ * @return `true` to stay suspended (resumed later on the completing thread), + * `false` to resume immediately because the result is already available. */ - void await_suspend(std::coroutine_handle<> handle) { - state_->addListener([handle](const Expected&) { handle.resume(); }); + bool await_suspend(std::coroutine_handle<> handle) { + return state_->addListenerOrReady([handle](const Expected&) { handle.resume(); }); } /** diff --git a/include/pulsar/st/detail/SharedState.h b/include/pulsar/st/detail/SharedState.h index ccbfe4a7..c1147006 100644 --- a/include/pulsar/st/detail/SharedState.h +++ b/include/pulsar/st/detail/SharedState.h @@ -66,6 +66,21 @@ class SharedState { } } + // For a coroutine awaiter. Atomically: if the result is not yet available, + // register @p listener to run on completion and return true (the awaiting + // coroutine stays suspended); if it is already available, register nothing and + // return false (the coroutine resumes immediately). Unlike addListener(), it + // never runs @p listener synchronously, so a coroutine is never resumed from + // inside its own await_suspend. + bool addListenerOrReady(Listener listener) { + std::lock_guard lock(mutex_); + if (result_.has_value()) { + return false; + } + listeners_.push_back(std::move(listener)); + return true; + } + Expected get() { std::unique_lock lock(mutex_); cond_.wait(lock, [this] { return result_.has_value(); }); From c36448effafc8c956811b88703920b1c6a0040dd Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 13:55:06 -0700 Subject: [PATCH 11/39] st: receive() does not surface decode errors -- doc fix (review B2) A message whose payload cannot be decoded is handled internally by the SDK and never delivered, so decode is not a receive failure. Dropped it from the receive failure lists on all three consumers and added a clarifying note. 
Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 7 ++++--- include/pulsar/st/QueueConsumer.h | 12 +++++++----- include/pulsar/st/StreamConsumer.h | 14 ++++++++------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 23a99ec1..34677185 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -88,8 +88,9 @@ class CheckpointConsumer { * Waits indefinitely for the next message at the current read position. * * @return `Expected>` holding the decoded message, or an `Error` if - * the receive fails (e.g. the consumer is closed or disconnected, or the - * payload cannot be decoded). + * the receive fails (e.g. the consumer is closed or disconnected). A + * message whose payload cannot be decoded is handled by the SDK and never + * delivered, so it is not a receive failure. */ Expected> receive() { return toTyped(core_.receiveAsync().get()); } @@ -99,7 +100,7 @@ class CheckpointConsumer { * @param timeout Maximum time to wait (`std::chrono::milliseconds`). * @return `Expected>` holding the decoded message, or an `Error`; a * timeout surfaces as `Error{ResultTimeout}`. May also fail on - * close/disconnect or a decode error. + * close/disconnect. */ Expected> receive(std::chrono::milliseconds timeout) { return toTyped(core_.receiveAsync(timeout.count()).get()); diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index 73eec668..d1608120 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -114,9 +114,11 @@ class QueueConsumer { * Block until the next message arrives and return it. * * Returns `Expected` because a receive can fail *without* yielding a message — - * the consumer was closed, the connection dropped, or the payload failed to - * decode. 
On such failures the result holds an `Error` instead of a message; - * call `.value()` on the result if you would rather throw a `ClientException`. + * the consumer was closed or the connection dropped. On such failures the result + * holds an `Error` instead of a message; call `.value()` on the result if you + * would rather throw a `ClientException`. (A message whose payload cannot be + * decoded is handled by the SDK and never delivered, so it is not a receive + * failure.) * * @return the next `Message`, or an `Error` describing why no message could * be delivered. @@ -127,7 +129,7 @@ class QueueConsumer { * * @param timeout maximum time to wait for a message. * @return the next `Message`; if no message arrives within `timeout`, an - * `Error{ResultTimeout}`; or another `Error` on close/disconnect/decode + * `Error{ResultTimeout}`; or another `Error` on close/disconnect * failure. */ Expected> receive(std::chrono::milliseconds timeout) { @@ -138,7 +140,7 @@ class QueueConsumer { * * @return a `Future>` completed with the message when one is * available, or completed with an `Error` (via the future's `Expected` - * result) on close/disconnect/decode failure. + * result) on close/disconnect failure. */ Future> receiveAsync() { Schema schema = schema_; diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index e592af90..e42680f1 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -120,9 +120,11 @@ class StreamConsumer { * Block until the next message arrives and return it. * * Returns `Expected` because a receive can fail *without* yielding a message — - * the consumer was closed, the connection dropped, or the payload failed to - * decode. On such failures the result holds an `Error` instead of a message; - * call `.value()` on the result if you would rather throw a `ClientException`. + * the consumer was closed or the connection dropped. 
On such failures the result + * holds an `Error` instead of a message; call `.value()` on the result if you + * would rather throw a `ClientException`. (A message whose payload cannot be + * decoded is handled by the SDK and never delivered, so it is not a receive + * failure.) * * @return the next `Message`, or an `Error` describing why no message could * be delivered. @@ -133,7 +135,7 @@ class StreamConsumer { * * @param timeout maximum time to wait for a message. * @return the next `Message`; if no message arrives within `timeout`, an - * `Error{ResultTimeout}`; or another `Error` on close/disconnect/decode + * `Error{ResultTimeout}`; or another `Error` on close/disconnect * failure. */ Expected> receive(std::chrono::milliseconds timeout) { @@ -144,7 +146,7 @@ class StreamConsumer { * * @return a `Future>` completed with the message when one is * available, or completed with an `Error` (via the future's - * `Expected` result) on close/disconnect/decode failure. + * `Expected` result) on close/disconnect failure. */ Future> receiveAsync() { Schema schema = schema_; @@ -161,7 +163,7 @@ class StreamConsumer { * * @param maxMessages the maximum number of messages to return in the batch. * @param timeout maximum time to wait while accumulating the batch. - * @return the collected `Messages`, or an `Error` on close/disconnect/decode + * @return the collected `Messages`, or an `Error` on close/disconnect * failure. */ Expected> receiveMulti(int maxMessages, std::chrono::milliseconds timeout) { From 7c6043e52c8b4776c84cdaa7cd97c295a59f163f Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 13:55:06 -0700 Subject: [PATCH 12/39] st: uppercase primitive schema names STRING/DOUBLE (review B4) Match the existing client's canonical primitive names (lib/Schema.cc: STRING/INT32/INT64/FLOAT/DOUBLE/BYTES); StringCodec/DoubleCodec used mixed-case 'String'/'Double'. The name is sent to the broker. 
Signed-off-by: Matteo Merli --- include/pulsar/st/Schema.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index a1b1c4e7..2fd35faa 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -250,7 +250,7 @@ struct SpanBytesCodec { Expected decode(std::span d) const { return d; } }; struct StringCodec { - SchemaInfo info() const { return SchemaInfo(SchemaType::STRING, "String", ""); } + SchemaInfo info() const { return SchemaInfo(SchemaType::STRING, "STRING", ""); } Expected encode(const std::string& v, std::vector& out) const { const auto* p = reinterpret_cast(v.data()); out.assign(p, p + v.size()); @@ -287,7 +287,7 @@ struct Int64Codec { } }; struct DoubleCodec { - SchemaInfo info() const { return SchemaInfo(SchemaType::DOUBLE, "Double", ""); } + SchemaInfo info() const { return SchemaInfo(SchemaType::DOUBLE, "DOUBLE", ""); } Expected encode(double v, std::vector& out) const { std::uint64_t bits; std::memcpy(&bits, &v, sizeof(bits)); From d2c91db3279ab98244eeecf4d35f4bebdfaff100 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 13:55:06 -0700 Subject: [PATCH 13/39] st: reset encodeError_ on a successful encode (review B5) MessageBuilder::value() now clears encodeError_ on success instead of leaving a prior failure sticky, so a later successful value() doesn't surface a stale error at send()/sendAsync(). Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index af81d2c7..dd148fdb 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -169,7 +169,8 @@ class MessageBuilder { message_.payloadView = v; message_.usesView = true; } else { - if (auto r = schema_.encode(v, message_.payload); !r) encodeError_ = r.error(); + auto r = schema_.encode(v, message_.payload); + encodeError_ = r ? 
std::nullopt : std::optional(r.error()); } return *this; } From d3e914dac522739fa6e6be34246de1740fb3df37 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:08:30 -0700 Subject: [PATCH 14/39] st: model event time as std::optional<Timestamp> (review B3) OutgoingMessage::eventTime and MessageCore::eventTime() are now std::optional<Timestamp> instead of an int64 epoch-ms with a 0=unset sentinel, so an event time of exactly the Unix epoch is no longer indistinguishable from unset. The int64 epoch-ms is just the wire encoding (converted in lib/st); MessageBuilder::eventTime and Message::eventTime() simplify accordingly. Verified epoch != unset at runtime on clang + gcc:13. Signed-off-by: Matteo Merli --- include/pulsar/st/Message.h | 5 +---- include/pulsar/st/Producer.h | 12 ++++++------ include/pulsar/st/detail/MessageCore.h | 3 ++- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index 2263277f..0c1fbb6a 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -125,10 +125,7 @@ class Message { * * @return the event time, or `std::nullopt` if the producer did not set one. */ - std::optional eventTime() const { - auto ms = core_.eventTimeMs(); - return ms != 0 ? std::optional(Timestamp(std::chrono::milliseconds(ms))) : std::nullopt; - } + std::optional eventTime() const { return core_.eventTime(); } /** * The producer-assigned sequence id of the message. diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index dd148fdb..e1cd3f85 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -117,9 +117,9 @@ struct OutgoingMessage { std::string key; /** Per-message user metadata. Empty by default. */ Properties properties; - int64_t eventTimeMs = 0; ///< Application event time, epoch ms; 0 = unset. - int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. 
- int64_t deliverAtMs = 0; ///< Absolute delivery time, epoch ms; 0 = deliver immediately. + std::optional eventTime; ///< Application event time; unset (nullopt) by default. + int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. + int64_t deliverAtMs = 0; ///< Absolute delivery time, epoch ms; 0 = deliver immediately. /** Target clusters for geo-replication; empty applies the topic's default. */ std::vector replicationClusters; std::optional transaction; ///< Enlisting transaction; unset = non-transactional. @@ -198,12 +198,12 @@ class MessageBuilder { /** * Set the application-defined event time of the message. * - * @param t the event time as a wall-clock `Timestamp`; stored as epoch - * milliseconds. Unset by default (event time absent). + * @param t the event time as a wall-clock `Timestamp`. Unset by default (no + * event time is attached). * @return `*this`, for chaining. */ MessageBuilder& eventTime(Timestamp t) { - message_.eventTimeMs = toEpochMs(t); + message_.eventTime = t; return *this; } /** diff --git a/include/pulsar/st/detail/MessageCore.h b/include/pulsar/st/detail/MessageCore.h index 03c57741..117b925e 100644 --- a/include/pulsar/st/detail/MessageCore.h +++ b/include/pulsar/st/detail/MessageCore.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,7 @@ class PULSAR_PUBLIC MessageCore { std::string_view key() const; const Properties& properties() const; int64_t publishTimeMs() const; - int64_t eventTimeMs() const; // 0 if unset + std::optional eventTime() const; int64_t sequenceId() const; bool hasProducerName() const; std::string_view producerName() const; From b234d8eab4166c41ba79a37111013f236a1fbc94 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:12:16 -0700 Subject: [PATCH 15/39] st: add CheckpointConsumer::consumerName() (review G2) Parity with Stream/QueueConsumer -- the consumerName config field and builder setter existed, but the getter did not, so the name could be 
set but not read. Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 8 ++++++++ include/pulsar/st/detail/CheckpointConsumerCore.h | 1 + 2 files changed, 9 insertions(+) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 34677185..0457cc32 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -175,6 +175,14 @@ class CheckpointConsumer { */ std::string_view topic() const { return core_.topic(); } + /** + * @brief Return the consumer name (broker-assigned if none was set on the + * builder). + * + * @return a view of the consumer name, valid while this consumer is alive. + */ + std::string_view consumerName() const { return core_.consumerName(); } + /** * @brief Test whether this consumer is usable. * diff --git a/include/pulsar/st/detail/CheckpointConsumerCore.h b/include/pulsar/st/detail/CheckpointConsumerCore.h index 22d74521..9d67855d 100644 --- a/include/pulsar/st/detail/CheckpointConsumerCore.h +++ b/include/pulsar/st/detail/CheckpointConsumerCore.h @@ -52,6 +52,7 @@ class PULSAR_PUBLIC CheckpointConsumerCore { Checkpoint checkpoint() const; Future closeAsync() const; std::string_view topic() const; + std::string_view consumerName() const; explicit operator bool() const { return static_cast(impl_); } From c65e6dfe8d20d65e4cafab97386d19113318a143 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:12:16 -0700 Subject: [PATCH 16/39] st: [[nodiscard]] on MessageId/Checkpoint serialization + sentinels (review P3) toByteArray() / fromByteArray() / earliest() / latest() return values that must not be silently discarded. 
Signed-off-by: Matteo Merli --- include/pulsar/st/Checkpoint.h | 8 ++++---- include/pulsar/st/MessageId.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/pulsar/st/Checkpoint.h b/include/pulsar/st/Checkpoint.h index 0ee2938e..0e91d97a 100644 --- a/include/pulsar/st/Checkpoint.h +++ b/include/pulsar/st/Checkpoint.h @@ -72,7 +72,7 @@ class PULSAR_PUBLIC Checkpoint { * * @return Reference to the shared earliest-position sentinel. */ - static const Checkpoint& earliest(); + [[nodiscard]] static const Checkpoint& earliest(); /** * @brief Well-known sentinel positioned after the latest published message of @@ -84,7 +84,7 @@ class PULSAR_PUBLIC Checkpoint { * * @return Reference to the shared latest-position sentinel. */ - static const Checkpoint& latest(); + [[nodiscard]] static const Checkpoint& latest(); /** * @brief Serialize this checkpoint to a portable binary form for external @@ -95,7 +95,7 @@ class PULSAR_PUBLIC Checkpoint { * * @return a byte vector encoding the cross-segment position. */ - std::vector toByteArray() const; + [[nodiscard]] std::vector toByteArray() const; /** * @brief Restore a `Checkpoint` previously produced by `toByteArray()`. @@ -103,7 +103,7 @@ class PULSAR_PUBLIC Checkpoint { * @param data bytes returned by an earlier `toByteArray()` call. * @return The reconstructed `Checkpoint`. */ - static Checkpoint fromByteArray(std::span data); + [[nodiscard]] static Checkpoint fromByteArray(std::span data); /** * @brief Test whether this checkpoint holds a valid position. diff --git a/include/pulsar/st/MessageId.h b/include/pulsar/st/MessageId.h index b774ed70..2a8ff4d3 100644 --- a/include/pulsar/st/MessageId.h +++ b/include/pulsar/st/MessageId.h @@ -51,16 +51,16 @@ class PULSAR_PUBLIC MessageId { MessageId(); /** Sentinel: the earliest (oldest) message available in the topic. 
*/ - static const MessageId& earliest(); + [[nodiscard]] static const MessageId& earliest(); /** Sentinel: the latest (most recently published) message in the topic. */ - static const MessageId& latest(); + [[nodiscard]] static const MessageId& latest(); /** Serialize to a portable binary form for external storage. */ - std::vector toByteArray() const; + [[nodiscard]] std::vector toByteArray() const; /** Restore a `MessageId` previously produced by `toByteArray()`. */ - static MessageId fromByteArray(std::span data); + [[nodiscard]] static MessageId fromByteArray(std::span data); // Totally ordered within a topic; `<=>` and `==` synthesize <, <=, >, >=, !=. /** From f7affbc19bb636208577244d8c0e2e3c93a1dab1 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:12:17 -0700 Subject: [PATCH 17/39] st: document Message::properties() view lifetime (review P4) It returns a reference into the message, like the other view-returning getters. Signed-off-by: Matteo Merli --- include/pulsar/st/Message.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index 0c1fbb6a..a8327b9a 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -109,7 +109,7 @@ class Message { /** * The application-defined string properties attached to the message. * - * @return a reference to the properties map. + * @return a reference to the properties map, valid while this message is alive. */ const Properties& properties() const { return core_.properties(); } From 3730ff07b5ff86afb08bed3b0964d63dd58ae9db Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:21:38 -0700 Subject: [PATCH 18/39] st: model deliverAt as std::optional<Timestamp> too (review B3 follow-on) OutgoingMessage::deliverAt is now std::optional<Timestamp> (was int64 epoch-ms with 0=immediate), matching eventTime; deliverAfter/deliverAt set it directly, and the now-unused toEpochMs helper is removed. Verified on clang + gcc:13. 
Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index e1cd3f85..2d2efe96 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -119,7 +119,7 @@ struct OutgoingMessage { Properties properties; std::optional eventTime; ///< Application event time; unset (nullopt) by default. int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. - int64_t deliverAtMs = 0; ///< Absolute delivery time, epoch ms; 0 = deliver immediately. + std::optional deliverAt; ///< Scheduled delivery time; unset = deliver immediately. /** Target clusters for geo-replication; empty applies the topic's default. */ std::vector replicationClusters; std::optional transaction; ///< Enlisting transaction; unset = non-transactional. @@ -221,26 +221,26 @@ class MessageBuilder { * Request delayed delivery: deliver the message after `delay` has elapsed from * now (spec §4 delayed delivery). * - * @param delay delay relative to the current time, in milliseconds. Computed - * into an absolute delivery time. Mutually exclusive with - * `deliverAt`; the last of the two called wins. + * @param delay delay relative to the current time. Computed into an absolute + * delivery time. Mutually exclusive with `deliverAt`; the last of the + * two called wins. * @return `*this`, for chaining. */ MessageBuilder& deliverAfter(std::chrono::milliseconds delay) { - message_.deliverAtMs = toEpochMs(std::chrono::system_clock::now()) + delay.count(); + message_.deliverAt = std::chrono::system_clock::now() + delay; return *this; } /** * Request delayed delivery at a specific wall-clock time (spec §4 delayed * delivery). * - * @param t absolute delivery time; stored as epoch milliseconds. A time in the - * past delivers immediately. Mutually exclusive with `deliverAfter`; - * the last of the two called wins. 
+ * @param t the absolute delivery time. A time in the past delivers immediately. + * Mutually exclusive with `deliverAfter`; the last of the two called + * wins. * @return `*this`, for chaining. */ MessageBuilder& deliverAt(Timestamp t) { - message_.deliverAtMs = toEpochMs(t); + message_.deliverAt = t; return *this; } /** @@ -282,10 +282,6 @@ class MessageBuilder { MessageBuilder(detail::ProducerCore core, Schema schema) : core_(std::move(core)), schema_(std::move(schema)) {} - static int64_t toEpochMs(Timestamp t) { - return std::chrono::duration_cast(t.time_since_epoch()).count(); - } - detail::ProducerCore core_; Schema schema_; OutgoingMessage message_; From db9607bd7a47bc8b9446a07ae83e0f900fe88114 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:23:31 -0700 Subject: [PATCH 19/39] st: model publishTime as Timestamp, not int64 epoch-ms MessageCore::publishTime() now returns Timestamp (was int64_t publishTimeMs()); Message::publishTime() forwards it directly. Consistent with the eventTime / deliverAt Timestamp modeling; the int64 epoch-ms is just the wire encoding (converted in lib/st). Signed-off-by: Matteo Merli --- include/pulsar/st/Message.h | 2 +- include/pulsar/st/detail/MessageCore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index a8327b9a..7a43c8fa 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -118,7 +118,7 @@ class Message { * * @return the timestamp at which the message was published. */ - Timestamp publishTime() const { return Timestamp(std::chrono::milliseconds(core_.publishTimeMs())); } + Timestamp publishTime() const { return core_.publishTime(); } /** * The optional application-supplied event time. 
diff --git a/include/pulsar/st/detail/MessageCore.h b/include/pulsar/st/detail/MessageCore.h index 117b925e..f45f7acc 100644 --- a/include/pulsar/st/detail/MessageCore.h +++ b/include/pulsar/st/detail/MessageCore.h @@ -59,7 +59,7 @@ class PULSAR_PUBLIC MessageCore { bool hasKey() const; std::string_view key() const; const Properties& properties() const; - int64_t publishTimeMs() const; + Timestamp publishTime() const; std::optional eventTime() const; int64_t sequenceId() const; bool hasProducerName() const; From ebb726863c54feab14ea3a699b4ae74da0e0ec10 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:28:36 -0700 Subject: [PATCH 20/39] st: producer review items P1, G3, Q1 - P1: rename Producer::name() -> producerName() (+ ProducerCore), consistent with Message::producerName() and the producerName builder setter. - G3: add MessageBuilder::replicationClusters() setter for the previously unreachable OutgoingMessage::replicationClusters field. - Q1: drop the 'ordering key' framing from the message-key docs -- it is a routing / partition key; ordering is provided by the StreamConsumer. Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 19 +++++++++++++++---- include/pulsar/st/detail/ProducerCore.h | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 2d2efe96..a27f7491 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -111,9 +111,9 @@ struct OutgoingMessage { std::span payloadView; /** When true, publish `payloadView` directly without copying; otherwise `payload`. */ bool usesView = false; - /** Whether a routing/ordering key is set. `false` (the default) means no key. */ + /** Whether a routing key is set. `false` (the default) means no key. */ bool hasKey = false; - /** Partition/ordering key; meaningful only when `hasKey` is true. */ + /** Partition/routing key; meaningful only when `hasKey` is true. 
*/ std::string key; /** Per-message user metadata. Empty by default. */ Properties properties; @@ -141,7 +141,7 @@ template class MessageBuilder { public: /** - * Set the message key, used for per-key ordering and key-affinity routing. + * Set the message key, used for partition routing and key affinity. * * @param k the key; taken by value and moved into the message. * @return `*this`, for chaining. @@ -254,6 +254,17 @@ class MessageBuilder { message_.transaction = txn; return *this; } + /** + * Restrict geo-replication of this message to the given clusters. + * + * @param clusters the target clusters; taken by value and moved in. Empty (the + * default) applies the topic's configured replication. + * @return `*this`, for chaining. + */ + MessageBuilder& replicationClusters(std::vector clusters) { + message_.replicationClusters = std::move(clusters); + return *this; + } /** * Publish the message and block until the broker acknowledges it. @@ -335,7 +346,7 @@ class Producer { std::string_view topic() const { return core_.topic(); } /** @return a view of the producer's name (broker-assigned when none was configured), valid * while the producer is alive. */ - std::string_view name() const { return core_.name(); } + std::string_view producerName() const { return core_.producerName(); } /** @return the sequence id of the most recently published message, or -1 if * none has been published yet. 
*/ int64_t lastSequenceId() const { return core_.lastSequenceId(); } diff --git a/include/pulsar/st/detail/ProducerCore.h b/include/pulsar/st/detail/ProducerCore.h index 41fbdf45..bb6cc4b9 100644 --- a/include/pulsar/st/detail/ProducerCore.h +++ b/include/pulsar/st/detail/ProducerCore.h @@ -48,7 +48,7 @@ class PULSAR_PUBLIC ProducerCore { Future sendAsync(OutgoingMessage message) const; std::string_view topic() const; - std::string_view name() const; + std::string_view producerName() const; int64_t lastSequenceId() const; Future flushAsync() const; Future closeAsync() const; From db0f940d4e44d346bcfea76ae9bddf88c2496176 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:32:53 -0700 Subject: [PATCH 21/39] st: add float/int8/int16 primitive codecs (review G4/Q4) FloatCodec (FLOAT, big-endian IEEE-754), Int8Codec (INT8), Int16Codec (INT16), wired into the default Schema ctor; canonical uppercase names match the existing client. Round-trip verified on clang + gcc:13. bool is NOT added: the existing pulsar::SchemaType enum has no BOOLEAN value (Java has it at 5; the C++ port skipped it), and adding it would mean touching the old API. Deferred pending a decision on extending the old enum. Signed-off-by: Matteo Merli --- include/pulsar/st/Schema.h | 56 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index 2fd35faa..9f4fa788 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -124,9 +124,10 @@ class Schema { * @brief Constructs the default schema for `T`. * * For a primitive `T` this installs the built-in codec: `Bytes` (the default), - * `std::string`, `std::int32_t`, `std::int64_t`, or `double`. Integers and - * `double` are encoded as fixed-width big-endian, matching the Pulsar wire - * format for primitive schemas. 
+ * `BytesView`, `std::string`, `std::int8_t`, `std::int16_t`, `std::int32_t`, + * `std::int64_t`, `float`, or `double`. Integers and floating-point values are + * encoded as fixed-width big-endian, matching the Pulsar wire format for + * primitive schemas. * * For any other (non-primitive) `T` this installs an "unset" schema: it reports * `SchemaType::BYTES` to the broker, but its `encode` and `decode` return an @@ -304,6 +305,49 @@ struct DoubleCodec { return v; } }; +struct FloatCodec { + SchemaInfo info() const { return SchemaInfo(SchemaType::FLOAT, "FLOAT", ""); } + Expected encode(float v, std::vector& out) const { + std::uint32_t bits; + std::memcpy(&bits, &v, sizeof(bits)); + out.clear(); + encodeBigEndian(static_cast(bits), out); + return {}; + } + Expected decode(std::span d) const { + if (d.size() < sizeof(float)) + return unexpected(pulsar::ResultInvalidMessage, "FLOAT payload too short"); + auto bits = static_cast(decodeBigEndian(d)); + float v; + std::memcpy(&v, &bits, sizeof(v)); + return v; + } +}; +struct Int8Codec { + SchemaInfo info() const { return SchemaInfo(SchemaType::INT8, "INT8", ""); } + Expected encode(std::int8_t v, std::vector& out) const { + out.clear(); + encodeBigEndian(v, out); + return {}; + } + Expected decode(std::span d) const { + if (d.empty()) return unexpected(pulsar::ResultInvalidMessage, "INT8 payload too short"); + return decodeBigEndian(d); + } +}; +struct Int16Codec { + SchemaInfo info() const { return SchemaInfo(SchemaType::INT16, "INT16", ""); } + Expected encode(std::int16_t v, std::vector& out) const { + out.clear(); + encodeBigEndian(v, out); + return {}; + } + Expected decode(std::span d) const { + if (d.size() < sizeof(std::int16_t)) + return unexpected(pulsar::ResultInvalidMessage, "INT16 payload too short"); + return decodeBigEndian(d); + } +}; template struct UnsetCodec { SchemaInfo info() const { return SchemaInfo(SchemaType::BYTES, "BYTES", ""); } @@ -330,6 +374,12 @@ Schema::Schema() { self_ = 
std::make_shared>(detail::Int64Codec{}); } else if constexpr (std::is_same_v) { self_ = std::make_shared>(detail::DoubleCodec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::FloatCodec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::Int8Codec{}); + } else if constexpr (std::is_same_v) { + self_ = std::make_shared>(detail::Int16Codec{}); } else if constexpr (std::is_same_v) { self_ = std::make_shared>(detail::SpanBytesCodec{}); } else { From 4013ba6241cb2410844d00bb6a5793f2d1fcf6f3 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:36:27 -0700 Subject: [PATCH 22/39] st: Message::data() returns BytesView, not const char* + size() MessageCore::data() / Message::data() now return std::span (BytesView), carrying pointer and length together; the separate size() accessor is removed (use data().size()), and Message::value() simplifies accordingly. Consistent with the Bytes/BytesView byte modeling. Verified on clang + gcc:13. Signed-off-by: Matteo Merli --- include/pulsar/st/Message.h | 21 ++++++--------------- include/pulsar/st/detail/MessageCore.h | 4 ++-- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index 7a43c8fa..ace1003d 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -66,28 +66,19 @@ class Message { * Decoding happens on every call (the result is not cached). The SDK handles a * payload that cannot be decoded internally — such a message is not delivered to * the application — so this does not surface decode failures to the caller. The - * raw bytes remain available via `data()` / `size()`. + * raw bytes remain available via `data()`. * * @return the decoded value of type `T`. */ - T value() const { - const auto* bytes = reinterpret_cast(core_.data()); - return schema_.decode(std::span(bytes, core_.size())).value(); - } - - /** - * Pointer to the raw, undecoded payload bytes. 
- * - * @return a pointer to `size()` bytes of payload, valid for this message's lifetime. - */ - const char* data() const { return core_.data(); } + T value() const { return schema_.decode(core_.data()).value(); } /** - * Size of the raw payload in bytes. + * The raw, undecoded payload bytes. * - * @return the number of bytes pointed to by `data()`. + * @return a view of the payload, valid while this message is alive; its `.size()` + * gives the byte count. */ - std::size_t size() const { return core_.size(); } + BytesView data() const { return core_.data(); } /** * The message's position within the topic. diff --git a/include/pulsar/st/detail/MessageCore.h b/include/pulsar/st/detail/MessageCore.h index f45f7acc..1d3d30d3 100644 --- a/include/pulsar/st/detail/MessageCore.h +++ b/include/pulsar/st/detail/MessageCore.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -53,8 +54,7 @@ class PULSAR_PUBLIC MessageCore { public: MessageCore() = default; - const char* data() const; - std::size_t size() const; + std::span data() const; MessageId id() const; bool hasKey() const; std::string_view key() const; From 85114a43a6293c681345ea760d788d5d4e01515d Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:44:50 -0700 Subject: [PATCH 23/39] st: group loose client settings into policies by scope (review Q3) PulsarClientBuilder drops the top-level ioThreads / messageListenerThreads / memoryLimit / listenerName setters. Grouped by scope: - listenerName -> ConnectionPolicy - ioThreads + messageListenerThreads -> new ThreadPolicy - memoryLimit -> new MemoryPolicy with threadPolicy() / memoryPolicy() builder setters. Verified on clang + gcc:13. 
Signed-off-by: Matteo Merli --- include/pulsar/st/Client.h | 69 +++++++++++------------------------- include/pulsar/st/Policies.h | 27 ++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/include/pulsar/st/Client.h b/include/pulsar/st/Client.h index be0eedc4..31090136 100644 --- a/include/pulsar/st/Client.h +++ b/include/pulsar/st/Client.h @@ -229,43 +229,6 @@ class PULSAR_PUBLIC PulsarClientBuilder { return *this; } - /** - * Set the number of threads used for network IO. - * - * Optional. Defaults to 1 when unset. - * - * @param n the number of IO threads - * @return `*this`, for call chaining - */ - PulsarClientBuilder& ioThreads(int n) { - ioThreads_ = n; - return *this; - } - /** - * Set the number of threads used to run message listeners. - * - * Optional. Defaults to 1 when unset. - * - * @param n the number of message-listener threads - * @return `*this`, for call chaining - */ - PulsarClientBuilder& messageListenerThreads(int n) { - messageListenerThreads_ = n; - return *this; - } - /** - * Set the client-wide memory budget for pending (in-flight) messages. - * - * Optional. When unset, the client applies its built-in default limit. - * - * @param size the maximum memory, in bytes, for buffered messages - * @return `*this`, for call chaining - */ - PulsarClientBuilder& memoryLimit(MemorySize size) { - memoryLimit_ = size; - return *this; - } - /** * Set connection-pool, lookup, and request-timeout tuning. * @@ -319,17 +282,29 @@ class PULSAR_PUBLIC PulsarClientBuilder { } /** - * Set the advertised listener name to use for broker discovery. + * Set IO- and listener-thread pool sizing. + * + * Optional. Any field left unset within the policy falls back to the client + * default of a single thread. (The advertised `listenerName` for broker + * discovery now lives on `ConnectionPolicy`.) * - * Optional. Used in multi-listener deployments to select which set of - * advertised addresses the client connects through. 
When unset, the broker's - * default listener is used. + * @param policy the thread policy to apply + * @return `*this`, for call chaining + */ + PulsarClientBuilder& threadPolicy(ThreadPolicy policy) { + threadPolicy_ = std::move(policy); + return *this; + } + /** + * Set the client-wide memory budget for pending (in-flight) messages. + * + * Optional. When unset, the client applies its built-in default limit. * - * @param name the configured listener name + * @param policy the memory policy to apply * @return `*this`, for call chaining */ - PulsarClientBuilder& listenerName(std::string name) { - listenerName_ = std::move(name); + PulsarClientBuilder& memoryPolicy(MemoryPolicy policy) { + memoryPolicy_ = std::move(policy); return *this; } @@ -345,14 +320,12 @@ class PULSAR_PUBLIC PulsarClientBuilder { private: std::string serviceUrl_; AuthenticationPtr authentication_; - std::optional ioThreads_; - std::optional messageListenerThreads_; - std::optional memoryLimit_; ConnectionPolicy connectionPolicy_; + ThreadPolicy threadPolicy_; + MemoryPolicy memoryPolicy_; BackoffPolicy backoffPolicy_; TlsPolicy tlsPolicy_; TransactionPolicy transactionPolicy_; - std::optional listenerName_; }; inline PulsarClientBuilder PulsarClient::builder() { return PulsarClientBuilder{}; } diff --git a/include/pulsar/st/Policies.h b/include/pulsar/st/Policies.h index dd2b6a0d..c078f774 100644 --- a/include/pulsar/st/Policies.h +++ b/include/pulsar/st/Policies.h @@ -107,6 +107,9 @@ struct ConnectionPolicy { /** Time an idle pooled connection may stay open before being closed, in milliseconds. Unset uses the * client default. */ std::optional maxConnectionIdleTime = std::nullopt; + /** Advertised listener name for broker discovery (multi-listener deployments). Unset uses the + * broker's default listener. */ + std::optional listenerName = std::nullopt; }; /** @@ -163,4 +166,28 @@ struct TransactionPolicy { std::optional timeout = std::nullopt; }; +/** + * Client thread-pool sizing. 
+ * + * Both fields are optional; when unset the client uses its built-in default of a + * single thread for that pool. + */ +struct ThreadPolicy { + /** Number of threads used for network I/O. Unset uses the client default (1). */ + std::optional ioThreads = std::nullopt; + /** Number of threads used to run message listeners. Unset uses the client default (1). */ + std::optional messageListenerThreads = std::nullopt; +}; + +/** + * Client-wide memory budget. + * + * Bounds the memory used for pending (in-flight) messages across the client. The + * field is optional; when unset the client applies its built-in default limit. + */ +struct MemoryPolicy { + /** Maximum memory for buffered messages. Unset uses the client default. */ + std::optional limit = std::nullopt; +}; + } // namespace pulsar::st From e497536be5f24781930cec72e6b4307f8549e89d Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:53:36 -0700 Subject: [PATCH 24/39] st: MessageCore optional accessors, drop hasX() bools MessageCore::key() / producerName() / replicatedFrom() now return std::optional directly instead of a paired hasX() bool + string_view accessor -- the optional carries the present/absent signal. Message's wrappers collapse to direct forwards. Verified on clang + gcc:13. Signed-off-by: Matteo Merli --- include/pulsar/st/Message.h | 13 +++---------- include/pulsar/st/detail/MessageCore.h | 9 +++------ 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/include/pulsar/st/Message.h b/include/pulsar/st/Message.h index ace1003d..8681e545 100644 --- a/include/pulsar/st/Message.h +++ b/include/pulsar/st/Message.h @@ -93,9 +93,7 @@ class Message { * @return a view of the key (valid while this message is alive), or `std::nullopt` * if the message has none. */ - std::optional key() const { - return core_.hasKey() ? 
std::optional(core_.key()) : std::nullopt; - } + std::optional key() const { return core_.key(); } /** * The application-defined string properties attached to the message. @@ -131,9 +129,7 @@ class Message { * @return a view of the producer name (valid while this message is alive), or * `std::nullopt` if not present. */ - std::optional producerName() const { - return core_.hasProducerName() ? std::optional(core_.producerName()) : std::nullopt; - } + std::optional producerName() const { return core_.producerName(); } /** * The resolved canonical topic the message was received from. @@ -155,10 +151,7 @@ class Message { * @return a view of the originating cluster name (valid while this message is alive), * or `std::nullopt` if the message was not replicated from another cluster. */ - std::optional replicatedFrom() const { - return core_.hasReplicatedFrom() ? std::optional(core_.replicatedFrom()) - : std::nullopt; - } + std::optional replicatedFrom() const { return core_.replicatedFrom(); } /** * Whether this is a non-empty message. 
diff --git a/include/pulsar/st/detail/MessageCore.h b/include/pulsar/st/detail/MessageCore.h index 1d3d30d3..13daa9ae 100644 --- a/include/pulsar/st/detail/MessageCore.h +++ b/include/pulsar/st/detail/MessageCore.h @@ -56,18 +56,15 @@ class PULSAR_PUBLIC MessageCore { std::span data() const; MessageId id() const; - bool hasKey() const; - std::string_view key() const; + std::optional key() const; const Properties& properties() const; Timestamp publishTime() const; std::optional eventTime() const; int64_t sequenceId() const; - bool hasProducerName() const; - std::string_view producerName() const; + std::optional producerName() const; std::string_view topic() const; int redeliveryCount() const; - bool hasReplicatedFrom() const; - std::string_view replicatedFrom() const; + std::optional replicatedFrom() const; explicit operator bool() const { return static_cast(impl_); } From 39a305fce644addf6ea1f3aae1afb7f6b08f1916 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 14:56:59 -0700 Subject: [PATCH 25/39] st: receive cores take std::chrono::milliseconds, not int64_t timeoutMs detail::*Core receiveAsync/receiveMultiAsync now take std::chrono::milliseconds (matching the public receive() signatures), so the public methods forward the typed timeout directly instead of calling .count(). <cstdint> swapped for <chrono> in the cores (int64_t was only the timeout). Verified on clang + gcc:13.
Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 4 ++-- include/pulsar/st/QueueConsumer.h | 2 +- include/pulsar/st/StreamConsumer.h | 4 ++-- include/pulsar/st/detail/CheckpointConsumerCore.h | 7 ++++--- include/pulsar/st/detail/QueueConsumerCore.h | 4 ++-- include/pulsar/st/detail/StreamConsumerCore.h | 7 ++++--- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 0457cc32..af4cf338 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -103,7 +103,7 @@ class CheckpointConsumer { * close/disconnect. */ Expected> receive(std::chrono::milliseconds timeout) { - return toTyped(core_.receiveAsync(timeout.count()).get()); + return toTyped(core_.receiveAsync(timeout).get()); } /** @@ -134,7 +134,7 @@ class CheckpointConsumer { * failure. */ Expected> receiveMulti(int maxMessages, std::chrono::milliseconds timeout) { - return toTypedBatch(core_.receiveMultiAsync(maxMessages, timeout.count()).get()); + return toTypedBatch(core_.receiveMultiAsync(maxMessages, timeout).get()); } /** diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index d1608120..bf44719e 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -133,7 +133,7 @@ class QueueConsumer { * failure. */ Expected> receive(std::chrono::milliseconds timeout) { - return toTyped(core_.receiveAsync(timeout.count()).get()); + return toTyped(core_.receiveAsync(timeout).get()); } /** * Request the next message without blocking. diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index e42680f1..75c654a5 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -139,7 +139,7 @@ class StreamConsumer { * failure. 
*/ Expected> receive(std::chrono::milliseconds timeout) { - return toTyped(core_.receiveAsync(timeout.count()).get()); + return toTyped(core_.receiveAsync(timeout).get()); } /** * Request the next message without blocking. @@ -167,7 +167,7 @@ class StreamConsumer { * failure. */ Expected> receiveMulti(int maxMessages, std::chrono::milliseconds timeout) { - return toTypedBatch(core_.receiveMultiAsync(maxMessages, timeout.count()).get()); + return toTypedBatch(core_.receiveMultiAsync(maxMessages, timeout).get()); } /** diff --git a/include/pulsar/st/detail/CheckpointConsumerCore.h b/include/pulsar/st/detail/CheckpointConsumerCore.h index 9d67855d..880cd2c7 100644 --- a/include/pulsar/st/detail/CheckpointConsumerCore.h +++ b/include/pulsar/st/detail/CheckpointConsumerCore.h @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include @@ -47,8 +47,9 @@ class PULSAR_PUBLIC CheckpointConsumerCore { CheckpointConsumerCore() = default; Future receiveAsync() const; - Future receiveAsync(int64_t timeoutMs) const; - Future> receiveMultiAsync(int maxMessages, int64_t timeoutMs) const; + Future receiveAsync(std::chrono::milliseconds timeout) const; + Future> receiveMultiAsync(int maxMessages, + std::chrono::milliseconds timeout) const; Checkpoint checkpoint() const; Future closeAsync() const; std::string_view topic() const; diff --git a/include/pulsar/st/detail/QueueConsumerCore.h b/include/pulsar/st/detail/QueueConsumerCore.h index d5214a88..c67e24d3 100644 --- a/include/pulsar/st/detail/QueueConsumerCore.h +++ b/include/pulsar/st/detail/QueueConsumerCore.h @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include @@ -47,7 +47,7 @@ class PULSAR_PUBLIC QueueConsumerCore { QueueConsumerCore() = default; Future receiveAsync() const; - Future receiveAsync(int64_t timeoutMs) const; + Future receiveAsync(std::chrono::milliseconds timeout) const; void acknowledge(const MessageId& id) const; void acknowledge(const MessageId& id, const 
Transaction& txn) const; void negativeAcknowledge(const MessageId& id) const; diff --git a/include/pulsar/st/detail/StreamConsumerCore.h b/include/pulsar/st/detail/StreamConsumerCore.h index 45865610..aea16e71 100644 --- a/include/pulsar/st/detail/StreamConsumerCore.h +++ b/include/pulsar/st/detail/StreamConsumerCore.h @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include @@ -48,8 +48,9 @@ class PULSAR_PUBLIC StreamConsumerCore { StreamConsumerCore() = default; Future receiveAsync() const; - Future receiveAsync(int64_t timeoutMs) const; - Future> receiveMultiAsync(int maxMessages, int64_t timeoutMs) const; + Future receiveAsync(std::chrono::milliseconds timeout) const; + Future> receiveMultiAsync(int maxMessages, + std::chrono::milliseconds timeout) const; void acknowledgeCumulative(const MessageId& id) const; void acknowledgeCumulative(const MessageId& id, const Transaction& txn) const; Future closeAsync() const; From d17d94aa276bca85f048694eaa80fd3e89575a4c Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:01:01 -0700 Subject: [PATCH 26/39] st: OutgoingMessage key -> std::optional Replace the bool hasKey + std::string key pair on OutgoingMessage with a single std::optional key, mirroring the read-side MessageCore::key() -> std::optional. nullopt means no routing key. MessageBuilder::key() now just assigns the optional. Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index a27f7491..6008893e 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -111,10 +111,8 @@ struct OutgoingMessage { std::span payloadView; /** When true, publish `payloadView` directly without copying; otherwise `payload`. */ bool usesView = false; - /** Whether a routing key is set. `false` (the default) means no key. 
*/ - bool hasKey = false; - /** Partition/routing key; meaningful only when `hasKey` is true. */ - std::string key; + /** Partition/routing key; unset (nullopt, the default) means no key. */ + std::optional key; /** Per-message user metadata. Empty by default. */ Properties properties; std::optional eventTime; ///< Application event time; unset (nullopt) by default. @@ -147,7 +145,6 @@ class MessageBuilder { * @return `*this`, for chaining. */ MessageBuilder& key(std::string k) { - message_.hasKey = true; message_.key = std::move(k); return *this; } From cf6ed88dcef95aeff2b88794e153254950133a3e Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:02:21 -0700 Subject: [PATCH 27/39] st: OutgoingMessage sequenceId -> std::optional Drop the -1 sentinel on OutgoingMessage::sequenceId in favor of std::optional; unset means auto-assign. Avoids a custom in-band encoding of 'no explicit sequence id'. MessageBuilder::sequenceId() just assigns the optional. Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 6008893e..75888ca0 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -116,7 +116,7 @@ struct OutgoingMessage { /** Per-message user metadata. Empty by default. */ Properties properties; std::optional eventTime; ///< Application event time; unset (nullopt) by default. - int64_t sequenceId = -1; ///< Explicit sequence id; -1 = auto-assign. + std::optional sequenceId; ///< Explicit sequence id; unset = auto-assign. std::optional deliverAt; ///< Scheduled delivery time; unset = deliver immediately. /** Target clusters for geo-replication; empty applies the topic's default. */ std::vector replicationClusters; @@ -206,7 +206,7 @@ class MessageBuilder { /** * Set an explicit sequence id for this message, overriding auto-assignment. * - * @param s the sequence id. 
By default (-1) the producer assigns one + * @param s the sequence id. When left unset the producer assigns one * automatically. * @return `*this`, for chaining. */ From c3ac06da8493ec25a0e365a4cf578faea2b301fb Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:05:11 -0700 Subject: [PATCH 28/39] st: Producer::lastSequenceId() -> std::optional Drop the -1 sentinel on the read side too: lastSequenceId() now returns std::nullopt when nothing has been published yet, instead of -1. Updates detail::ProducerCore to match. Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 6 +++--- include/pulsar/st/detail/ProducerCore.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 75888ca0..3a040d23 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -344,9 +344,9 @@ class Producer { /** @return a view of the producer's name (broker-assigned when none was configured), valid * while the producer is alive. */ std::string_view producerName() const { return core_.producerName(); } - /** @return the sequence id of the most recently published message, or -1 if - * none has been published yet. */ - int64_t lastSequenceId() const { return core_.lastSequenceId(); } + /** @return the sequence id of the most recently published message, or + * std::nullopt if none has been published yet. */ + std::optional lastSequenceId() const { return core_.lastSequenceId(); } /** * Block until all sends issued before this call have completed. 
Takes a diff --git a/include/pulsar/st/detail/ProducerCore.h b/include/pulsar/st/detail/ProducerCore.h index bb6cc4b9..004dc2ee 100644 --- a/include/pulsar/st/detail/ProducerCore.h +++ b/include/pulsar/st/detail/ProducerCore.h @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -49,7 +50,7 @@ class PULSAR_PUBLIC ProducerCore { Future sendAsync(OutgoingMessage message) const; std::string_view topic() const; std::string_view producerName() const; - int64_t lastSequenceId() const; + std::optional lastSequenceId() const; Future flushAsync() const; Future closeAsync() const; From 14c1e9217a18ddf550084785e616410951441523 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:08:09 -0700 Subject: [PATCH 29/39] st: P2 - guard rfl encode() against throwing rfl::{json,avro}::write() can throw, which would escape encode()'s Expected<void> non-throwing contract. Wrap the body in try/catch and report failures as unexpected(ResultInvalidMessage, ...), mirroring the existing decode() guard. info() (schema derivation) has no error channel and stays off the non-throwing path; document that on the factories.
Signed-off-by: Matteo Merli --- include/pulsar/st/AvroSchema.h | 19 +++++++++++++------ include/pulsar/st/JsonSchema.h | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/pulsar/st/AvroSchema.h b/include/pulsar/st/AvroSchema.h index ff00b8a2..f8bc58f7 100644 --- a/include/pulsar/st/AvroSchema.h +++ b/include/pulsar/st/AvroSchema.h @@ -47,10 +47,14 @@ template struct AvroSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::AVRO, "AVRO", rfl::avro::to_schema()); } Expected encode(const T& value, std::vector& out) const { - const std::string s = rfl::avro::write(value); - const auto* p = reinterpret_cast(s.data()); - out.assign(p, p + s.size()); - return {}; + try { + const std::string s = rfl::avro::write(value); + const auto* p = reinterpret_cast(s.data()); + out.assign(p, p + s.size()); + return {}; + } catch (const std::exception& e) { + return unexpected(pulsar::ResultInvalidMessage, e.what()); + } } Expected decode(std::span data) const { try { @@ -76,8 +80,11 @@ struct AvroSerDe { * * @tparam T the struct type to serialize as Avro; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use Avro. `decode` reports input - * that is not a valid Avro encoding for `T` as an `Error` rather than throwing. + * @return a `Schema` whose `encode`/`decode` use Avro. Both report failures as + * an `Error` (input that is not a valid Avro encoding for `T` on `decode`, a + * serialization failure on `encode`) rather than throwing. Note: `info()` + * (schema derivation) is not on this non-throwing path and may propagate a + * reflect-cpp exception. 
*/ template Schema avroSchema() { diff --git a/include/pulsar/st/JsonSchema.h b/include/pulsar/st/JsonSchema.h index d3644541..2336cda9 100644 --- a/include/pulsar/st/JsonSchema.h +++ b/include/pulsar/st/JsonSchema.h @@ -44,10 +44,14 @@ template struct JsonSerDe { SchemaInfo info() const { return SchemaInfo(SchemaType::JSON, "JSON", rfl::json::to_schema()); } Expected encode(const T& value, std::vector& out) const { - const std::string s = rfl::json::write(value); - const auto* p = reinterpret_cast(s.data()); - out.assign(p, p + s.size()); - return {}; + try { + const std::string s = rfl::json::write(value); + const auto* p = reinterpret_cast(s.data()); + out.assign(p, p + s.size()); + return {}; + } catch (const std::exception& e) { + return unexpected(pulsar::ResultInvalidMessage, e.what()); + } } Expected decode(std::span data) const { try { @@ -76,8 +80,10 @@ struct JsonSerDe { * * @tparam T the struct type to serialize as JSON; its fields must be reflectable * by reflect-cpp. - * @return a `Schema` whose `encode`/`decode` use JSON. `decode` reports input - * that is not valid JSON for `T` as an `Error` rather than throwing. + * @return a `Schema` whose `encode`/`decode` use JSON. Both report failures as + * an `Error` (invalid JSON for `T` on `decode`, a serialization failure on + * `encode`) rather than throwing. Note: `info()` (schema derivation) is not + * on this non-throwing path and may propagate a reflect-cpp exception. */ template Schema jsonSchema() { From 5808898266516d2ed66e1f41769c475a72d1c083 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:08:43 -0700 Subject: [PATCH 30/39] st: P5 - warn about fire-and-forget + BytesView dangling A zero-copy Schema send publishes the viewed bytes directly, so they must outlive the send. The returned future is the only completion signal; discarding it (fire-and-forget) leaves no safe point to free the bytes. Document this on sendAsync(). 
Signed-off-by: Matteo Merli --- include/pulsar/st/Producer.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 3a040d23..f566c70e 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -273,8 +273,16 @@ class MessageBuilder { /** * Publish the message asynchronously without blocking. * + * @warning With a zero-copy `Schema` producer the payload is not + * copied, so the viewed bytes must stay valid until the send completes. + * The returned future is the only signal for that — discarding it + * (fire-and-forget) leaves no way to know when the bytes may be freed. + * For a fire-and-forget send, either keep the future, or use a copying + * `Schema` payload instead. + * * @return a `Future` that completes with the assigned id on success - * or the failure. The future may be ignored for fire-and-forget sends. + * or the failure. The future may be ignored for fire-and-forget sends + * (but see the warning above for `BytesView` payloads). */ Future sendAsync() { if (encodeError_) { From 0ab60f7eaf9d96171fb77e308791ed2a1fc31f51 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:11:58 -0700 Subject: [PATCH 31/39] st: P6 - std::hash<MessageId> + Checkpoint operator<< MessageId could not be used as a key in unordered_map/unordered_set. Add a std::hash<MessageId> specialization (operator() defined in lib/st, consistent with operator==: equal ids hash equal); befriend it so it can read the impl. Give Checkpoint a hidden-friend operator<< mirroring MessageId's, so both opaque position types stream the same way for logging/debugging.
Signed-off-by: Matteo Merli --- include/pulsar/st/Checkpoint.h | 13 +++++++++++++ include/pulsar/st/MessageId.h | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/pulsar/st/Checkpoint.h b/include/pulsar/st/Checkpoint.h index 0e91d97a..37678c91 100644 --- a/include/pulsar/st/Checkpoint.h +++ b/include/pulsar/st/Checkpoint.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,18 @@ class PULSAR_PUBLIC Checkpoint { friend class CheckpointFactory; explicit Checkpoint(std::shared_ptr impl); + /** + * Write a human-readable representation of @p checkpoint to @p s. + * + * Intended for logging and debugging; the format is not a stable contract and + * must not be parsed (use `toByteArray()` for serialization). + * + * @param s the output stream to write to. + * @param checkpoint the checkpoint to format. + * @return the stream @p s, to allow chaining. + */ + friend PULSAR_PUBLIC std::ostream& operator<<(std::ostream& s, const Checkpoint& checkpoint); + std::shared_ptr impl_; }; diff --git a/include/pulsar/st/MessageId.h b/include/pulsar/st/MessageId.h index 2a8ff4d3..93e6ba34 100644 --- a/include/pulsar/st/MessageId.h +++ b/include/pulsar/st/MessageId.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -91,6 +92,7 @@ class PULSAR_PUBLIC MessageId { private: friend class MessageIdFactory; + friend struct std::hash; explicit MessageId(std::shared_ptr impl); /** @@ -109,3 +111,13 @@ class PULSAR_PUBLIC MessageId { }; } // namespace pulsar::st + +/** + * Hash support so `MessageId` can be used as a key in unordered containers + * (`std::unordered_map`/`std::unordered_set`). Consistent with `operator==`: ids + * that compare equal hash equal. Defined in lib/st. 
+ */ +template <> +struct std::hash { + std::size_t operator()(const pulsar::st::MessageId& messageId) const noexcept; +}; From 00af737ff031d14a2ff812dccf9c6b4bd5a7f4f7 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:16:31 -0700 Subject: [PATCH 32/39] st: P7 - explicitly default copy/move on handles PulsarClient, Producer, the three consumers, and Transaction are shared-state handles that must stay cheaply copyable and movable. They relied on implicitly-generated special members, which a later user-declared destructor would silently suppress (turning the move into a copy or deleting it). Declare copy/move = default explicitly on all six to lock in handle value semantics and make the intent visible. Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 6 ++++++ include/pulsar/st/Client.h | 6 ++++++ include/pulsar/st/Producer.h | 6 ++++++ include/pulsar/st/QueueConsumer.h | 6 ++++++ include/pulsar/st/StreamConsumer.h | 6 ++++++ include/pulsar/st/Transaction.h | 6 ++++++ 6 files changed, 36 insertions(+) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index af4cf338..88bd1619 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -82,6 +82,12 @@ class CheckpointConsumer { /** @brief Construct an empty, unusable consumer (falsy under `operator bool`). */ CheckpointConsumer() = default; + /** Copyable, movable handle; copies share the underlying consumer. */ + CheckpointConsumer(const CheckpointConsumer&) = default; + CheckpointConsumer& operator=(const CheckpointConsumer&) = default; + CheckpointConsumer(CheckpointConsumer&&) = default; + CheckpointConsumer& operator=(CheckpointConsumer&&) = default; + /** * @brief Block until the next message is available and return it. 
* diff --git a/include/pulsar/st/Client.h b/include/pulsar/st/Client.h index 31090136..1b7156fd 100644 --- a/include/pulsar/st/Client.h +++ b/include/pulsar/st/Client.h @@ -53,6 +53,12 @@ class PulsarClientBuilder; */ class PULSAR_PUBLIC PulsarClient { public: + /** Copyable, movable handle; copies share the underlying client. */ + PulsarClient(const PulsarClient&) = default; + PulsarClient& operator=(const PulsarClient&) = default; + PulsarClient(PulsarClient&&) = default; + PulsarClient& operator=(PulsarClient&&) = default; + /** * Begin configuring a client. * diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index f566c70e..05fa6d05 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -320,6 +320,12 @@ class Producer { * live producer from `ProducerBuilder`. */ Producer() = default; + /** Copyable, movable handle; copies share the underlying producer. */ + Producer(const Producer&) = default; + Producer& operator=(const Producer&) = default; + Producer(Producer&&) = default; + Producer& operator=(Producer&&) = default; + /** * Begin building a single message with per-message options (key, properties, * event time, delayed delivery, transaction, ...). diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index bf44719e..1a0c20cb 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -110,6 +110,12 @@ class QueueConsumer { * subscribed consumer is move-assigned into it. */ QueueConsumer() = default; + /** Copyable, movable handle; copies share the underlying consumer. */ + QueueConsumer(const QueueConsumer&) = default; + QueueConsumer& operator=(const QueueConsumer&) = default; + QueueConsumer(QueueConsumer&&) = default; + QueueConsumer& operator=(QueueConsumer&&) = default; + /** * Block until the next message arrives and return it. 
* diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index 75c654a5..6fa980cb 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -116,6 +116,12 @@ class StreamConsumer { * subscribed consumer is move-assigned into it. */ StreamConsumer() = default; + /** Copyable, movable handle; copies share the underlying consumer. */ + StreamConsumer(const StreamConsumer&) = default; + StreamConsumer& operator=(const StreamConsumer&) = default; + StreamConsumer(StreamConsumer&&) = default; + StreamConsumer& operator=(StreamConsumer&&) = default; + /** * Block until the next message arrives and return it. * diff --git a/include/pulsar/st/Transaction.h b/include/pulsar/st/Transaction.h index a5e7fe01..6d29a417 100644 --- a/include/pulsar/st/Transaction.h +++ b/include/pulsar/st/Transaction.h @@ -80,6 +80,12 @@ class PULSAR_PUBLIC Transaction { /** @brief Construct an empty, unusable transaction (falsy under `operator bool`). */ Transaction() = default; + /** Copyable, movable handle; copies share the underlying transaction. */ + Transaction(const Transaction&) = default; + Transaction& operator=(const Transaction&) = default; + Transaction(Transaction&&) = default; + Transaction& operator=(Transaction&&) = default; + /** * @brief Return the current lifecycle state of this transaction. * From 7d2dd84c07022e6f3b4d2ad7416e6130a8605163 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:19:22 -0700 Subject: [PATCH 33/39] st: P8 - thenApply supports void-returning and move-only mappers thenApply assumed a non-void, copyable mapper: it called setValue(f(...)) (ill-formed when f returns void) and moved f straight into the std::function listener (ill-formed when f is move-only, since std::function requires a copyable target). 
Branch on the result type with if constexpr - a void mapper runs and then completes the Future via setSuccess() - and hold f in a shared_ptr so the copyable listener can carry a move-only mapper. Verified at runtime (normal, void, move-only, and error-propagation paths) on clang and gcc. Signed-off-by: Matteo Merli --- include/pulsar/st/Future.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/include/pulsar/st/Future.h b/include/pulsar/st/Future.h index e6107217..54570bc0 100644 --- a/include/pulsar/st/Future.h +++ b/include/pulsar/st/Future.h @@ -111,21 +111,31 @@ class Future { * returned Future. Only participates in overload resolution when `T` is not * `void`. * - * @tparam F a callable taking `const T&` and returning the mapped value. + * @tparam F a callable taking `const T&`; may be move-only, and may return + * `void` (producing a `Future` that completes once `f` runs). * @tparam U defaults to `T`; an implementation detail of the `void` constraint. * @param f the mapping function to apply to the value. - * @return a `Future` of `f`'s return type, completed with the mapped value on - * success or the propagated error on failure. + * @return a `Future` of `f`'s return type, completed with the mapped value (or + * with success when `f` returns `void`) on success, or the propagated + * error on failure. */ template , int> = 0> Future> thenApply(F f) const { using R = std::invoke_result_t; detail::Promise promise; - state_->addListener([promise, f = std::move(f)](const Expected& result) { - if (result) { - promise.setValue(f(*result)); - } else { + // Hold f in a shared_ptr so the copyable std::function listener can carry a + // move-only mapper; f is invoked at most once. 
+ auto fp = std::make_shared(std::move(f)); + state_->addListener([promise, fp](const Expected& result) { + if (!result) { promise.setError(result.error()); + return; + } + if constexpr (std::is_void_v) { + (*fp)(*result); + promise.setSuccess(); + } else { + promise.setValue((*fp)(*result)); } }); return promise.getFuture(); From 22b480f2f0f5dfde0cb73a5aaa5993bb9c38e360 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:22:50 -0700 Subject: [PATCH 34/39] st: P9 - fail the future when a Promise is abandoned A detail::Promise dropped without being completed left its SharedState forever pending, so Future::get() (and listeners / co_await) blocked indefinitely. Add a Guard shared by every copy of a Promise: when the last copy is destroyed it completes the state with an error (ResultUnknownError, "promise abandoned before completion") unless something already fulfilled it. complete() is idempotent, so a normally-completed promise is unaffected, and destroying one copy among several does not trip it. Verified at runtime (single/copied/void abandonment, partial-copy safety, completed no-op) on clang and gcc. Signed-off-by: Matteo Merli --- include/pulsar/st/Future.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/include/pulsar/st/Future.h b/include/pulsar/st/Future.h index 54570bc0..0aa5322b 100644 --- a/include/pulsar/st/Future.h +++ b/include/pulsar/st/Future.h @@ -194,8 +194,22 @@ namespace detail { */ template class Promise { + // Shared by every copy of a Promise. When the last copy is destroyed its + // destructor fails the future if nothing ever completed it, so an abandoned + // producer surfaces as an error instead of a get()/listener blocking forever. + // complete() is idempotent, so this is a no-op once the promise was fulfilled. 
+ struct Guard { + explicit Guard(std::shared_ptr> s) : state(std::move(s)) {} + Guard(const Guard&) = delete; + Guard& operator=(const Guard&) = delete; + ~Guard() { + state->complete(Expected(Error{ResultUnknownError, "promise abandoned before completion"})); + } + std::shared_ptr> state; + }; + public: - Promise() : state_(std::make_shared>()) {} + Promise() : state_(std::make_shared>()), guard_(std::make_shared(state_)) {} Future getFuture() const { return Future(state_); } @@ -214,6 +228,7 @@ class Promise { private: std::shared_ptr> state_; + std::shared_ptr guard_; }; } // namespace detail From 7d972ded896881f5f8d7123f261be6f404b8a175 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:23:41 -0700 Subject: [PATCH 35/39] st: P10 - note negativeAckRedeliveryDelay is inert on StreamConsumer AckPolicy::negativeAckRedeliveryDelay only applies to a QueueConsumer. A StreamConsumer acknowledges cumulatively and has no negative-ack path, so the field is silently ignored there. Document that on the StreamConsumer config field and the ackPolicy() setter. Signed-off-by: Matteo Merli --- include/pulsar/st/StreamConsumer.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index 6fa980cb..e98bf1aa 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -74,7 +74,8 @@ struct StreamConsumerConfig { /// which case the broker assigns one. std::optional consumerName; /// Acknowledgment tuning (e.g. the ack-grouping/batching window). Default-constructed - /// `AckPolicy` when unset. + /// `AckPolicy` when unset. Note: `AckPolicy::negativeAckRedeliveryDelay` is ignored here — + /// a StreamConsumer acknowledges cumulatively and has no negative-ack/redelivery path. AckPolicy ackPolicy; /// When set to `true`, read from the topic's compacted view (latest value per key) /// instead of the full log. Default unset (broker default, i.e. 
uncompacted). @@ -343,6 +344,10 @@ class StreamConsumerBuilder { /** * Tune acknowledgment behavior (e.g. the ack-grouping/batching window). * + * @note `AckPolicy::negativeAckRedeliveryDelay` does not apply to a StreamConsumer + * (it acknowledges cumulatively, with no negative-ack/redelivery path) and is + * silently ignored. + * * @param policy the ack policy. Default-constructed `AckPolicy` when unset. * @return `*this` for chaining. */ From d44f5188a261fb2b197d3c2fa6bb2d7b48f15e32 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:25:19 -0700 Subject: [PATCH 36/39] st: P11 - document the invalid/rejected default consumer target The topic-vs-namespace target is a bool + two strings, so the POD config can represent invalid combinations the type system does not prevent - including the default-constructed value (single-topic mode with an empty topic). Document that such states (no target, or missing subscriptionName) are rejected by create()/createAsync() with an Error, and that fields not selected by useNamespace are ignored. (A variant target could make these unrepresentable, but that diverges from the POD-config + designated-init pattern used across the API.) Signed-off-by: Matteo Merli --- include/pulsar/st/QueueConsumer.h | 7 +++++++ include/pulsar/st/StreamConsumer.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index 1a0c20cb..c883b882 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -46,6 +46,13 @@ namespace pulsar::st { * `QueueConsumerBuilder` rather than populating this struct directly; the * builder enforces the invariants (notably that exactly one of topic vs. namespace * mode is selected and that `subscriptionName` is set). + * + * This POD can also express states the type system does not rule out — including + * its **default value**, which selects single-topic mode with an empty `topic`. 
A + * configuration with no target (empty `topic` while `useNamespace == false`, or + * empty `namespaceName` while `useNamespace == true`) or no `subscriptionName` is + * invalid: `create()` / `createAsync()` reject it with an `Error` instead of + * connecting. Fields not selected by `useNamespace` are ignored. */ struct QueueConsumerConfig { /// Selects namespace mode over single-topic mode. When `false` (the default), diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index e98bf1aa..cddb66e3 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -46,6 +46,13 @@ namespace pulsar::st { * `StreamConsumerBuilder` rather than populating this struct directly; the * builder enforces the invariants (notably that exactly one of topic vs. * namespace mode is selected and that `subscriptionName` is set). + * + * This POD can also express states the type system does not rule out — including + * its **default value**, which selects single-topic mode with an empty `topic`. A + * configuration with no target (empty `topic` while `useNamespace == false`, or + * empty `namespaceName` while `useNamespace == true`) or no `subscriptionName` is + * invalid: `create()` / `createAsync()` reject it with an `Error` instead of + * connecting. Fields not selected by `useNamespace` are ignored. */ struct StreamConsumerConfig { /// Selects namespace mode over single-topic mode. When `false` (the default), From 9999a4a9b75f90fe7e84a838ac4a81deb4465e8a Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:26:05 -0700 Subject: [PATCH 37/39] st: P12 - rename ClientCore::createCheckpointAsync -> createCheckpointConsumerAsync Match the `create*Async` naming of its siblings (createProducerAsync) and the CheckpointConsumer type it returns. Internal detail rename; no public API change.
Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 2 +- include/pulsar/st/detail/ClientCore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 88bd1619..3c39e364 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -314,7 +314,7 @@ class CheckpointConsumerBuilder { Schema schema = schema_; CheckpointConsumerConfig config = config_; config.schema = schema.info(); - return client_.createCheckpointAsync(std::move(config)) + return client_.createCheckpointConsumerAsync(std::move(config)) .thenApply([schema](const detail::CheckpointConsumerCore& core) { return CheckpointConsumer(core, schema); }); diff --git a/include/pulsar/st/detail/ClientCore.h b/include/pulsar/st/detail/ClientCore.h index e7c957ab..9cb3bdfa 100644 --- a/include/pulsar/st/detail/ClientCore.h +++ b/include/pulsar/st/detail/ClientCore.h @@ -55,7 +55,7 @@ class PULSAR_PUBLIC ClientCore { Future createProducerAsync(ProducerConfig config) const; Future subscribeStreamAsync(StreamConsumerConfig config) const; Future subscribeQueueAsync(QueueConsumerConfig config) const; - Future createCheckpointAsync(CheckpointConsumerConfig config) const; + Future createCheckpointConsumerAsync(CheckpointConsumerConfig config) const; Future newTransactionAsync() const; Future closeAsync() const; void shutdown() const; From db63962e0ccbcb7f04995634fb1cd3ffff6e0f97 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 15:28:20 -0700 Subject: [PATCH 38/39] st: P13 - rvalue overloads for Expected monadic ops and value_or value_or, and_then, transform, and or_else were const&-only: they copied the contained value into the continuation, and value_or/and_then/transform would not even compile for a move-only T. 
Add &&-qualified overloads that move the contained value (and forward the error by move), so a move-only or expensive-to-copy T flows through the chain without a copy. value() and operator* already had ref-qualified overloads. Verified at runtime with a move-only payload (unique_ptr) on clang and gcc, plus an lvalue regression pass. Signed-off-by: Matteo Merli --- include/pulsar/st/Expected.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/pulsar/st/Expected.h b/include/pulsar/st/Expected.h index 3ea2f5cb..88d980ce 100644 --- a/include/pulsar/st/Expected.h +++ b/include/pulsar/st/Expected.h @@ -184,6 +184,12 @@ class [[nodiscard]] Expected { return has_value() ? std::get<0>(storage_) : static_cast(std::forward(fallback)); } + /** Rvalue overload of `value_or()`: moves the contained value out on success. */ + template + T value_or(U&& fallback) && { + return has_value() ? std::get<0>(std::move(storage_)) : static_cast(std::forward(fallback)); + } + /** * Monadic chaining: invoke @p f on the value, or propagate the error. * @@ -202,6 +208,13 @@ class [[nodiscard]] Expected { return has_value() ? std::forward(f)(std::get<0>(storage_)) : R(error()); } + /** Rvalue overload of `and_then()`: invokes @p f with the moved-out value. */ + template + auto and_then(F&& f) && { + using R = std::remove_cv_t>>; + return has_value() ? std::forward(f)(std::get<0>(std::move(storage_))) : R(std::move(error())); + } + /** * Monadic mapping: transform the value through @p f, or propagate the error. * @@ -219,6 +232,14 @@ class [[nodiscard]] Expected { return has_value() ? Expected(std::forward(f)(std::get<0>(storage_))) : Expected(error()); } + /** Rvalue overload of `transform()`: maps the moved-out value through @p f. */ + template + auto transform(F&& f) && { + using U = std::remove_cv_t>>; + return has_value() ? 
Expected(std::forward(f)(std::get<0>(std::move(storage_)))) + : Expected(std::move(error())); + } + /** * Monadic error recovery: invoke @p f on the error, or pass the value through. * @@ -235,6 +256,13 @@ class [[nodiscard]] Expected { return has_value() ? *this : std::forward(f)(error()); } + /** Rvalue overload of `or_else()`: passes the moved value through, or invokes @p f + * with the moved-out error. */ + template + Expected or_else(F&& f) && { + return has_value() ? std::move(*this) : std::forward(f)(std::move(error())); + } + private: std::variant storage_; }; From 8264bcf202b61354e52e7dfb1bfcfee776f4716a Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Tue, 30 Jun 2026 16:07:13 -0700 Subject: [PATCH 39/39] st: review nits N1-N7 N1 Expected operator*/operator-> are not UB on an error: operator* is noexcept + std::get so it terminates; operator-> returns nullptr. Correct the docs to say so. N2 Drop redundant unit prose ("in milliseconds"/"in seconds") from std::chrono fields/params in Policies, Consumer (AckPolicy) and the sendTimeout setter; the type already states the unit. (ProducerConfig's int64 sendTimeoutMs keeps its "milliseconds" note - it is not a chrono type.) N3 decodeBigEndian: replace the dead `i < data.size()` guard (all codecs length-check first) with an assert of that precondition. N4 ProtobufNativeSchema: guard the size_t->int narrowing in encode/decode, rejecting messages larger than INT_MAX instead of passing a wrapped size. N5 OutgoingMessage: one-line note for the usesView<->payloadView invariant. N7 Wrap the SerDeFor concept in clang-format off/on so clang-format-11 stops mangling the `{ expr } -> Concept;` compound requirements. N6 Normalize config-struct field docs to the dominant /** */-before style (OutgoingMessage, CheckpointConsumerConfig, Stream/QueueConsumerConfig); enum-value ///< trailing docs are left as-is. 
Verified: clang-format-11 clean; examples compile (clang); N3 runtime test and N4 (protobuf stub) pass on clang and gcc. Signed-off-by: Matteo Merli --- include/pulsar/st/CheckpointConsumer.h | 21 +++++---- include/pulsar/st/Consumer.h | 4 +- include/pulsar/st/Expected.h | 12 +++--- include/pulsar/st/Policies.h | 16 +++---- include/pulsar/st/Producer.h | 19 ++++++--- include/pulsar/st/ProtobufNativeSchema.h | 10 ++++- include/pulsar/st/QueueConsumer.h | 44 +++++++++---------- include/pulsar/st/Schema.h | 15 ++++--- include/pulsar/st/StreamConsumer.h | 54 ++++++++++++------------ 9 files changed, 107 insertions(+), 88 deletions(-) diff --git a/include/pulsar/st/CheckpointConsumer.h b/include/pulsar/st/CheckpointConsumer.h index 3c39e364..4e8463d1 100644 --- a/include/pulsar/st/CheckpointConsumer.h +++ b/include/pulsar/st/CheckpointConsumer.h @@ -45,15 +45,18 @@ namespace pulsar::st { * not construct this directly. */ struct CheckpointConsumerConfig { - std::string topic; ///< Scalable topic to read. REQUIRED; no default. - Checkpoint startPosition = - Checkpoint::latest(); ///< Position to start from. Default `Checkpoint::latest()`. - std::optional - consumerGroup; ///< Consumer group to join. Unset (default) => ungrouped, reads every segment. - std::optional - consumerName; ///< Human-readable consumer name. Unset (default) => auto-generated. - Properties properties; ///< Free-form key/value metadata attached to the consumer. Default empty. - SchemaInfo schema; ///< Schema descriptor; filled in from `Schema` by the builder. + /** Scalable topic to read. REQUIRED; no default. */ + std::string topic; + /** Position to start from. Default `Checkpoint::latest()`. */ + Checkpoint startPosition = Checkpoint::latest(); + /** Consumer group to join. Unset (default) => ungrouped, reads every segment. */ + std::optional consumerGroup; + /** Human-readable consumer name. Unset (default) => auto-generated. 
*/ + std::optional consumerName; + /** Free-form key/value metadata attached to the consumer. Default empty. */ + Properties properties; + /** Schema descriptor; filled in from `Schema` by the builder. */ + SchemaInfo schema; }; template diff --git a/include/pulsar/st/Consumer.h b/include/pulsar/st/Consumer.h index a5598126..51bd9c5b 100644 --- a/include/pulsar/st/Consumer.h +++ b/include/pulsar/st/Consumer.h @@ -52,10 +52,10 @@ enum class SubscriptionInitialPosition * optional and fall back to the client default when unset. */ struct AckPolicy { - /** Time window over which acknowledgments are batched before being sent, in milliseconds; 0 acks + /** Time window over which acknowledgments are batched before being sent; 0 acks * immediately. Unset uses the client default. */ std::optional groupTime = std::nullopt; - /** Delay before a negatively-acknowledged message is redelivered, in milliseconds. QueueConsumer only. + /** Delay before a negatively-acknowledged message is redelivered. QueueConsumer only. * Unset uses the client default. */ std::optional negativeAckRedeliveryDelay = std::nullopt; }; diff --git a/include/pulsar/st/Expected.h b/include/pulsar/st/Expected.h index 88d980ce..8b0f1914 100644 --- a/include/pulsar/st/Expected.h +++ b/include/pulsar/st/Expected.h @@ -139,8 +139,10 @@ class [[nodiscard]] Expected { /** * Unchecked access to the contained value. * - * Unlike `value()`, this never throws and performs no check. Behaviour is - * undefined if this holds an error; verify with `operator bool` first. + * Performs no check and is `noexcept`. It does not throw on an error; instead, + * because it reads the wrong `std::variant` alternative through a `noexcept` + * boundary, accessing the value when this holds an error terminates the program. + * Verify with `operator bool` first. * * @return a reference to the contained value (lvalue or rvalue per ref-qualifier). 
*/ @@ -153,10 +155,10 @@ class [[nodiscard]] Expected { /** * Unchecked member access to the contained value. * - * Behaviour is undefined if this holds an error; verify with `operator bool` - * first. + * Returns `nullptr` if this holds an error (so `e->member` would then dereference + * a null pointer); verify with `operator bool` first. * - * @return a pointer to the contained value. + * @return a pointer to the contained value, or `nullptr` if this holds an error. */ const T* operator->() const noexcept { return std::get_if<0>(&storage_); } /** @copydoc operator->() const */ diff --git a/include/pulsar/st/Policies.h b/include/pulsar/st/Policies.h index c078f774..47e12d8b 100644 --- a/include/pulsar/st/Policies.h +++ b/include/pulsar/st/Policies.h @@ -90,13 +90,13 @@ struct MemorySize { struct ConnectionPolicy { /** Number of physical connections opened to each broker. Unset uses the client default. */ std::optional connectionsPerBroker = std::nullopt; - /** Maximum time to wait for a TCP/TLS connection to be established, in milliseconds. Unset uses the + /** Maximum time to wait for a TCP/TLS connection to be established. Unset uses the * client default. */ std::optional connectionTimeout = std::nullopt; - /** Maximum time to wait for a broker request (e.g. produce/consume control ops) to complete, in - * milliseconds. Unset uses the client default. */ + /** Maximum time to wait for a broker request (e.g. produce/consume control ops) to complete. Unset + * uses the client default. */ std::optional operationTimeout = std::nullopt; - /** Interval between keep-alive pings sent on an idle connection, in seconds. Unset uses the client + /** Interval between keep-alive pings sent on an idle connection. Unset uses the client * default. */ std::optional keepAliveInterval = std::nullopt; /** Maximum number of concurrent topic-lookup requests in flight. Unset uses the client default. 
*/ @@ -104,7 +104,7 @@ struct ConnectionPolicy { /** Maximum number of lookup redirects to follow before failing a lookup. Unset uses the client default. */ std::optional maxLookupRedirects = std::nullopt; - /** Time an idle pooled connection may stay open before being closed, in milliseconds. Unset uses the + /** Time an idle pooled connection may stay open before being closed. Unset uses the * client default. */ std::optional maxConnectionIdleTime = std::nullopt; /** Advertised listener name for broker discovery (multi-listener deployments). Unset uses the @@ -120,9 +120,9 @@ struct ConnectionPolicy { * applies its built-in default for that bound. */ struct BackoffPolicy { - /** Delay before the first reconnection attempt, in milliseconds. Unset uses the client default. */ + /** Delay before the first reconnection attempt. Unset uses the client default. */ std::optional initialBackoff = std::nullopt; - /** Upper bound on the backoff delay as it grows across retries, in milliseconds. Unset uses the client + /** Upper bound on the backoff delay as it grows across retries. Unset uses the client * default. */ std::optional maxBackoff = std::nullopt; }; @@ -161,7 +161,7 @@ struct TlsPolicy { * optional and the client supplies a built-in default when it is unset. */ struct TransactionPolicy { - /** Default lifetime of a transaction before it is automatically aborted, in milliseconds. Unset uses the + /** Default lifetime of a transaction before it is automatically aborted. Unset uses the * client default. */ std::optional timeout = std::nullopt; }; diff --git a/include/pulsar/st/Producer.h b/include/pulsar/st/Producer.h index 05fa6d05..8848f18c 100644 --- a/include/pulsar/st/Producer.h +++ b/include/pulsar/st/Producer.h @@ -101,6 +101,10 @@ struct ProducerConfig { * This is the encoded, schema-agnostic form of a message: the typed value has * already been serialized to `payload` bytes. 
The builder fills these fields from * its fluent setters and hands the result to the producer core for publishing. + * + * Invariant (maintained by `MessageBuilder`): when `usesView` is true the payload is + * read from `payloadView` (which must stay valid until the send completes); otherwise + * it is read from `payload`. */ struct OutgoingMessage { /** Encoded message payload — the value serialized to bytes through `Schema`. @@ -115,12 +119,16 @@ struct OutgoingMessage { std::optional key; /** Per-message user metadata. Empty by default. */ Properties properties; - std::optional eventTime; ///< Application event time; unset (nullopt) by default. - std::optional sequenceId; ///< Explicit sequence id; unset = auto-assign. - std::optional deliverAt; ///< Scheduled delivery time; unset = deliver immediately. + /** Application event time; unset (nullopt) by default. */ + std::optional eventTime; + /** Explicit sequence id; unset = auto-assign. */ + std::optional sequenceId; + /** Scheduled delivery time; unset = deliver immediately. */ + std::optional deliverAt; /** Target clusters for geo-replication; empty applies the topic's default. */ std::vector replicationClusters; - std::optional transaction; ///< Enlisting transaction; unset = non-transactional. + /** Enlisting transaction; unset = non-transactional. */ + std::optional transaction; }; template @@ -451,8 +459,7 @@ class ProducerBuilder { * Set the per-message send timeout: how long a send may stay unacknowledged * before failing. * - * @param d the timeout, in milliseconds. Optional; when unset the SDK default - * applies. + * @param d the timeout. Optional; when unset the SDK default applies. * @return `*this`, for chaining. 
*/ ProducerBuilder& sendTimeout(std::chrono::milliseconds d) { diff --git a/include/pulsar/st/ProtobufNativeSchema.h b/include/pulsar/st/ProtobufNativeSchema.h index 5fadc197..50aff69c 100644 --- a/include/pulsar/st/ProtobufNativeSchema.h +++ b/include/pulsar/st/ProtobufNativeSchema.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -41,12 +42,17 @@ struct ProtobufNativeSerDe { "protobufNativeSchema requires T to be a generated protobuf Message"); SchemaInfo info() const { return pulsar::createProtobufNativeSchema(T::descriptor()); } Expected encode(const T& value, std::vector& out) const { - out.resize(value.ByteSizeLong()); - if (!value.SerializeToArray(out.data(), static_cast(out.size()))) + const std::size_t size = value.ByteSizeLong(); + if (size > static_cast(std::numeric_limits::max())) + return unexpected(pulsar::ResultInvalidMessage, "protobuf message too large to serialize"); + out.resize(size); + if (!value.SerializeToArray(out.data(), static_cast(size))) return unexpected(pulsar::ResultInvalidMessage, "failed to serialize protobuf message"); return {}; } Expected decode(std::span data) const { + if (data.size() > static_cast(std::numeric_limits::max())) + return unexpected(pulsar::ResultInvalidMessage, "protobuf message too large to parse"); T message; if (message.ParseFromArray(data.data(), static_cast(data.size()))) return message; return unexpected(pulsar::ResultInvalidMessage, "failed to parse protobuf message"); diff --git a/include/pulsar/st/QueueConsumer.h b/include/pulsar/st/QueueConsumer.h index c883b882..b7184466 100644 --- a/include/pulsar/st/QueueConsumer.h +++ b/include/pulsar/st/QueueConsumer.h @@ -55,38 +55,38 @@ namespace pulsar::st { * connecting. Fields not selected by `useNamespace` are ignored. */ struct QueueConsumerConfig { - /// Selects namespace mode over single-topic mode. When `false` (the default), - /// `topic` is used; when `true`, `namespaceName` (and `propertyFilters`) apply. 
+ /** Selects namespace mode over single-topic mode. When `false` (the default), + * `topic` is used; when `true`, `namespaceName` (and `propertyFilters`) apply. */ bool useNamespace = false; - /// Fully-qualified topic name. Used only when `useNamespace == false`. Mutually - /// exclusive with `namespaceName`. + /** Fully-qualified topic name. Used only when `useNamespace == false`. Mutually + * exclusive with `namespaceName`. */ std::string topic; // when !useNamespace - /// Namespace name (`tenant/namespace`). Used only when `useNamespace == true`. - /// Subscribes to all scalable topics in the namespace with live membership. + /** Namespace name (`tenant/namespace`). Used only when `useNamespace == true`. + * Subscribes to all scalable topics in the namespace with live membership. */ std::string namespaceName; // when useNamespace - /// Namespace mode only: AND filters matched against topic properties to select - /// which topics in the namespace are included. Empty means no filtering (all - /// topics). Ignored in single-topic mode. + /** Namespace mode only: AND filters matched against topic properties to select + * which topics in the namespace are included. Empty means no filtering (all + * topics). Ignored in single-topic mode. */ Properties propertyFilters; // namespace mode: AND filters over topic properties - /// REQUIRED. Subscription name shared by all consumers of this subscription. + /** REQUIRED. Subscription name shared by all consumers of this subscription. */ std::string subscriptionName; // REQUIRED - /// Where the subscription starts when it is first created. Default - /// `SubscriptionInitialPosition::Latest` (skip the backlog). Has no effect once - /// the subscription already exists. + /** Where the subscription starts when it is first created. Default + * `SubscriptionInitialPosition::Latest` (skip the backlog). Has no effect once + * the subscription already exists. 
*/ SubscriptionInitialPosition initialPosition = SubscriptionInitialPosition::Latest; - /// Optional consumer name (useful for diagnostics and metrics). Default unset, in - /// which case the broker assigns one. + /** Optional consumer name (useful for diagnostics and metrics). Default unset, in + * which case the broker assigns one. */ std::optional consumerName; - /// Acknowledgment tuning (e.g. the ack-grouping/batching window and negative-ack - /// redelivery delay). Default-constructed `AckPolicy` when unset. + /** Acknowledgment tuning (e.g. the ack-grouping/batching window and negative-ack + * redelivery delay). Default-constructed `AckPolicy` when unset. */ AckPolicy ackPolicy; - /// Optional dead-letter policy: route messages to a dead-letter topic after - /// repeated redelivery. Default unset (no dead-lettering). + /** Optional dead-letter policy: route messages to a dead-letter topic after + * repeated redelivery. Default unset (no dead-lettering). */ std::optional deadLetterPolicy; - /// Arbitrary client-side consumer properties (reported in topic stats). Default empty. + /** Arbitrary client-side consumer properties (reported in topic stats). Default empty. */ Properties properties; - /// Schema descriptor for the value type `T`. Populated automatically by the builder - /// from the `Schema` it was constructed with. + /** Schema descriptor for the value type `T`. Populated automatically by the builder + * from the `Schema` it was constructed with. */ SchemaInfo schema; }; diff --git a/include/pulsar/st/Schema.h b/include/pulsar/st/Schema.h index 9f4fa788..9e15f8fa 100644 --- a/include/pulsar/st/Schema.h +++ b/include/pulsar/st/Schema.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -81,16 +82,15 @@ using BytesView = std::span; * @tparam S the candidate SerDe type. * @tparam T the value type the SerDe handles. 
*/ +// clang-format off template concept SerDeFor = requires(const S& serde, const T& value, std::span data, std::vector& out) { - { serde.info() } - ->std::convertible_to; - { serde.encode(value, out) } - ->std::convertible_to>; - { serde.decode(data) } - ->std::convertible_to>; + { serde.info() } -> std::convertible_to; + { serde.encode(value, out) } -> std::convertible_to>; + { serde.decode(data) } -> std::convertible_to>; }; +// clang-format on /** * `Schema` is the typed seam of the API: `Producer`, `Consumer` and @@ -223,8 +223,9 @@ inline void encodeBigEndian(U value, std::vector& out) { template inline U decodeBigEndian(std::span data) { static_assert(std::is_integral_v, "integral only"); + assert(data.size() >= sizeof(U) && "callers validate the payload length before decoding"); std::make_unsigned_t u = 0; - for (std::size_t i = 0; i < sizeof(U) && i < data.size(); ++i) { + for (std::size_t i = 0; i < sizeof(U); ++i) { u = (u << 8) | std::to_integer(data[i]); } return static_cast(u); diff --git a/include/pulsar/st/StreamConsumer.h b/include/pulsar/st/StreamConsumer.h index cddb66e3..9874a2b3 100644 --- a/include/pulsar/st/StreamConsumer.h +++ b/include/pulsar/st/StreamConsumer.h @@ -55,45 +55,45 @@ namespace pulsar::st { * connecting. Fields not selected by `useNamespace` are ignored. */ struct StreamConsumerConfig { - /// Selects namespace mode over single-topic mode. When `false` (the default), - /// `topic` is used; when `true`, `namespaceName` (and `propertyFilters`) apply. + /** Selects namespace mode over single-topic mode. When `false` (the default), + * `topic` is used; when `true`, `namespaceName` (and `propertyFilters`) apply. */ bool useNamespace = false; - /// Fully-qualified topic name. Used only when `useNamespace == false`. Mutually - /// exclusive with `namespaceName`. + /** Fully-qualified topic name. Used only when `useNamespace == false`. Mutually + * exclusive with `namespaceName`. 
*/ std::string topic; // when !useNamespace - /// Namespace name (`tenant/namespace`). Used only when `useNamespace == true`. - /// Subscribes to all scalable topics in the namespace with live membership. + /** Namespace name (`tenant/namespace`). Used only when `useNamespace == true`. + * Subscribes to all scalable topics in the namespace with live membership. */ std::string namespaceName; // when useNamespace - /// Namespace mode only: AND filters matched against topic properties to select - /// which topics in the namespace are included. Empty means no filtering (all - /// topics). Ignored in single-topic mode. + /** Namespace mode only: AND filters matched against topic properties to select + * which topics in the namespace are included. Empty means no filtering (all + * topics). Ignored in single-topic mode. */ Properties propertyFilters; // namespace mode: AND filters over topic properties - /// REQUIRED. Subscription name shared by all consumers of this subscription. + /** REQUIRED. Subscription name shared by all consumers of this subscription. */ std::string subscriptionName; // REQUIRED - /// Where the subscription starts when it is first created. Default - /// `SubscriptionInitialPosition::Latest` (skip the backlog). Has no effect once - /// the subscription already exists. + /** Where the subscription starts when it is first created. Default + * `SubscriptionInitialPosition::Latest` (skip the backlog). Has no effect once + * the subscription already exists. */ SubscriptionInitialPosition initialPosition = SubscriptionInitialPosition::Latest; - /// Optional key/value properties attached to the subscription itself (persisted - /// broker-side). Default empty. + /** Optional key/value properties attached to the subscription itself (persisted + * broker-side). Default empty. */ Properties subscriptionProperties; - /// Optional consumer name (useful for diagnostics and metrics). Default unset, in - /// which case the broker assigns one. 
+ /** Optional consumer name (useful for diagnostics and metrics). Default unset, in + * which case the broker assigns one. */ std::optional consumerName; - /// Acknowledgment tuning (e.g. the ack-grouping/batching window). Default-constructed - /// `AckPolicy` when unset. Note: `AckPolicy::negativeAckRedeliveryDelay` is ignored here — - /// a StreamConsumer acknowledges cumulatively and has no negative-ack/redelivery path. + /** Acknowledgment tuning (e.g. the ack-grouping/batching window). Default-constructed + * `AckPolicy` when unset. Note: `AckPolicy::negativeAckRedeliveryDelay` is ignored here — + * a StreamConsumer acknowledges cumulatively and has no negative-ack/redelivery path. */ AckPolicy ackPolicy; - /// When set to `true`, read from the topic's compacted view (latest value per key) - /// instead of the full log. Default unset (broker default, i.e. uncompacted). + /** When set to `true`, read from the topic's compacted view (latest value per key) + * instead of the full log. Default unset (broker default, i.e. uncompacted). */ std::optional readCompacted; - /// When set to `true`, replicate the subscription's acknowledged position across - /// geo-replication clusters. Default unset (broker default, i.e. disabled). + /** When set to `true`, replicate the subscription's acknowledged position across + * geo-replication clusters. Default unset (broker default, i.e. disabled). */ std::optional replicateSubscriptionState; - /// Arbitrary client-side consumer properties (reported in topic stats). Default empty. + /** Arbitrary client-side consumer properties (reported in topic stats). Default empty. */ Properties properties; - /// Schema descriptor for the value type `T`. Populated automatically by the builder - /// from the `Schema` it was constructed with. + /** Schema descriptor for the value type `T`. Populated automatically by the builder + * from the `Schema` it was constructed with. */ SchemaInfo schema; };