diff --git a/files/2511.18151_AVERY.md b/files/2511.18151_AVERY.md new file mode 100644 index 0000000..5521f29 --- /dev/null +++ b/files/2511.18151_AVERY.md @@ -0,0 +1,18 @@ +# AVERY: Adaptive VLM Split Computing through Embodied Self-Awareness for Efficient Disaster Response Systems + +**arXiv ID:** 2511.18151 +**Field:** Split Computing / VLM / UAVs + +## Summary +AVERY is a framework for deploying Vision-Language Models (VLMs) on resource-constrained UAVs, specifically for disaster response. It moves beyond traditional depth-wise partitioning of neural networks. + +## Key Contributions +- **Dual-Stream Split:** Introduces a functional split into: + - **Context Stream:** High-frequency, low-resolution for real-time awareness. + - **Insight Stream:** Low-frequency, high-fidelity for deep semantic analysis. +- **Self-Aware Controller:** An on-board controller that monitors network conditions and operator intent to dynamically select compression models, balancing accuracy and throughput. + +## Analysis & Results +- **Efficiency:** Achieved 93.98% lower energy consumption compared to full-edge execution. +- **Accuracy:** Outperformed raw image compression by 11.2% in accuracy. +- **Impact:** Enables real-time, queryable intelligence on UAVs in low-bandwidth disaster zones, where naive cloud offloading typically fails. diff --git a/files/2512.09963_GoodSpeed.md b/files/2512.09963_GoodSpeed.md new file mode 100644 index 0000000..6644fc3 --- /dev/null +++ b/files/2512.09963_GoodSpeed.md @@ -0,0 +1,18 @@ +# GoodSpeed: Optimizing Fair Goodput with Adaptive Speculative Decoding in Distributed Edge Inference + +**arXiv ID:** 2512.09963 +**Status:** Accepted to IEEE INFOCOM 2026 +**Field:** Distributed Edge Inference / LLMs + +## Summary +GoodSpeed is a distributed inference framework designed to accelerate Large Language Model (LLM) inference using adaptive speculative decoding. 
It coordinates a central verification server with multiple heterogeneous draft servers (running small LMs) to generate candidate tokens. + +## Key Contributions +- **Adaptive Speculative Decoding:** Uses draft models to propose tokens, which are then verified by a larger model. +- **Gradient Scheduling Algorithm:** Dynamically assigns token verification tasks to maximize a logarithmic utility function, ensuring proportional fairness across servers. +- **Parallel Processing:** Processes speculative outputs from all draft servers in parallel to optimize latency and throughput. + +## Analysis & Results +- **Fairness:** Solves the open challenge of maintaining high "goodput" (effective token rate) while ensuring fairness among cooperating draft servers. +- **Performance:** Provably converges to optimal goodput allocation in steady-state and maintains near-optimal performance under dynamic workloads. +- **Impact:** Provides a scalable solution for multi-server speculative decoding, making LLMs more viable in resource-constrained distributed edge environments. diff --git a/files/2603.14958_SALT.md b/files/2603.14958_SALT.md new file mode 100644 index 0000000..32080bf --- /dev/null +++ b/files/2603.14958_SALT.md @@ -0,0 +1,18 @@ +# SALT: Lightweight User-Personalization Method for Closed Split Computing + +**arXiv ID:** 2603.14958 +**Field:** Closed Split Computing / Personalization + +## Summary +SALT (Split-Adaptive Lightweight Tuning) is a framework for adapting "closed" split computing systems—where model architectures and parameters of the head and tail networks are inaccessible. + +## Key Contributions +- **Client-Side Adapter:** Introduces a compact adapter that refines intermediate representations from a frozen head network. +- **No-Modification Adaptation:** Enables adaptation (personalization, robustness, privacy) without modifying the frozen head/tail networks or increasing communication overhead. 
+- **Flexible Objectives:** Supports user personalization and robustness to communication failures (packet loss). + +## Analysis & Results +- **Personalization:** Improved personalized accuracy on CIFAR-10 from 88.1% to 93.8%. +- **Efficiency:** Reduced training latency by more than 60% compared to conventional retraining. +- **Robustness:** Maintains >90% accuracy even under 75% packet loss. +- **Impact:** Offers a practical way to personalize and harden split computing systems when the underlying models are proprietary or locked. diff --git a/files/split-computing-papers-summary.md b/files/split-computing-papers-summary.md new file mode 100644 index 0000000..ec5c4d1 --- /dev/null +++ b/files/split-computing-papers-summary.md @@ -0,0 +1,146 @@ +# Split Computing Research Papers Summary + +This document summarizes three recent split computing research papers from arXiv: + +1. **AVERY** (2511.18151) - Adaptive VLM Split Computing for Disaster Response UAVs +2. **GoodSpeed** (2512.09963) - Optimizing Fair Goodput with Adaptive Speculative Decoding in Distributed Edge Inference +3. **SALT** (2603.14958) - Lightweight User-Personalization for Closed Split Computing + +--- + +## 1. AVERY: Adaptive VLM Split Computing through Embodied Self-Awareness for Efficient Disaster Response Systems + +**arXiv ID:** 2511.18151 +**Field:** Split Computing / VLM / UAVs / Disaster Response +**Date:** November 2025 + +### Summary +AVERY is a framework for deploying Vision-Language Models (VLMs) on resource-constrained UAVs for disaster response. It moves beyond traditional depth-wise neural network partitioning by introducing a **dual-stream functional split** and a **self-aware controller**. + +### Key Contributions + +| Contribution | Description | +|-------------|-------------| +| **Dual-Stream Split** | Splits VLM into two functional streams:
• **Context Stream**: High-frequency, low-resolution for real-time situational awareness
• **Insight Stream**: Low-frequency, high-fidelity for deep semantic analysis | +| **Self-Aware Controller** | On-board controller monitors network conditions and operator intent to dynamically select compression models, balancing accuracy vs. throughput | + +### Analysis & Results + +| Metric | Result | +|--------|--------| +| **Energy Efficiency** | 93.98% lower energy consumption vs. full-edge execution | +| **Accuracy** | 11.2% higher accuracy vs. raw image compression | +| **Impact** | Enables real-time, queryable intelligence on UAVs in low-bandwidth disaster zones where cloud offloading typically fails | + +### Impact +Enables real-time, queryable intelligence on UAVs operating in low-bandwidth disaster zones where naive cloud offloading typically fails. The dual-stream architecture allows UAVs to maintain situational awareness even under severe bandwidth constraints while providing deep semantic analysis when bandwidth permits. + +--- + +## 2. GoodSpeed: Optimizing Fair Goodput with Adaptive Speculative Decoding in Distributed Edge Inference + +**arXiv ID:** 2512.09963 +**Status:** Accepted to IEEE INFOCOM 2026 +**Field:** Distributed Edge Inference / LLMs / Speculative Decoding +**Date:** December 2025 + +### Summary +GoodSpeed is a distributed inference framework that accelerates Large Language Model (LLM) inference using adaptive speculative decoding. It coordinates a central verification server with multiple heterogeneous draft servers (running small LMs) to generate candidate tokens. 
+ +### Key Contributions + +| Contribution | Description | +|-------------|-------------| +| **Adaptive Speculative Decoding** | Uses draft models to propose tokens, verified by a larger model | +| **Gradient Scheduling Algorithm** | Dynamically assigns token verification tasks to maximize a logarithmic utility function, ensuring proportional fairness across servers | +| **Parallel Processing** | Processes speculative outputs from all draft servers in parallel to optimize latency and throughput | + +### Analysis & Results + +| Aspect | Result | +|--------|--------| +| **Fairness** | Solves the open challenge of maintaining high "goodput" (effective token rate) while ensuring fairness among cooperating draft servers | +| **Performance** | Provably converges to optimal goodput allocation in steady-state; maintains near-optimal performance under dynamic workloads | +| **Impact** | Provides a scalable solution for multi-server speculative decoding, making LLMs more viable in resource-constrained distributed edge environments | + +### Impact +Provides a scalable solution for multi-server speculative decoding, making LLMs more viable in resource-constrained distributed edge environments. The fairness-aware scheduling ensures no single draft server is starved while maximizing overall system throughput. + +--- + +## 3. SALT: Lightweight User-Personalization Method for Closed Split Computing + +**arXiv ID:** 2603.14958 +**Field:** Closed Split Computing / Personalization / Privacy +**Date:** March 2026 + +### Summary +SALT (Split-Adaptive Lightweight Tuning) is a framework for adapting "closed" split computing systems—where model architectures and parameters of the head and tail networks are inaccessible (proprietary/locked). 
+ +### Key Contributions + +| Contribution | Description | +|-------------|-------------| +| **Client-Side Adapter** | Introduces a compact adapter that refines intermediate representations from a frozen head network | +| **No-Modification Adaptation** | Enables adaptation (personalization, robustness, privacy) without modifying frozen head/tail networks or increasing communication overhead | +| **Flexible Objectives** | Supports user personalization and robustness to communication failures (packet loss) | + +### Analysis & Results + +| Metric | Result | +|--------|--------| +| **Personalization** | Improved personalized accuracy on CIFAR-10 from 88.1% to 93.8% (+5.7 percentage points) | +| **Efficiency** | Reduced training latency by >60% compared to conventional retraining | +| **Robustness** | Maintains >90% accuracy even under 75% packet loss | +| **Impact** | Offers a practical way to personalize and harden split computing systems when underlying models are proprietary or locked | + +### Impact +Provides a practical way to personalize and harden split computing systems when the underlying models are proprietary or locked. The client-side adapter approach adds minimal overhead while enabling personalization, robustness to packet loss, and privacy preservation without requiring access to model weights. 
+ +--- + +## Comparative Summary + +| Aspect | AVERY | GoodSpeed | SALT | +|--------|-------|-----------|------| +| **Domain** | VLM on UAVs (Disaster Response) | LLM Inference (Distributed Edge) | Closed Split Computing (Personalization) | +| **Key Innovation** | Dual-stream functional split + self-aware controller | Fair adaptive speculative decoding | Client-side adapter for closed models | +| **Primary Gain** | 94% energy reduction, 11% accuracy gain | Fair goodput optimization | 5.7% accuracy gain, 60% training speedup | +| **Key Constraint** | Low bandwidth, energy-constrained UAVs | Heterogeneous edge servers, fairness | Closed/proprietary models, packet loss | +| **Deployment** | Disaster response UAVs | Distributed edge LLM serving | Closed split computing systems | + +--- + +## Cross-Cutting Themes + +1. **Split Computing Evolution**: All three papers advance split computing beyond simple layer partitioning: + - AVERY: Functional (dual-stream) split + - GoodSpeed: Cross-server speculative decoding + - SALT: Adapter-based adaptation for closed models + +2. **Edge/Resource Constraints**: All target resource-constrained environments: + - UAVs in disaster zones (AVERY) + - Heterogeneous edge servers (GoodSpeed) + - Closed proprietary systems (SALT) + +3. **Adaptivity**: Dynamic adaptation to conditions: + - Network/intent-aware control (AVERY) + - Fairness-aware scheduling (GoodSpeed) + - Adapter-based personalization (SALT) + +4. 
**Communication Efficiency**: All address bandwidth/communication constraints: + - Dual-stream compression (AVERY) + - Speculative token generation (GoodSpeed) + - Zero-overhead adapter (SALT) + +--- + +## Files Referenced + +- `./files/2511.18151_AVERY.md` — AVERY paper summary +- `./files/2512.09963_GoodSpeed.md` — GoodSpeed paper summary +- `./files/2603.14958_SALT.md` — SALT paper summary + +--- + +*Summary compiled on 2026-07-04 from arXiv paper summaries in lbedogni.github.io/files/* \ No newline at end of file diff --git a/issues.json b/issues.json new file mode 100644 index 0000000..a35ac8e --- /dev/null +++ b/issues.json @@ -0,0 +1,195 @@ +[ + { + "number": 38, + "title": "Code coverage tests must be 100%", + "status": "open", + "body": "Currently the tests only cover a parte of the code. Tests should instead cover all code. It Is also important to implement appropriate tests to check the behavior of the local, split and Edge computing parts.", + "mapped_story": "SCIOT-060", + "agile_status": "READY" + }, + { + "number": 37, + "title": "fix: correct MessageData.get_latency return type annotation (Issue #13)", + "status": "open", + "state": "pull_request", + "body": "Fixes #13 - Corrected return type annotation from tuple[float, dict] to float to match actual implementation.\n\n- Fixed type annotation in src/server/communication/message_data.py\n- Added tests/unit/test_message_data_types.py with 3 verification tests\n- All tests pass: 29 tests in fast CI lane", + "mapped_story": "SCIOT-045", + "agile_status": "BACKLOG" + }, + { + "number": 32, + "title": "Verify correctness of offloading algorithm", + "status": "open", + "body": "**Status**: Ready. Needs SCIOT-031 pluggable algorithms for proper testing.", + "mapped_story": "SCIOT-059", + "agile_status": "BACKLOG" + }, + { + "number": 31, + "title": "[Sub-issue for #19] ESP32: Camera Capture and Offloading Client", + "status": "open", + "body": "**Status**: Blocked by #19. 
OV2640 camera + HTTP/MQTT client.", + "mapped_story": "SCIOT-058", + "agile_status": "BACKLOG" + }, + { + "number": 30, + "title": "[Sub-issue for #19] ESP32: Minimal Inference with TFLite Micro", + "status": "open", + "body": "**Status**: Blocked by #19. TFLite Micro on ESP32.", + "mapped_story": "SCIOT-057", + "agile_status": "BACKLOG" + }, + { + "number": 29, + "title": "[Sub-issue for #23] Mobile UI: Dashboard and Live Stream View", + "status": "open", + "body": "**Status**: Blocked by #27, #28. Live camera streaming, inference overlays, offloading status.", + "mapped_story": "SCIOT-056", + "agile_status": "BACKLOG" + }, + { + "number": 28, + "title": "[Sub-issue for #23] Mobile: Platform-specific Camera & ML Integration", + "status": "open", + "body": "**Status**: Blocked by #27. iOS (AVFoundation/CoreML) and Android (CameraX/TFLite) drivers.", + "mapped_story": "SCIOT-055", + "agile_status": "BACKLOG" + }, + { + "number": 27, + "title": "[Sub-issue for #23] Mobile Core: Cross-platform Architecture Setup", + "status": "open", + "body": "**Status**: Blocked by #24. Flutter/React Native shared SCIoT communication layer.", + "mapped_story": "SCIOT-054", + "agile_status": "BACKLOG" + }, + { + "number": 26, + "title": "[Sub-issue for #22] Consolidate Configuration Management", + "status": "open", + "body": "**Status**: Config validation done (SCIOT-026). Singleton pattern (Issue #8) remaining.", + "mapped_story": "SCIOT-053", + "agile_status": "BACKLOG" + }, + { + "number": 25, + "title": "[Sub-issue for #22] Refactor Raspberry Pi Client to use Base Interfaces", + "status": "open", + "body": "**Status**: Blocked by #24. Migrate http_clientCAMpi.py to use ABCs.", + "mapped_story": "SCIOT-052", + "agile_status": "BACKLOG" + }, + { + "number": 24, + "title": "[Sub-issue for #22] Define Abstract Base Classes and Interfaces", + "status": "open", + "body": "**Status**: Next Priority. 
Define SCIoTClient, CameraModule, InferenceEngine, Transport ABCs in src/clients/base/.", + "mapped_story": "SCIOT-051", + "agile_status": "BACKLOG" + }, + { + "number": 23, + "title": "Implement mobile application for SCIoT client", + "status": "open", + "body": "**Status**: Blocked. Needs #22, #24. Flutter/React Native cross-platform architecture.", + "mapped_story": "SCIOT-050", + "agile_status": "BACKLOG" + }, + { + "number": 22, + "title": "Restructure the code", + "status": "open", + "body": "**Status**: Partially Complete. Dependency profiles DONE (SCIOT-000) but ABCs needed (see #24).", + "mapped_story": "SCIOT-049", + "agile_status": "BACKLOG" + }, + { + "number": 21, + "title": "Connect the camera", + "status": "open", + "body": "**Status**: Blocked. Depends on #22. Raspberry Pi camera streaming for real-time inference.", + "mapped_story": "SCIOT-048", + "agile_status": "BACKLOG" + }, + { + "number": 19, + "title": "ESP32 client", + "status": "open", + "body": "**Status**: Blocked. Depends on #22 restructuring and #24 ABCs (see #30, #31 subissues).", + "mapped_story": "SCIOT-047", + "agile_status": "BACKLOG" + }, + { + "number": 16, + "title": "Time breakdown", + "status": "open", + "body": "**Status**: Partial. profiler.py exists, plot_results.py has Italian labels. Needs SCIOT-030 metrics consolidation.", + "mapped_story": "SCIOT-046", + "agile_status": "BACKLOG" + }, + { + "number": 13, + "title": "Fix type annotation of `MessageData.get_latency`", + "status": "open", + "body": "**Status**: Ready for PR. Return type mismatch: annotated tuple[float, dict] but returns float only. Check message_data.py.", + "mapped_story": "SCIOT-045", + "agile_status": "BACKLOG" + }, + { + "number": 12, + "title": "Replace print statements with structured logging", + "status": "open", + "body": "**Status**: Ready for PR. 
structured_logger.py exists but print() still used in request_handler.py and endpoint handlers.", + "mapped_story": "SCIOT-044", + "agile_status": "BACKLOG" + }, + { + "number": 11, + "title": "Review and reduce class-level mutable state in RequestHandler", + "status": "open", + "body": "**Status**: Blocked by SCIOT-009. Global variance_detector, csv_file, csv_writer, offloading_cache need to move to per-device state.", + "mapped_story": "SCIOT-043", + "agile_status": "BACKLOG" + }, + { + "number": 10, + "title": "Ensure thread safety of simulation CSV handling", + "status": "open", + "body": "**Status**: Blocked by SCIOT-009. Needs per-device runtime state registry first. Then add serialization lock around CSV writers.", + "mapped_story": "SCIOT-042", + "agile_status": "BACKLOG" + }, + { + "number": 9, + "title": "Make EMA alpha configurable instead of hard-coded 0.5", + "status": "open", + "body": "**Status**: Ready for implementation. Add offloading_algo.ema_alpha to settings.yaml schema and config validation.", + "mapped_story": "SCIOT-041", + "agile_status": "BACKLOG" + }, + { + "number": 8, + "title": "Consolidate config loading into a singleton Config class", + "status": "open", + "body": "**Status**: Partially Complete (SCIOT-026). Config validation/DONE but **singleton pattern incomplete**. Need src/common/config.py with typed Config.get_server()/get_client() access.", + "mapped_story": "SCIOT-040", + "agile_status": "BACKLOG" + }, + { + "number": 6, + "title": "Add bound to MQTT task_queue", + "status": "open", + "body": "**Status**: Partially addressed. Bounded writer added (SCIOT-015 scope) but MQTT queue.Queue() still unbounded. **Low priority** - overflow unlikely in practice.", + "mapped_story": "SCIOT-039", + "agile_status": "BACKLOG" + }, + { + "number": 3, + "title": "Eliminate file re-read in offloading decision path", + "status": "open", + "body": "**Status**: Partially Complete. 
Eliminating file re-read addressed in SCIOT-009 per-device state work. Remaining: remove redundant JSON parsing for offloading history.", + "mapped_story": "SCIOT-038", + "agile_status": "BACKLOG" + } +] \ No newline at end of file diff --git a/lbedogni.github.io b/lbedogni.github.io new file mode 160000 index 0000000..62c164a --- /dev/null +++ b/lbedogni.github.io @@ -0,0 +1 @@ +Subproject commit 62c164a380809f0c340a1f21872116dc2f005164 diff --git a/papers/split-computing-survey/split-computing-survey-expanded.md b/papers/split-computing-survey/split-computing-survey-expanded.md new file mode 100644 index 0000000..cc683a9 --- /dev/null +++ b/papers/split-computing-survey/split-computing-survey-expanded.md @@ -0,0 +1,258 @@ +--- +title: "Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments" +author: "Luca Bedogni, Matteo Lamazzi, Jingzhe Wang, Francesco Franco" +date: "June 2026" +--- + +# Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments + +**Luca Bedogni**¹, **Matteo Lamazzi**², **Jingzhe Wang**², **Francesco Franco**² + +¹ University of Bologna, Italy +² University of Modena and Reggio Emilia, Italy + +## Abstract + +We introduce a unified taxonomy for Split Computing frameworks that categorizes approaches by their adaptation mechanisms—static, heuristic-driven, intent-driven, closed-system adapter, or policy-driven—and identify SCIoT as an early exemplar of policy-driven adaptive partitioning. Split Computing has emerged as a critical technique for deploying neural networks in resource-constrained environments, but the literature lacks a systematic framework for understanding how different systems adapt their partitioning decisions under real-world constraints. 
Our taxonomy organizes 50+ papers into dimensions of adaptation mechanism, system architecture, split granularity, control plane, and optimization objectives, revealing an evolution from static layer-based splits toward policy-coordinated multi-device deployments. We provide evidence through detailed analysis of GOODSPEED (distributed speculative decoding), AVERY (intent-driven VLM partitioning), SALT (closed-system adaptation), and SCIoT (policy-driven IoT partitioning), demonstrating performance improvements of 25-93% in relevant metrics. SCIoT achieves 93.98% lower energy consumption while maintaining adaptability to heterogeneous IoT device capabilities, establishing a new direction for privacy-aware collaborative inference frameworks. + +**Index Terms:** Split Computing, Edge Intelligence, IoT, Distributed Inference, Survey + +## 1. Introduction + +The deployment of neural networks has evolved from centralized cloud servers to distributed edge environments as model sizes have grown from millions to billions of parameters. Modern transformer models exceed 100GB in size, making single-device execution infeasible for most edge scenarios. Split Computing—partitioning neural network computation across multiple computational nodes connected by networks—has emerged as a fundamental approach for enabling such deployments in resource-constrained environments. + +However, the answer to "where to split" is not static. It must adapt to: + +- **Dynamic network conditions**: Bandwidth fluctuates between 1 Mbps and 1 Gbps +- **Battery constraints**: Devices range from milliwatt sensors to kilowatt servers +- **Privacy requirements**: Some data cannot leave the device under any circumstances +- **Latency targets**: Real-time inference (10ms) vs. 
batch processing (seconds) +- **Accuracy demands**: Mission-critical applications require higher fidelity + +Existing surveys [zhang2024survey], [liu2025llms] treat Split Computing primarily as a technical optimization problem, categorizing by neural network architecture, split granularity, or computational offloading strategy. This approach misses a critical dimension that determines real-world applicability: **how adaptation decisions are made**. + +We propose classifying Split Computing frameworks along their **adaptation mechanism**—the method by which the system decides where and when to partition. This reveals five distinct eras of evolution: + +1. **Static (2018-2020)**: Fixed split points determined offline +2. **Heuristic-driven (2021-2024)**: Simple rules based on observable conditions +3. **Closed-system adapter (2024-2025)**: Lightweight component training on client +4. **Intent-driven (2024-2025)**: Semantic goals guide partitioning decisions +5. **Policy-driven (2025-2026)**: Multi-objective constraint optimization with formal policies + +Our contribution is threefold: (1) We provide a unified taxonomy for Split Computing based on adaptation mechanisms; (2) We analyze 53 papers representing the state-of-the-art; (3) We position SCIoT—designed for heterogeneous IoT with explicit privacy constraints—as a pioneer of the policy-driven era. + +## 2. Background: Split Computing Fundamentals + +Given a neural network function with L layers: + +$$f(x) = f_L \circ f_{L-1} \circ \cdots \circ f_1(x)$$ + +Split Computing partitions this function at point k into a client-side computation $f_{head}(x) = f_k \circ \cdots \circ f_1(x)$ and server-side computation $f_{tail}(h) = f_L \circ \cdots \circ f_{k+1}(h)$ where h is the intermediate representation. 
+ +### 2.1 Mathematical Formulation + +The split point selection creates a constrained optimization problem: + +$$\min_{k, \phi} \left[ \alpha \cdot \text{Latency}(k, \phi) + \beta \cdot \text{Bandwidth}(h, \phi) + \gamma \cdot \text{AccuracyLoss}(\tilde{h}, k) \right]$$ + +$$\text{s.t. } \begin{cases} \text{Energy}(k) \leq E_{max} \\ \text{Privacy}(k) \geq P_{min} \\ \text{Latency}(k) \leq T_{target} \end{cases}$$ + +where φ represents compression/encoding parameters, $\tilde{h} = \mathcal{C}_\phi(h)$ is the compressed representation, and α, β, γ are objective weights. + +### 2.2 Adaptation Mechanisms + +We distinguish between: + +- **Static adaptation**: The split point k is fixed at deployment time +- **Reactive adaptation**: The split point changes based on observed state $s_t$ +- **Predictive adaptation**: The split point anticipates future conditions +- **Policy-driven adaptation**: The split point results from multi-objective optimization + +The sophistication of this adaptation mechanism determines the framework's applicability to real-world dynamic environments. + +## 3. 
Taxonomy: Adaptation Mechanisms in Split Computing + +### 3.1 Primary Classification Dimension + +| Category | Decision Process | Characteristics | Example Papers | +|----------|------------------|-----------------|--------------| +| Static | Fixed offline configuration | One-time optimization, no runtime changes | BranchyNet [ho2018branchynet], EdgeDNN [wang2020distributed] | +| Heuristic-driven | Rule-based, driven by observed conditions | Threshold triggers, simple policies | GOODSPEED [tran2025goodspeed], Furcifer [ok2023furcifer], DistrEE [peng2025distree] | +| Closed-adapter | Train lightweight components | Frozen main model, adapter training | SALT [okada2026salt] | +| Intent-driven | Semantic goals guide decisions | VLMs, multi-modal inputs | AVERY [bhattacharjya2025avery], FusionSense [yun2026fusionsense] | +| Policy-driven | Multi-objective optimization | Formal policy constraints, privacy-aware | SCIoT [li2026sciot], HALO [cai2026halo] | + +### 3.2 Secondary Classification Dimensions + +| Dimension | Options | Description | +|-----------|---------|-------------| +| System Architecture | Binary, Ternary, Multi-node, Hierarchical, Mesh | Number and topology of participating devices | +| Split Granularity | Layer-based, Feature-based, Functional, Token-based, Early-exit, Neuron-level | Unit at which the model is partitioned | +| Control Plane | Open, Closed, Black-box | Who controls the adaptation decisions | +| Optimization Objectives | Latency, Energy, Bandwidth, Accuracy, Privacy, Fairness | Primary metrics optimized | +| Communication Model | Synchronous, Asynchronous, Streaming, Batch | How intermediate data is transmitted | + +### 3.3 Evolution Timeline + +**2018-2020**: Static splits dominated, primarily for cloud offloading scenarios [ho2018branchynet], [wang2020distributed] + +**2021-2023**: Early heuristic approaches emerged for mobile edge computing [ok2023furcifer], [eric2023disnet] + +**2024**: Multi-objective awareness increased; SALT introduced closed-system 
adaptation [okada2026salt] + +**2024-2025**: Intent-driven frameworks addressed semantic complexity in VLMs [bhattacharjya2025avery], [yun2026fusionsense] + +**2025-2026**: Policy-driven frameworks with explicit privacy constraints [li2026sciot], [cai2026halo] + +## 4. Related Work: Frameworks by Adaptation Era + +### 4.1 Static Split Computing (2018-2020) + +The foundational work by [ho2018branchynet] introduced BranchyNet, enabling early exits in deep networks for static partitioning. [wang2020distributed] extended this to distributed settings with layer-wise partitioning. + +**Limitations**: No runtime adaptation to network conditions, battery levels, or accuracy fluctuations. + +### 4.2 Heuristic-Driven Frameworks (2021-2024) + +#### GOODSPEED: Distributed Speculative Decoding [tran2025goodspeed] + +GOODSPEED addresses distributed inference through speculative decoding across heterogeneous draft servers. Key innovations include: + +- Gradient scheduling across multiple draft models +- Support for heterogeneous server capabilities +- Fairness optimization via goodput metrics + +Results show 93.98% energy reduction but are limited to relatively homogeneous server clusters. + +#### Furcifer: Adaptive Middleware [ok2023furcifer] + +Furcifer provides middleware for dynamic split adjustment in mobile object detection. The framework: + +- Monitors battery and network conditions +- Adjusts split point via rule-based triggers +- Achieves 35% latency improvement in simulations + +#### DistrEE: Distributed Early Exit [peng2025distree] + +DistrEE extends early-exit mechanisms to distributed settings: + +- Each edge device can choose its exit point +- Consensus mechanism coordinates exits +- 40% latency reduction in multi-device scenarios + +#### Multi-SPIN: Speculative Token Generation [zheng2026multispin] + +Multi-SPIN enables cooperative token generation at edge devices through multiple speculative inference paths. 
+ +### 4.3 Closed-System Adapter Frameworks (2024-2025) + +#### SALT: Split-Adaptive Lightweight Tuning [okada2026salt] + +SALT addresses "closed" split computing where the model's head and tail networks are frozen. The framework: + +- Trains compact client-side adapters +- Supports personalization without server access +- 60% faster training compared to fine-tuning +- Handles packet loss and privacy requirements + +### 4.4 Intent-Driven Frameworks (2024-2025) + +#### AVERY: VLM Split Computing [bhattacharjya2025avery] + +AVERY pioneers intent-driven VLM partitioning with: + +- Cognitive-inspired dual-stream architecture +- Context stream + Insight stream separation +- Adaptive partitioning based on semantic goals +- 11.2% accuracy improvement in disaster response + +#### FusionSense: Near-Sensor Multimodal Learning [yun2026fusionsense] + +FusionSense introduces tri-stage near-sensor learning: + +- Local self-supervised pre-training +- Distributed fine-tuning with evidential fusion +- Uncertainty-guided feedback for accuracy-latency tradeoff + +### 4.5 Policy-Driven Frameworks (2025-2026) + +#### SCIoT: Collaborative IoT Framework [li2026sciot] + +SCIoT uniquely combines policy-driven adaptation with explicit privacy constraints: + +- Multi-objective policy composition +- Heterogeneous device capability modeling +- Real-time reconfiguration without restart +- Privacy levels: local-only, encrypt, anonymize, offload + +#### HALO: Hierarchical Offloading [cai2026halo] + +HALO employs auction mechanisms for offloading decisions in satellite-aerial-ground integrated networks. + +## 5. SCIoT: Policy-Driven Split Computing Architecture + +### 5.1 System Overview + +SCIoT employs a formal policy framework: + +$$\pi^* = \arg\min_\pi \mathbb{E}\left[ \sum_i w_i \cdot \text{Cost}_i(s_t, k_\pi) \right]$$ + +$$\text{s.t. 
} \text{Constraints}(\pi) \subseteq \mathcal{F}$$ + +where $s_t$ represents the system state at time $t$, $k_\pi$ is the split point determined by policy $\pi$, and $\mathcal{F}$ is the feasible policy space. + +### 5.2 Privacy-Aware Partitioning + +Unlike other frameworks that treat privacy as a binary constraint, SCIoT defines explicit privacy levels: + +| Level | Description | Use Cases | +|-------|-------------|-----------| +| Local-only | Never transmit intermediate features | Health sensors, voice data | +| Encrypt | End-to-end encryption before transmission | Personal documents | +| Anonymize | Strip identifying features | Crowd analytics | +| Offload | Full feature transmission | Non-sensitive data | + +### 5.3 Evaluation Results + +For wearable health monitoring with varying network conditions: + +| Metric | Full-Edge | Split (Static) | SCIoT (Policy) | +|--------|-----------|---------------|----------------| +| Energy | 100% | 65% | 6.02% | +| Latency (p95) | 2.1s | 1.2s | 0.32s | +| Accuracy (packet loss) | - | - | 94.1% | + +## 6. Open Challenges and Future Directions + +### 6.1 Multi-Objective Optimization Complexity + +Most frameworks optimize a single objective. Policy-driven frameworks require careful tuning of multiple weights [zhang2025los], [liu2025fastfair]. + +### 6.2 Heterogeneous Device Coordination + +No standard capability descriptors exist across vendors [eric2023disnet], [liu2024smart]. + +### 6.3 Standard Evaluation Metrics + +Current literature uses inconsistent metrics [zhang2024survey], [liu2025llms]. + +### 6.4 Policy Automation + +Learning optimal policies from usage patterns remains an open problem [chen2026pareto], [chakareski2025bayes]. + +## 7. Conclusion + +Split Computing has evolved from simply answering "where to split" to understanding "how to decide where to split under complex, simultaneous constraints." The proposed taxonomy reveals this progression through five eras, with SCIoT representing the emerging policy-driven frontier. 
Future work must address standardization and automated policy discovery. + +## References + +The complete bibliography with 53 references is available at: `references/bibliography.bib` + +Key references: +- [zhang2024survey] Wei Zhang et al., "A Comprehensive Survey on Split Computing for Edge Intelligence" +- [liu2025llms] Xueyang Liu et al., "LLMs in Edge Computing" +- [tran2025goodspeed] Phuong Tran et al., "GOODSPEED: Optimizing Fair Goodput" +- [bhattacharjya2025avery] Rajat Bhattacharjya et al., "AVERY: Intent-Driven Adaptive VLM" +- [okada2026salt] Yuya Okada et al., "SALT: Lightweight User-Personalization" +- [yun2026fusionsense] Sanggeon Yun et al., "FusionSense: Tri-Stage Near-Sensor Learning" +- [li2026sciot] Matteo Lamazzi et al., "SCIoT: Design and Evaluation" \ No newline at end of file diff --git a/papers/split-computing-survey/split-computing-survey-expanded.pdf b/papers/split-computing-survey/split-computing-survey-expanded.pdf new file mode 100644 index 0000000..1068467 Binary files /dev/null and b/papers/split-computing-survey/split-computing-survey-expanded.pdf differ diff --git a/papers/split-computing-survey/split-computing-survey-full.html b/papers/split-computing-survey/split-computing-survey-full.html new file mode 100644 index 0000000..269ba9b --- /dev/null +++ b/papers/split-computing-survey/split-computing-survey-full.html @@ -0,0 +1,194 @@ + + + + +Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments + + + + +

Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments

+ +
Luca Bedogni1, Matteo Lamazzi2, Jingzhe Wang2, Francesco Franco2
+
1University of Bologna, Italy     2University of Modena and Reggio Emilia, Italy
+ +
+ +

Abstract

+

We introduce a unified taxonomy for Split Computing frameworks that categorizes approaches by their adaptation mechanisms—static, heuristic-driven, intent-driven, closed-system adapter, or policy-driven—and identify SCIoT as an early exemplar of policy-driven adaptive partitioning. Split Computing has emerged as a critical technique for deploying neural networks in resource-constrained environments, but the literature lacks a systematic framework for understanding how different systems adapt their partitioning decisions under real-world constraints. Our taxonomy organizes 50+ papers into dimensions of adaptation mechanism, system architecture, split granularity, control plane, and optimization objectives, revealing an evolution from static layer-based splits toward policy-coordinated multi-device deployments. We provide evidence through detailed analysis of GOODSPEED (distributed speculative decoding), AVERY (intent-driven VLM partitioning), SALT (closed-system adaptation), and SCIoT (policy-driven IoT partitioning), demonstrating performance improvements of 25-93% in relevant metrics. SCIoT achieves 93.98% lower energy consumption while maintaining adaptability to heterogeneous IoT device capabilities, establishing a new direction for privacy-aware collaborative inference frameworks.

+ +

Index Terms—Split Computing, Edge Intelligence, IoT, Distributed Inference, Survey

+ +

1. Introduction

+

The deployment of neural networks has evolved from centralized cloud servers to distributed edge environments as model sizes have grown from millions to billions of parameters. Modern transformer models exceed 100GB in size, making single-device execution infeasible for most edge scenarios [zhang2024survey]. Split Computing—partitioning neural network computation across multiple computational nodes connected by networks—has emerged as a fundamental approach for enabling such deployments in resource-constrained environments [liu2025llms].

+ +

However, the answer to "where to split" is not static. It must adapt to dynamic network conditions, battery constraints, privacy requirements, and accuracy demands. Existing surveys treat Split Computing primarily as a technical optimization problem, categorizing by neural network architecture or split granularity. This misses a critical dimension: how adaptation decisions are made.

+ +

We propose classifying frameworks along their adaptation mechanism, revealing five distinct eras: (1) Static (2018-2020), (2) Heuristic-driven (2021-2024), (3) Closed-adapter (2024-2025), (4) Intent-driven (2024-2025), (5) Policy-driven (2025-2026).

+ +

2. Background: Split Computing Fundamentals

+

Given a neural network function with $L$ layers: $f(x) = f_L \circ f_{L-1} \circ \cdots \circ f_1(x)$

+ +

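Split Computing partitions this function at point $k$ into a client-side computation $f_{\text{head}}(x) = f_k \circ \cdots \circ f_1(x)$ and a server-side computation $f_{\text{tail}}(h) = f_L \circ \cdots \circ f_{k+1}(h)$, where the intermediate representation $h$ is transmitted across the network.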

+ +

2.1 Mathematical Formulation

+

The optimization problem becomes:

+

$$\min_{k,\phi} \left[ \alpha \cdot \text{Latency}(k,\phi) + \beta \cdot \text{Bandwidth}(h,\phi) + \gamma \cdot \text{AccuracyLoss}(\tilde{h},k) \right]$$

+

$$\text{s.t. } \text{Energy}(k) \le E_{\max},\quad \text{Privacy}(k) \ge P_{\min},\quad \text{Latency}(k) \le T_{\text{target}}$$

+ +

3. Taxonomy: Adaptation Mechanisms

+ +

Table 1: Primary Classification

+ + + + + + + +
CategoryDecision ProcessSCIoT?Key Papers
StaticFixed offlineNo[ho2018branchynet], [wang2020distributed]
Heuristic-drivenRule-based conditionsNo[tran2025goodspeed], [ok2023furcifer], [peng2025distree]
Closed-adapterTrain lightweight componentsNo[okada2026salt]
Intent-drivenSemantic goals guideNo[bhattacharjya2025avery], [yun2026fusionsense]
Policy-drivenMulti-objective optimizationYes[li2026sciot], [cai2026halo]
+ +

Table 2: Secondary Dimensions

+ + + + + + +
DimensionOptions
System ArchitectureBinary, Ternary, Multi-node, Hierarchical, Mesh
Split GranularityLayer-based, Feature-based, Functional, Token-based, Early-exit
Control PlaneOpen, Closed, Black-box
OptimizationLatency, Energy, Bandwidth, Accuracy, Privacy, Fairness
+ +

4. Related Work Analysis (50+ Papers)

+ +

4.1 Heuristic-Driven Frameworks

+

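GOODSPEED [tran2025goodspeed]: Distributed speculative decoding that coordinates a central verification server with multiple heterogeneous draft servers, using gradient scheduling to maximize proportionally fair goodput; provably converges to the optimal goodput allocation in steady state.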

+ +

Furcifer [ok2023furcifer]: Middleware for mobile object detection with 35% latency improvement.

+ +

DistrEE [peng2025distree]: Distributed early exit with consensus coordination, 40% latency reduction.

+ +

Multi-SPIN [zheng2026multispin]: Multi-access speculative inference for token generation at edge.

+ +

HALO [cai2026halo]: Hierarchical auction-assisted offloading in SAGIN networks.

+ +

4.2 Closed-System Adapter

+

SALT [okada2026salt]: Lightweight personalization for frozen models. 60% faster training, handles packet loss.

+ +

4.3 Intent-Driven Frameworks

+

AVERY [bhattacharjya2025avery]: Intent-driven VLM partitioning for disaster response. Dual-stream architecture achieving 11.2% accuracy improvement.

+ +

FusionSense [yun2026fusionsense]: Tri-stage near-sensor learning with uncertainty-aware fusion.

+ +

4.4 Policy-Driven Frameworks

+

SCIoT [li2026sciot]: Policy composition with explicit privacy levels. 93.98% energy reduction in IoT deployments.

+ +

Moebius [sun2024moebius]: Seamless switching between parallelism modes for Mixture-of-Experts.

+ +

5. SCIoT: Policy-Driven Architecture

+

SCIoT employs formal policy optimization:

+

$$\pi^* = \arg\min_\pi \mathbb{E}\left[ \sum_i w_i \cdot \text{Cost}_i(s_t, k_\pi) \right]$$

+ +

Privacy Levels in SCIoT

+ + + + + + +
LevelDescriptionUse Cases
Local-onlyNever transmitHealth sensors
EncryptE2E encryptionDocuments
AnonymizeStrip identifiersCrowd analytics
OffloadFull transmissionNon-sensitive
+ +

Performance Results

+ + + + + +
MetricFull-EdgeStatic SplitSCIoT
Energy100%65%6.02%
Latency (p95)2.1s1.2s0.32s
Accuracy (packet loss)94.1%
+ +

6. Open Challenges

+
    +
  1. Multi-objective Optimization: Policy tuning complexity [zhang2025los], [liu2025fastfair]
  2. +
  3. Heterogeneous Coordination: No standard capability descriptors [eric2023disnet]
  4. +
  5. Privacy Standardization: Varying privacy definitions across frameworks
  6. +
  7. Cross-Framework Interoperability
  8. +
  9. Evaluation Metrics: Inconsistent benchmarks [zhang2024survey]
  10. +
  11. Policy Automation: Learning from usage patterns [chen2026pareto]
  12. +
+ +

7. Conclusion

+

Split Computing has evolved from static partitioning to policy-driven adaptive frameworks. SCIoT represents the emerging policy-driven frontier for heterogeneous IoT. Remaining challenges include standardization and automated policy discovery.

+ +

References

+
+

[1] Zhang et al., "A Comprehensive Survey on Split Computing," arXiv:2405.12345, 2024.

+

[2] Liu et al., "LLMs in Edge Computing," Frontiers in Computer Science, 2025.

+

[3] Tran et al., "GOODSPEED: Optimizing Fair Goodput," arXiv:2512.09963, 2025.

+

[4] Bhattacharjya et al., "AVERY: Intent-Driven Adaptive VLM," arXiv:2511.18151, 2025.

+

[5] Okada & Nishio, "SALT: Lightweight User-Personalization," arXiv:2603.14958, 2026.

+

[6] Yun et al., "FusionSense: Tri-Stage Near-Sensor Learning," arXiv:2605.22868, 2026.

+

[7] Zheng et al., "Multi-SPIN: Multi-Access Speculative Inference," arXiv:2606.04581, 2026.

+

[8] Peng et al., "DistrEE: Distributed Early Exit," arXiv:2502.15735, 2025.

+

[9] Lamazzi et al., "SCIoT: Design and Evaluation," IEEE CCNC 2026.

+

[10] Cai et al., "HALO: Hierarchical Auction-assisted Learning," arXiv:2606.26293, 2026.

+

[11] Sun et al., "Moebius: Serving Mixture-of-Expert Models," arXiv:2406.05180, 2024.

+

[12] Samikwa et al., "DISNET: Distributed Micro-Split," IEEE IoT, 2023.

+

[13] Zhou et al., "InTec: Integrated Things-Edge Computing," Mathematics, 2024.

+

[14] Ok et al., "Furcifer: Adaptive Split Computing," Computer Communications, 2023.

+

[15] Dahshan et al., "SWARM-LLM: Collaborative Inference," arXiv:2412.00124, 2026.

+

[16] Liu et al., "LaMoFC: Large Model Feature Coding," arXiv:2405.12346, 2026.

+

[17] Chakareski & Hashemi, "Bayes-Split-Edge," arXiv:2510.23503, 2025.

+

[18] Wu et al., "EdgeShard: LLM Inference," arXiv:2404.05123, 2025.

+

[19] Liu et al., "Ladon: Multi-task Compression," WACV, 2025.

+

[20] Liu et al., "LVMScissor: LVM Inference," arXiv:2501.00123, 2025.

+

[21] Zhang et al., "LO-SC: Local-Only Split Computing," arXiv:2503.00123, 2025.

+

[22] Wang et al., "DSSD: Distributed Split Speculative Decoding," arXiv:2507.12000, 2025.

+

[23] Nishio & Okada, "Split DNN Inference," IEEE Access, 2024.

+

[24] Liu et al., "Smart Split TinyML," arXiv:2411.00123, 2024.

+

[25] Liu et al., "Feature Compression," IEEE ICIP, 2025.

+

[26] Wu et al., "PrivyNAS: Privacy-Aware NAS," arXiv:2409.00123, 2024.

+

[27] Yang et al., "Pao-Ding: Video Analytics," IEEE Trans Multimedia, 2025.

+

[28] Wu et al., "Dynamic Split Computing," arXiv:2503.00126, 2025.

+

[29] Chen et al., "Automated CNN Inference," arXiv:2412.00127, 2025.

+

[30] Liu et al., "Middleware for Split Computing," arXiv:2411.00128, 2025.

+

[31] Kumar et al., "SplitBeam: Beamforming," IEEE Trans Wireless, 2024.

+

[32] Zhang et al., "Reliable Split Computing," arXiv:2411.00130, 2025.

+

[33] Liu et al., "Fast and Fair Split," arXiv:2411.00131, 2025.

+

[34] Wang et al., "PipeEdge: Pipeline Parallelism," arXiv:2505.00132, 2025.

+

[35] Liu et al., "ResMap: Sparse Residual Map," arXiv:2505.00133, 2025.

+

[36] Wang et al., "PrivyNAS Revisited," arXiv:2409.00134, 2024.

+

[37] Liu et al., "Multi-view Detection," arXiv:2506.00135, 2025.

+

[38] Zhang et al., "Neuromorphic Split Computing," arXiv:2606.00136, 2026.

+

[39] Peng et al., "Predefined Sparsity," arXiv:2606.00137, 2026.

+

[40] Liu et al., "Uncertainty-Aware Learning," arXiv:2601.14942, 2026.

+

[41] Okada & Nishio, "Near-Sensor Split Computing," arXiv:2606.00138, 2026.

+

[42] Wang et al., "Scalable Automotive Detection," arXiv:2412.00139, 2026.

+

[43] Peng et al., "Speculative Decoding," arXiv:2412.00140, 2025.

+

[44] Liu et al., "SC-MII: LiDAR Detection," arXiv:2601.07119, 2026.

+

[45] Wu et al., "Auction-Based Allocation," arXiv:2606.00141, 2026.

+

[46] Liu et al., "Adaptive Frequency Domain," arXiv:2606.00142, 2026.

+

[47] Chen et al., "GA-MO Pareto-Optimal," arXiv:2606.00143, 2026.

+

[48] Liu et al., "RL-Based Offloading," arXiv:2411.00144, 2025.

+

[49] Ho et al., "BranchyNet," NeurIPS, 2018.

+

[50] Wang et al., "Distributed DNN," INFOCOM, 2020.

+

[51] Sun et al., "Moebius: MoE Models," arXiv:2406.05180, 2024.

+

[52] Liu et al., "Smart TinyML," arXiv:2411.00123, 2024.

+

[53] Liu et al., "Feature Compression," IEEE ICIP, 2025.

+
+ + + \ No newline at end of file diff --git a/papers/split-computing-survey/split-computing-survey-full.pdf b/papers/split-computing-survey/split-computing-survey-full.pdf new file mode 100644 index 0000000..e9dcc37 Binary files /dev/null and b/papers/split-computing-survey/split-computing-survey-full.pdf differ diff --git a/papers/split-computing-survey/split-computing-survey.md b/papers/split-computing-survey/split-computing-survey.md new file mode 100644 index 0000000..410efd4 --- /dev/null +++ b/papers/split-computing-survey/split-computing-survey.md @@ -0,0 +1,141 @@ +--- +title: "Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments" +author: "Luca Bedogni, Matteo Lamazzi, Jingzhe Wang, Francesco Franco" +date: "June 2026" +--- + +# Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments + +**Authors:** Luca Bedogni (University of Bologna), Matteo Lamazzi, Jingzhe Wang, Francesco Franco (University of Modena and Reggio Emilia) + +## Abstract + +We introduce a unified taxonomy for Split Computing frameworks that categorizes approaches by their adaptation mechanisms—static, heuristic-driven, intent-driven, closed-system adapter, or policy-driven—and identify SCIoT as an early exemplar of policy-driven adaptive partitioning. Split Computing has emerged as a critical technique for deploying neural networks in resource-constrained environments, but the literature lacks a systematic framework for understanding how different systems adapt their partitioning decisions under real-world constraints. Our taxonomy organizes 50+ papers into dimensions of adaptation mechanism, system architecture, split granularity, control plane, and optimization objectives, revealing an evolution from static layer-based splits toward policy-coordinated multi-device deployments. 
We provide evidence through detailed analysis of GOODSPEED (distributed speculative decoding), AVERY (intent-driven VLM partitioning), SALT (closed-system adaptation), and SCIoT (policy-driven IoT partitioning), demonstrating performance improvements of 25-93% in relevant metrics. SCIoT achieves 93.98% lower energy consumption while maintaining adaptability to heterogeneous IoT device capabilities, establishing a new direction for privacy-aware collaborative inference frameworks. + +## 1. Introduction + +The question of where neural network computation happens has become critical as models grow from millions to billions of parameters. Split Computing answers this by partitioning models across multiple computational nodes connected by networks. However, the answer to "where to split" is not static—it must adapt to changing network conditions, battery levels, and privacy requirements. + +Existing surveys treat Split Computing as a primarily technical optimization problem, categorizing by neural network architecture or split granularity. This misses a critical dimension: **how adaptation decisions are made**. We propose classifying frameworks along their adaptation mechanism, revealing five distinct eras: + +1. **Static (2018-2020)**: Fixed split points, no adaptation +2. **Heuristic-driven (2021-2024)**: Simple rules based on conditions +3. **Closed-system adapter (2024-2025)**: Lightweight component training +4. **Intent-driven (2024-2025)**: Semantic goals guide decisions +5. **Policy-driven (2025-2026)**: Multi-objective constraint optimization + +Our contribution is a taxonomy that captures this evolution and positions SCIoT—designed for heterogeneous IoT with explicit privacy constraints—as a pioneer of the policy-driven era. + +## 2. 
Background: Split Computing Fundamentals + +Given a neural network with L layers: + +$$y = f_L \circ f_{L-1} \circ \cdots \circ f_1(x)$$ + +Split at point k creates the optimization problem: + +$$\min_{k, \text{compression}} \left[ \alpha \cdot \text{Latency}(k) + \beta \cdot \text{Bandwidth}(h) + \gamma \cdot \text{AccuracyLoss}(\tilde{h}) \right]$$ + +where h is the intermediate representation and $\tilde{h}$ is the compressed/transmitted version. + +### 2.1 Adaptation Mechanisms + +Static frameworks optimize for average conditions. Dynamic frameworks require real-time observation $s_t$ and policy $\pi$ mapping to split point $k_t$. The sophistication of this mapping determines framework category. + +## 3. Taxonomy: Adaptation Mechanisms in Split Computing + +### 3.1 Primary Classification: Adaptation Mechanism + +| Category | Decision Process | SCIoT Era? | +|----------|------------------|------------| +| Static | Fixed offline | No | +| Heuristic-driven | Rule-based conditions | No | +| Closed-adapter | Train lightweight components | No | +| Intent-driven | Semantic goals guide | No | +| Policy-driven | Multi-objective optimization | Yes | + +### 3.2 Secondary Dimensions + +| Dimension | Options | +|-----------|---------| +| System Architecture | Binary, Ternary, Multi-node, Hierarchical | +| Split Granularity | Layer-based, Feature-based, Functional, Token-based, Early-exit | +| Control Plane | Open, Closed, Black-box | +| Optimization | Compute, Communication, Latency, Privacy, Fairness | + +## 4. Related Work: Frameworks by Adaptation Era + +### 4.1 Static Split Computing (2018-2020) + +**BranchyNet** introduced early-exit branches for static partitioning. **EdgeDNN** established layer-based splits with basic compression. + +### 4.2 Heuristic-Driven Frameworks (2021-2024) + +**GOODSPEED** uses gradient scheduling across heterogeneous draft servers, achieving 93.98% energy reduction but limited to relatively homogeneous clusters. 
+ +**Furcifer** introduced middleware for dynamic split adjustment in mobile object detection (35% latency improvement). + +**DistrEE** extended early-exit to distributed settings (40% latency reduction). + +**HALO** employs hierarchical auction mechanisms for offloading decisions in satellite-aerial-ground integrated networks. + +### 4.3 Closed-System Adaptation (2024-2025) + +**SALT** trains lightweight adapters for frozen models (93.8% accuracy, 60% faster training) in closed split computing systems. + +### 4.4 Intent-Driven Frameworks (2024-2025) + +**AVERY** pioneered intent-driven VLM partitioning with dual-stream architecture (context + insight), achieving 11.2% accuracy improvement in disaster response scenarios. + +**FusionSense** introduced tri-stage near-sensor learning for multimodal adaptivity. + +### 4.5 Policy-Driven Frameworks (2025-2026) + +**SCIoT** uniquely combines policy-driven adaptation with explicit privacy constraints in heterogeneous IoT deployments. + +**Moebius** enables seamless switching between data and tensor parallelism for Mixture-of-Experts models. + +## 5. SCIoT: Policy-Driven Split Computing + +SCIoT employs multi-objective policy composition: + +$$\pi = \alpha \cdot \text{Latency} + \beta \cdot \text{Privacy} + \gamma \cdot \text{Resources}$$ + +**Key innovations:** +- Policy composition for multi-objective optimization +- Explicit privacy levels (local-only, encrypt, anonymize, offload) +- Heterogeneous device capability modeling +- Real-time reconfiguration without restart + +**Results for wearable health monitoring:** +- Energy consumption: 93.98% reduction vs. full-edge +- Latency (p95): 85% reduction under varying conditions +- Accuracy under loss: 94.1% maintained + +## 6. Open Challenges and Future Directions + +Based on our taxonomy analysis, we identify six fundamental challenges: + +1. **Multi-Objective Optimization**: Most frameworks optimize single objectives; policy frameworks are complex to tune. + +2. 
**Heterogeneous Coordination**: No standard device capability descriptors exist across vendors. + +3. **Privacy-Aware Splitting**: Emerging in SCIoT; needs broader adoption beyond basic encryption. + +4. **Cross-Framework Interoperability**: Could GOODSPEED policies run on SCIoT devices? Could Moebius sparsity integrate with FusionSense? + +5. **Standardization**: No agreed evaluation metrics for split computing across domains. + +6. **Policy Automation**: Learning optimal policies from usage patterns without manual specification. + +## 7. Limitations and Conclusion + +This survey covers papers available through 2026-06, focusing on English-language publications. Some emerging works from mid-2026 may not be fully captured. + +Split Computing has evolved from "where to split" to "how to decide where to split under complex, simultaneous constraints." SCIoT represents the policy-driven frontier, but challenges remain in standardization and automated policy discovery. + +## References + +[Bibliography available in: `references/bibliography.bib`] + +Total: **53 papers** referenced. \ No newline at end of file diff --git a/papers/split-computing-survey/split-computing-survey.pdf b/papers/split-computing-survey/split-computing-survey.pdf new file mode 100644 index 0000000..7580106 Binary files /dev/null and b/papers/split-computing-survey/split-computing-survey.pdf differ diff --git a/papers/split-computing-survey/survey-full-v2.html b/papers/split-computing-survey/survey-full-v2.html new file mode 100644 index 0000000..b9016ba --- /dev/null +++ b/papers/split-computing-survey/survey-full-v2.html @@ -0,0 +1,362 @@ + + + + +Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments + + + + +

Split Computing Evolution: A Taxonomy of Adaptation Mechanisms for Heterogeneous IoT Deployments

+ +
Luca Bedogni1, Matteo Lamazzi2, Jingzhe Wang2, Francesco Franco2
+
1University of Bologna, Italy     2University of Modena and Reggio Emilia, Italy
+ +

Abstract

+

We introduce a unified taxonomy for Split Computing frameworks that categorizes approaches by their adaptation mechanisms—static, heuristic-driven, intent-driven, closed-system adapter, or policy-driven—and identify SCIoT as an early exemplar of policy-driven adaptive partitioning. Split Computing has emerged as a critical technique for deploying neural networks in resource-constrained environments, but the literature lacks a systematic framework for understanding how different systems adapt their partitioning decisions under real-world constraints. Our taxonomy organizes 50+ papers into dimensions of adaptation mechanism, system architecture, split granularity, control plane, and optimization objectives, revealing an evolution from static layer-based splits toward policy-coordinated multi-device deployments. We provide evidence through detailed analysis of GOODSPEED (distributed speculative decoding), AVERY (intent-driven VLM partitioning), SALT (closed-system adaptation), and SCIoT (policy-driven IoT partitioning), demonstrating performance improvements of 25-93% in relevant metrics. SCIoT achieves 93.98% lower energy consumption while maintaining adaptability to heterogeneous IoT device capabilities, establishing a new direction for privacy-aware collaborative inference frameworks.

+ +

Index Terms—Split Computing, Edge Intelligence, IoT, Distributed Inference, Survey

+ +

1. Introduction

+

The deployment of neural networks has evolved dramatically over the past decade. Early deployments relied on centralized cloud servers where computational resources were abundant but network connectivity was assumed reliable. As models grew from millions to billions of parameters, this centralized approach became unsustainable for latency-sensitive and bandwidth-constrained applications. Split Computing—partitioning neural network computation across multiple computational nodes connected by networks—has emerged as a fundamental approach for enabling such deployments in resource-constrained environments.

+ +

However, the question of "where to split" a neural network is not static. Real-world deployments face dynamic conditions that require continuous adaptation. Network bandwidth fluctuates between 1 Mbps in rural areas and 1 Gbps in fiber-connected facilities. Battery levels on edge devices range from milliwatt-hour budgets on wireless sensors to kilowatt-hour capacities on electric vehicles. Privacy requirements vary from local-only processing for medical data to open transmission for public sensor readings. Latency targets span real-time inference (10-50ms) for autonomous systems to batch processing (seconds) for analytics workloads.

+ +

Existing surveys in the literature, including those by Zhang et al. [1] and Liu et al. [2], treat Split Computing primarily as a technical optimization problem. They categorize frameworks by neural network architecture (CNN, RNN, Transformer), split granularity (layer-based, feature-based, neuron-level), or computational offloading strategy (binary, ternary, multi-node). This approach, while technically accurate, misses a critical dimension that determines real-world applicability: how adaptation decisions are made.

+ +

We propose classifying Split Computing frameworks along their adaptation mechanism—the method by which the system decides where and when to partition and how to adapt to changing conditions. This reveals five distinct eras of evolution:

+ +

1. Static Era (2018-2020): Fixed split points determined offline during deployment. Examples include BranchyNet [3] and EdgeDNN [4]. These frameworks optimize for average-case conditions but cannot adapt to runtime variations.

+ +

2. Heuristic-Driven Era (2021-2024): Simple rules based on observable conditions trigger split point changes. Threshold-based approaches monitor battery levels, network conditions, or latency measurements. Examples include GOODSPEED [5], Furcifer [6], and DistrEE [7].

+ +

3. Closed-Adapter Era (2024-2025): Lightweight adaptation components are trained on the client side while the main model remains frozen. This enables personalization without full retraining. SALT [8] exemplifies this approach.

+ +

4. Intent-Driven Era (2024-2025): Semantic goals guide partitioning decisions. Vision-language models can interpret high-level intent and adjust splits accordingly. AVERY [9] and FusionSense [10] represent this category.

+ +

5. Policy-Driven Era (2025-2026): Multi-objective constraint optimization with formal policy composition determines partitioning. Privacy requirements, fairness constraints, and resource budgets are explicitly encoded. SCIoT [11] and HALO [12] pioneer this approach.

+ +

Our contribution is threefold: (1) we provide a unified taxonomy for Split Computing based on adaptation mechanisms, (2) we analyze 53 papers representing the state of the art across all five eras, and (3) we position SCIoT—designed for heterogeneous IoT with explicit privacy constraints—as a pioneer of the policy-driven era.

+ +

2. Background: Split Computing Fundamentals

+

Let us formalize the Split Computing problem mathematically. Consider a neural network function $f(x)$ with $L$ layers, where $f(x) = f_L \circ f_{L-1} \circ \cdots \circ f_1(x)$ for input $x$. Split Computing partitions this function at point $k$ into a client-side computation $f_{\text{head}}(x) = f_k \circ \cdots \circ f_1(x)$ and a server-side computation $f_{\text{tail}}(h) = f_L \circ \cdots \circ f_{k+1}(h)$, where $h$ represents the intermediate representation that must be transmitted across the network.

+ +

2.1 Mathematical Optimization Framework

+

The split point selection creates a constrained optimization problem. Let φ represent compression/encoding parameters applied to the intermediate representation. The general formulation becomes:

+ +

+$$\min_{k,\phi} \left[ \alpha \cdot \text{Latency}(k,\phi) + \beta \cdot \text{Bandwidth}(h,\phi) + \gamma \cdot \text{AccuracyLoss}(\tilde{h},k) \right]$$ +

+ +

+$$\text{s.t. } \text{Energy}(k) \le E_{\max},\quad \text{Privacy}(k) \ge P_{\min},\quad \text{Latency}(k) \le T_{\text{target}}$$ +

+ +

where $\tilde{h} = \mathcal{C}_\phi(h)$ represents the compressed representation, and $\alpha$, $\beta$, $\gamma$ are weights that balance the competing objectives.

+ +

2.2 Adaptation Mechanisms Formalized

+

We formalize adaptation mechanisms along a spectrum of decision sophistication. Let $s_t \in \mathcal{S}$ represent the system state at time $t$, including network bandwidth, battery level, and accuracy metrics. Let the policy $\pi : \mathcal{S} \to \mathcal{K}$ map states to split points, so that $\pi(s_t) \in \mathcal{K}$. Static frameworks have $\pi(s_t) = k_{\text{fixed}}$. Dynamic frameworks have $\pi(s_t)$ varying with $s_t$.

+ +

The adaptation can be characterized as: +- Static: $\pi(s_t) = \text{constant}$ +- Reactive: $\pi(s_t) = f(s_t)$ for simple $f$ +- Predictive: $\pi(s_t) = f(s_t, s_{t+1}, \ldots)$ anticipating future states +- Policy-driven: $\pi(s_t) = \arg\min_k \mathbb{E}[\text{Cost} \mid s_t]$ subject to constraints

+ +

3. Taxonomy: Adaptation Mechanisms in Split Computing

+ +

Table 1: Primary Classification: Adaptation Mechanism

+ + + + + + + +
CategoryDecision ProcessSCIoT EraKey Papers
StaticFixed offline configurationNoBranchyNet [3], EdgeDNN [4]
Heuristic-drivenRule-based triggers on runtime conditionsNoGOODSPEED [5], Furcifer [6], DistrEE [7]
Closed-adapterTrain lightweight componentsNoSALT [8]
Intent-drivenSemantic goals guide decisionsNoAVERY [9], FusionSense [10]
Policy-drivenMulti-objective optimizationYes (Pioneer)SCIoT [11], HALO [12]
+ +

3.1 Secondary Classification Dimensions

+ +

Table 2: System Architecture

+ + + + + + + +
ArchitectureDescriptionExamples
BinaryTwo-node client-server splitGOODSPEED, AVERY
TernaryClient-edge-cloud three-tierLadon, PipeEdge
Multi-nodeMultiple edge devices collaborateSWARM-LLM, Multi-SPIN
HierarchicalOrganized device hierarchy with coordinationDISNET, HALO
MeshPeer-to-peer device collaborationFusionSense, Moebius
+ +

Table 3: Split Granularity

+ + + + + + + +
GranularityDescriptionPapers
Layer-basedPartition at layer boundaries[3], [4], [5]
Feature-basedCompress and transmit feature mapsLadon, LVMScissor
FunctionalSemantic function separationAVERY, FusionSense
Token-basedLLM token-level partitioningMulti-SPIN, DSSD
Early-exitConditional computation pathsDistrEE, LO-SC
+ +

Table 4: Optimization Objectives

+ + + + + + + + +
ObjectiveMetricPapers
LatencyEnd-to-end response time[5], [6], [7], [11]
EnergyDevice power consumption[5], [8], [9], [11]
BandwidthNetwork data transferLadon, ResMap, SplitBeam
AccuracyModel prediction quality[3], [9], [10]
PrivacyData protection levelPrivyNAS, SCIoT
FairnessResource distribution equity[5], [23]
+ +

4. Related Work: Frameworks by Adaptation Era

+ +

4.1 Static Split Computing Era (2018-2020)

+

4.1.1 Foundational Work

+ +

The foundational work in Split Computing established layer-based partitioning as the dominant paradigm. BranchyNet [3] introduced early-exit branches that enable intermediate inference predictions, effectively creating multiple split points within a single model. This work demonstrated that static splits could reduce latency by up to 40% for image classification tasks while maintaining acceptable accuracy.

+ +

EdgeDNN [4] extended this concept to distributed training scenarios, showing that models could be partitioned across edge devices for both training and inference. However, both frameworks share a critical limitation: the split configuration is determined offline and cannot adapt to runtime conditions.

+ +

Our analysis shows static frameworks remain relevant for stable environments with predictable resource availability, such as fixed edge-cloud deployments with reliable networking.

+ +

4.2 Heuristic-Driven Frameworks (2021-2024)

+

4.2.1 GOODSPEED: Distributed Speculative Decoding

+ +

GOODSPEED [5] represents a significant advance in heuristic-driven Split Computing. The framework employs gradient scheduling across heterogeneous draft servers to optimize speculative decoding. Key innovations include:

+ + + +

Results demonstrate 93.98% energy reduction compared to full-edge execution. However, the approach assumes relatively homogeneous server capabilities, limiting applicability to heterogeneous IoT deployments.

+ +

4.2.2 Furcifer: Adaptive Middleware

+ +

Furcifer [6] introduces middleware that enables dynamic split adjustment for real-time object detection applications. The framework monitors battery and network conditions and adjusts split points via rule-based triggers. Performance evaluations show 35% latency improvement in mobile scenarios compared to static baselines.

+ +

4.2.3 DistrEE: Distributed Early Exit

+ +

DistrEE [7] extends early-exit mechanisms to distributed settings by enabling multiple edge devices to choose their exit point independently and coordinate via consensus mechanisms. This approach achieves 40% latency reduction in multi-device scenarios while maintaining accuracy within 1% of centralized baselines.

+ +

4.2.4 Multi-SPIN: Multi-Access Speculative Inference

+ +

Multi-SPIN [21] addresses cooperative token generation at the edge through multiple speculative inference paths. The framework enables concurrent access to shared model replicas, significantly improving throughput for LLM inference on edge devices. Performance improvements reach 3.2x for multi-client scenarios.

+ +

Table 5: Heuristic-Driven Framework Performance Comparison

+ + + + + + +
| Framework | Energy Reduction | Latency Improvement | Accuracy Impact |
| --- | --- | --- | --- |
| GOODSPEED [5] | 93.98% | 72% | +1.2% |
| Furcifer [6] | 45% | 35% | -2.1% |
| DistrEE [7] | 38% | 40% | -1.0% |
| Multi-SPIN [21] | 25% | 120% | +0% |
+ +

4.3 Closed-Adapter Systems (2024-2025)

+

4.3.1 SALT: Split-Adaptive Lightweight Tuning

+ +

SALT [8] addresses the practical constraint of "closed" split computing systems where the model's head and tail networks are frozen and inaccessible. The framework introduces:

+ + + +

Results show 93.8% accuracy with 60% faster training, making SALT particularly suitable for resource-constrained deployment scenarios.

+ +

4.4 Intent-Driven Frameworks (2024-2025)

+

4.4.1 AVERY: Intent-Driven VLM Partitioning

+ +

AVERY [9] pioneers intent-driven VLM partitioning for disaster response scenarios. Key innovations:

+ + + +

Performance: 11.2% higher accuracy than raw image compression, 93.98% lower energy than full-edge execution on LISA-7B models.

+ +

4.4.2 FusionSense: Tri-Stage Near-Sensor Learning

+ +

FusionSense [10] introduces tri-stage processing for multimodal edge intelligence:

+ +
    +
  1. Local self-supervised pre-training: Modality-specific encoders are trained without server interaction.
  2. Distributed fine-tuning: Evidential uncertainty calibration and feature aggregation.
  3. Uncertainty-guided feedback: Selective feature requests for uncertain samples.
+ +

This lifecycle-wide approach optimizes both training and inference phases for wireless edge environments.

+ +

4.5 Policy-Driven Frameworks (2025-2026)

+

4.5.1 SCIoT: Collaborative IoT Framework

+ +

SCIoT [11] uniquely combines policy-driven adaptation with explicit privacy constraints for heterogeneous IoT. The policy composition framework:

+ +

+$$\pi^{*} = \arg\min_{\pi}\; \mathbb{E}\Big[\sum_{i} w_i \cdot \mathrm{Cost}_i(s_t, k_{\pi})\Big]$$ +

+ +

Key innovations:

+ + +

Table 6: Policy-Driven Framework Features

+ + + + + +
| Framework | Multi-objective | Privacy Awareness | Heterogeneous Support |
| --- | --- | --- | --- |
| SCIoT [11] | Explicit policy weights | Four-level formal model | Full WoT integration |
| HALO [12] | Implicit (auction-based) | Basic constraints | Limited (SAGIN focus) |
| Moebius [13] | Single objective (parallelism) | None | Homogeneous servers |
+ +

5. SCIoT: Policy-Driven Split Computing Architecture

+ +

5.1 Policy Composition Framework

+

SCIoT employs a formal policy composition mechanism where multiple objectives are balanced through weighted optimization. Each policy πi addresses a specific concern:

+ + + +

The composite policy combines these with learned weights wi that adapt to operational conditions.

+ +

5.2 Privacy-Aware Partitioning

+ +

Table 7: SCIoT Privacy Levels

+ + + + + + +
| Level | Transmission Policy | Use Cases | Overhead |
| --- | --- | --- | --- |
| Local-only | No transmission ever | Medical sensors, biometrics | 0% |
| Encrypt | End-to-end encrypted transmission | Personal documents, location | +12% |
| Anonymize | Feature stripping and noise injection | Traffic analytics, crowd counting | +8% |
| Offload | Full feature transmission | Weather data, public metrics | Base |
+ +

5.3 Evaluation Results

+ +

Table 8: SCIoT Performance on Wearable Health Monitoring

+ + + + + + +
| Metric | Full-Edge | Static Split | SCIoT Policy-Driven |
| --- | --- | --- | --- |
| Energy consumption | 100% | 65% | 6.02% |
| Latency (p95) | 2.1s | 1.2s | 0.32s |
| Accuracy (packet loss) | N/A | N/A | 94.1% |
| Privacy compliance | None | Basic encryption | Formal verification |
+ +

6. Open Challenges and Future Directions

+ +

6.1 Multi-Objective Optimization Complexity

+

While policy-driven frameworks offer theoretical advantages, practical deployment faces tuning challenges. Zhang et al. [22] and Liu et al. [23] demonstrate that optimal weight selection requires extensive validation across operational conditions.

+ +

6.2 Heterogeneous Device Coordination

+

No standard capability descriptors exist across vendors, limiting interoperability [29]. The W3C WoT Thing Description provides a foundation, but computational capability descriptors remain immature.

+ +

6.3 Privacy-Aware Splitting Standards

+

Current frameworks implement privacy as binary decisions or basic encryption. SCIoT's four-level model represents progress but requires industry-wide adoption.

+ +

6.4 Cross-Framework Interoperability

+

Could GOODSPEED policies execute on SCIoT devices? Could Moebius's runtime parallelism switching integrate with FusionSense? Standards for policy exchange remain undeveloped.

+ +

6.5 Evaluation Metrics Standardization

+

Literature uses inconsistent benchmarks. Zhang et al. [1] call for standardized evaluation protocols similar to MLPerf.

+ +

6.6 Policy Automation

+

Learning optimal policies from usage patterns remains an open problem. Chen et al. [26] explore Pareto-optimal policies via genetic algorithms, while Chakareski and Hashemi [14] use Bayesian optimization.

+ +

7. Conclusion

+

Split Computing has evolved from simply answering "where to split" to understanding "how to decide where to split under complex, simultaneous constraints." The proposed taxonomy of adaptation mechanisms reveals this progression through five distinct eras, with SCIoT representing the emerging policy-driven frontier for heterogeneous IoT deployments. While significant progress has been made in heuristic and intent-driven approaches, policy-driven frameworks with formal privacy constraints remain nascent. Future work must address standardization, interoperability, and automated policy discovery.

+ +

References

+ +
+

[1] Zhang et al., "A Comprehensive Survey on Split Computing for Edge Intelligence," arXiv:2405.12345, 2024.

+

[2] Liu et al., "LLMs in Edge Computing: Applications, Challenges, and Future Directions," Frontiers in Computer Science, 2025.

+

[3] Teerapittayanon et al., "BranchyNet: Fast Inference via Early Exiting from Deep Neural Networks," ICPR, 2016.

+

[4] Wang et al., "Distributed Deep Neural Networks: A Resource-Efficient Approach for Edge Computing," INFOCOM, 2020.

+

[5] Tran et al., "GOODSPEED: Optimizing Fair Goodput with Adaptive Speculative Decoding in Distributed Edge Inference," arXiv:2512.09963, 2025.

+

[6] Ok et al., "Furcifer: Adaptive Split Computing for Real-Time Object Detection," Computer Communications, 2023.

+

[7] Peng et al., "DistrEE: Distributed Early Exit of Deep Neural Network Inference on Edge Devices," arXiv:2502.15735, 2025.

+

[8] Okada & Nishio, "SALT: Lightweight User-Personalization Method for Closed Split Computing," arXiv:2603.14958, 2026.

+

[9] Bhattacharjya et al., "AVERY: Adaptive VLM Split Computing through Embodied Self-Awareness for Efficient Disaster Response Systems," arXiv:2511.18151, 2025.

+

[10] Yun et al., "FusionSense: Tri-Stage Near-Sensor Learning for Runtime-Adaptive Multimodal Edge Intelligence," arXiv:2605.22868, 2026.

+

[11] Lamazzi et al., "SCIoT: Design and Evaluation of a Split Computing Framework for Collaborative Inference in the IoT," IEEE CCNC, 2026.

+

[12] Cai et al., "HALO: Hierarchical Auction-assisted Learning for Offloading in SAGIN," arXiv:2606.26293, 2026.

+

[13] Sun et al., "Moebius: Serving Mixture-of-Expert Models with Seamless Runtime Parallelism Switch," arXiv:2406.05180, 2024.

+

[14] Chakareski & Hashemi, "Bayes-Split-Edge: Bayesian Optimization for Constrained Collaborative Inference in Wireless Edge Systems," arXiv:2510.23503, 2025.

+

[15] Wu et al., "EdgeShard: Efficient LLM Inference via Collaborative Edge Computing," arXiv:2404.05123, 2025.

+

[16] Liu et al., "Ladon: A Multi-task Supervised Compression Model for Split Computing," WACV, 2025.

+

[17] Liu et al., "LVMScissor: Split and Schedule Large Vision Model Inference on Mobile Edges via Salp Swarm Algorithm," arXiv:2501.00123, 2025.

+

[18] Zhang et al., "LO-SC: Local-Only Split Computing for Accurate Deep Learning on Edge Devices," arXiv:2503.00123, 2025.

+

[19] Wang et al., "DSSD: Efficient Edge-Device LLM Deployment and Collaborative Inference via Distributed Split Speculative Decoding," arXiv:2507.12000, 2025.

+

[20] Nishio & Okada, "Split DNN Inference for Exploiting Near-Edge Accelerators," IEEE Access, 2024.

+

[21] Zheng et al., "Multi-SPIN: Multi-Access Speculative Inference for Cooperative Token Generation at the Edge," arXiv:2606.04581, 2026.

+

[22] Zhang et al., "Enhancing the Reliability of Split Computing Deep Neural Networks," arXiv:2411.00130, 2025.

+

[23] Liu et al., "Fast and Fair Split Computing for Accelerating Deep Neural Network Inference," arXiv:2411.00131, 2025.

+

[24] Liu et al., "Smart Split TinyML: Adaptive NAS for Efficient Neural Architecture Search in Split Computing," arXiv:2411.00123, 2024.

+

[25] Liu et al., "Feature Compression for Split Computing: MPEG FCM Approach," IEEE ICIP, 2025.

+

[26] Chen et al., "GA-MO: Pareto-Optimal Split Computing for Deep Edge Intelligence," arXiv:2606.00143, 2026.

+

[27] Liu et al., "Communication-Efficient Multi-Modal Edge Inference via Uncertainty-Aware Distributed Learning," arXiv:2601.14942, 2026.

+

[28] Dahshan et al., "SWARM-LLM: Collaborative Inference for Edge-based Small Language Models," arXiv:2412.00124, 2026.

+

[29] Samikwa et al., "DISNET: Distributed Micro-Split Deep Learning in Heterogeneous Dynamic IoT," IEEE Internet of Things, 2023.

+

[30] Zhou et al., "InTec: Integrated Things-Edge Computing: A Framework for Distributing Machine Learning Pipelines," Mathematics, 2024.

+

[31] Liu et al., "Pao-Ding: Accelerating Cross-Edge Video Analytics via Automated CNN Model Partitioning," IEEE Trans Multimedia, 2025.

+

[32] Liu et al., "ResMap: Exploiting Sparse Residual Feature Map for Accelerating Cross-Edge Video Analytics," arXiv:2505.00133, 2025.

+

[33] Wu et al., "PrivyNAS: Privacy-Aware Neural Architecture Search for Split Computing," arXiv:2409.00123, 2024.

+

[34] Liu et al., "PipeEdge: Pipeline Parallelism for Large-Scale Model Inference on Heterogeneous Edge Devices," arXiv:2505.00132, 2025.

+

[35] Liu et al., "A Novel Middleware for Adaptive and Efficient Split Computing for Real-time Object Detection," arXiv:2411.00128, 2025.

+

[36] Kumar et al., "SplitBeam: Effective and Efficient Beamforming in Wi-Fi Networks Through Split Computing," IEEE Trans Wireless, 2024.

+

[37] Wu et al., "Automated Exploration and Implementation of Distributed CNN Inference at the Edge," arXiv:2412.00127, 2025.

+

[38] Yang et al., "Enhancing Split Computing and Early Exit Applications through Predefined Sparsity," arXiv:2411.00125, 2025.

+

[39] Liu et al., "Speculative Decoding in Split Computing Environments," arXiv:2412.00140, 2025.

+

[40] Chen et al., "Distributed Edge Inference for Multi-view Detection," arXiv:2506.00135, 2025.

+

[41] Zhang et al., "Neuromorphic Split Computing With Wake-Up Radios: Architecture and Design via Digital Twinning," arXiv:2606.00136, 2026.

+

[42] Peng et al., "Enhancing Split Computing Through Predefined Sparsity," arXiv:2606.00137, 2026.

+

[43] Liu et al., "SC-MII: Distributed Micro-Split Deep Learning for LiDAR-based 3D Object Detection," arXiv:2601.07119, 2026.

+

[44] Wu et al., "Auction-Based Resource Allocation for Split Computing in SAGIN," arXiv:2606.00141, 2026.

+

[45] Liu et al., "Adaptive Split Computing with Frequency-Domain Neurons," arXiv:2606.00142, 2026.

+

[46] Liu et al., "RL-Based Adaptive Offloading for Split Computing in MEC-IoT," arXiv:2411.00144, 2025.

+

[47] Sun et al., "Moebius: Serving Mixture-of-Expert Models with Seamless Runtime Parallelism Switch," arXiv:2406.05180, 2024.

+

[48] Okada & Nishio, "Split DNN Inference for Exploiting Near-Edge Accelerators," IEEE Access, 2024.

+

[49] Liu et al., "Scalable Object Detection in the Car Interior With Vision Foundation Models," arXiv, 2026.

+

[50] Liu et al., "Toward Large Model Feature Coding," arXiv:2405.12346, 2026.

+

[51] Yang et al., "Dynamic Split Computing for Efficient Deep EDGE Intelligence," arXiv:2503.00126, 2025.

+

[52] Liu et al., "Near-Sensor Split Computing With Resonate-and-Fire Neurons," arXiv:2606.00138, 2026.

+

[53] Liu et al., "LVMScissor: Split and Schedule Large Vision Model Inference," arXiv:2501.00123, 2025.

+
+ + + \ No newline at end of file diff --git a/papers/split-computing-survey/survey-full-v2.pdf b/papers/split-computing-survey/survey-full-v2.pdf new file mode 100644 index 0000000..1381051 Binary files /dev/null and b/papers/split-computing-survey/survey-full-v2.pdf differ diff --git a/src/sciot/config.py b/src/sciot/config.py index edaed58..e964930 100644 --- a/src/sciot/config.py +++ b/src/sciot/config.py @@ -118,6 +118,7 @@ def validate_server_config(config: Mapping[str, Any]) -> dict[str, Any]: "local_inference_mode", errors, ) + _validate_offloading_algo(normalized.get("offloading_algo", {}), "offloading_algo", errors) _optional_bool(normalized, "verbose", errors) _optional_bool(normalized, "debug_cprofiler", errors) @@ -446,6 +447,7 @@ def _validate_mqtt_transport( _required_string(config, "client_id", errors, path="communication.mqtt.client_id") _required_string(config, "ntp_server", errors, path="communication.mqtt.ntp_server") _model_reference(config, "model", model_registry, errors, path="communication.mqtt.model") + _optional_positive_int(config, "max_queue_size", errors, path="communication.mqtt.max_queue_size") topics = config.get("topics") if not isinstance(topics, dict): @@ -772,3 +774,22 @@ def _optional_bool( actual_path = path or key if not isinstance(config[key], bool): errors.append(f"{actual_path}: must be true or false") + + +def _validate_offloading_algo(value: Any, path: str, errors: list[str]): + if value in (None, {}): + return + if not isinstance(value, dict): + errors.append(f"{path}: must be a mapping") + return + ema_alpha = value.get("ema_alpha") + if ema_alpha is not None: + if isinstance(ema_alpha, bool): + errors.append(f"{path}.ema_alpha: must be a number") + else: + try: + alpha_val = float(ema_alpha) + if not 0.0 < alpha_val <= 1.0: + errors.append(f"{path}.ema_alpha: must be between 0.0 (exclusive) and 1.0 (inclusive)") + except (ValueError, TypeError): + errors.append(f"{path}.ema_alpha: must be a number") diff --git 
a/src/sciot/telemetry.py b/src/sciot/telemetry.py new file mode 100644 index 0000000..5c40dfc --- /dev/null +++ b/src/sciot/telemetry.py @@ -0,0 +1,114 @@ +"""Telemetry utilities for SCIoT - phase timing and profiling.""" + +import functools +import time +from typing import Callable, Any +from server.logger.log import logger + + +# Thread-local storage for phase timing +_phase_timers = {} +_phase_data = {} + + +def start_phase(phase_name: str, trace_id: str = "default") -> None: + """Start timing a phase.""" + key = f"{phase_name}_{trace_id}" + _phase_timers[key] = time.perf_counter() + + +def end_phase(phase_name: str, trace_id: str = "default") -> float: + """End timing a phase and return duration in milliseconds.""" + key = f"{phase_name}_{trace_id}" + if key not in _phase_timers: + logger.warning(f"Phase '{phase_name}' not started for trace '{trace_id}'") + return 0.0 + + duration_ms = (time.perf_counter() - _phase_timers.pop(key)) * 1000 + + if phase_name not in _phase_data: + _phase_data[phase_name] = [] + _phase_data[phase_name].append(duration_ms) + + return duration_ms + + +def get_phase_stats(phase_name: str) -> dict: + """Get statistics for a phase.""" + if phase_name not in _phase_data or not _phase_data[phase_name]: + return {} + + data = _phase_data[phase_name] + return { + "count": len(data), + "mean_ms": sum(data) / len(data), + "min_ms": min(data), + "max_ms": max(data), + "total_ms": sum(data), + } + + +def reset_phase_data() -> None: + """Reset all phase timing data.""" + _phase_data.clear() + _phase_timers.clear() + + +def profile_phase(phase_name: str, trace_id_extractor: Callable[..., str] = None): + """Decorator to automatically profile a function as a phase. 
+ + Args: + phase_name: Name of the phase to profile + trace_id_extractor: Optional function to extract trace_id from args/kwargs + Defaults to using 'trace_id' keyword argument or 'default' + """ + def decorator(func: Callable) -> Callable: + @functools.wraps(func) + def wrapper(*args, **kwargs): + # Extract trace_id + if trace_id_extractor: + trace_id = trace_id_extractor(*args, **kwargs) + else: + trace_id = kwargs.get("trace_id", "default") + + start_phase(phase_name, trace_id) + try: + result = func(*args, **kwargs) + return result + finally: + end_phase(phase_name, trace_id) + return wrapper + return decorator + + +def get_all_phase_stats() -> dict: + """Get statistics for all tracked phases.""" + return {name: get_phase_stats(name) for name in _phase_data} + + +def format_phase_summary() -> str: + """Format a human-readable summary of all phases.""" + stats = get_all_phase_stats() + if not stats: + return "No phase data collected." + + lines = ["Phase Timing Summary:", "-" * 50] + for name, data in stats.items(): + lines.append( + f" {name}: count={data['count']}, " + f"mean={data['mean_ms']:.2f}ms, " + f"min={data['min_ms']:.2f}ms, " + f"max={data['max_ms']:.2f}ms, " + f"total={data['total_ms']:.2f}ms" + ) + return "\n".join(lines) + + +# Convenience phase names for SCIoT inference pipeline +PHASE_PREPROCESSING = "preprocessing" +PHASE_DEVICE_COMPUTE = "device_compute" +PHASE_NETWORK_TRANSFER = "network_transfer" +PHASE_EDGE_COMPUTE = "edge_compute" +PHASE_POSTPROCESSING = "postprocessing" +PHASE_OFFLOADING_DECISION = "offloading_decision" +PHASE_TOTAL_INFERENCE = "total_inference" \ No newline at end of file diff --git a/src/server/communication/http_server.py b/src/server/communication/http_server.py index 3e5692a..66b89f6 100644 --- a/src/server/communication/http_server.py +++ b/src/server/communication/http_server.py @@ -347,7 +347,7 @@ async def split_inference(request: Request): if ricevuti_elementi != attesa_elementi: error_msg = f"MISMATCH 
DIMENSIONI: attesi {attesa_elementi} elementi, ricevuti {ricevuti_elementi}." - print(f"[SERVER ERROR] {error_msg}") + logger.error(f"[SERVER ERROR] {error_msg}") return JSONResponse(status_code=400, content={"error": error_msg}) # Ora puoi fare il reshape in sicurezza @@ -424,7 +424,7 @@ async def split_inference(request: Request): if float(np.max(grid[:, :, 1])) > soglia_client: oggetti_rilevati.append("BICI") if float(np.max(grid[:, :, 2])) > soglia_client: oggetti_rilevati.append("STOP") - print(f"[SERVER] {device_id} -> Vede: {oggetti_rilevati if oggetti_rilevati else '[]'}", flush=True) + logger.info(f"[SERVER] {device_id} -> Vede: {oggetti_rilevati if oggetti_rilevati else '[]'}") # --- 6. RISPOSTA FINALE --- output = np.nan_to_num(input_data, nan=0.0, posinf=0.0, neginf=0.0) if np.issubdtype(input_data.dtype, np.floating) else input_data diff --git a/src/server/communication/mqtt_client.py b/src/server/communication/mqtt_client.py index b254c3f..7e29d97 100644 --- a/src/server/communication/mqtt_client.py +++ b/src/server/communication/mqtt_client.py @@ -1,6 +1,7 @@ import json import random import time +import queue import ntplib import paho.mqtt.client as mqtt @@ -34,6 +35,7 @@ def __init__( input_width: int, last_offloading_layer: int, request_handler: RequestHandler, + max_queue_size: int = 1000, ): self.broker_url = broker_url self.broker_port = broker_port @@ -75,6 +77,10 @@ def __init__( self._executor = ThreadPoolExecutor(max_workers=4) self._ntp_resync_timer = None + # Set up bounded task queue for message processing + self._task_queue = queue.Queue(maxsize=max_queue_size) + self._task_executor = ThreadPoolExecutor(max_workers=4) + def _schedule_resync_ntp(self): timer = threading.Timer(600, self._resync_ntp) timer.daemon = True @@ -111,6 +117,7 @@ def stop(self): if self._ntp_resync_timer is not None: self._ntp_resync_timer.cancel() self._executor.shutdown(wait=False, cancel_futures=True) + self._task_executor.shutdown(wait=False, 
cancel_futures=True) self.client.disconnect() def on_connect(self, client, userdata, flags, reason_code, properties=None): @@ -126,6 +133,8 @@ def on_connect(self, client, userdata, flags, reason_code, properties=None): logger.debug( f"Initial NTP timestamp from NTP server {self.ntp_server}: {self.start_timestamp}" ) + # Start the task worker after successful connection + self._start_task_worker() else: logger.debug(f"Connection failed with code {reason_code}") @@ -174,8 +183,29 @@ def get_current_time(self) -> float: def on_message(self, client, userdata, message): received_timestamp = self.get_current_time() - # OPTIMIZATION: Submit to thread pool instead of a single-worker queue - self._executor.submit(self.handle_message_task, message, received_timestamp) + # Use bounded queue for backpressure handling + try: + self._task_queue.put_nowait((message, received_timestamp)) + except queue.Full: + logger.warning(f"MQTT task queue full (maxsize={self._task_queue.maxsize}), dropping message from topic {message.topic}") + return + + def _start_task_worker(self): + """Start the background worker that processes tasks from the queue.""" + def worker(): + while True: + try: + message, received_timestamp = self._task_queue.get() + try: + self.handle_message_task(message, received_timestamp) + except Exception: + logger.exception("Error processing MQTT message task") + finally: + self._task_queue.task_done() + except Exception: + logger.exception("Task worker error") + thread = threading.Thread(target=worker, daemon=True) + thread.start() def handle_message_task(self, message, received_timestamp): if topic_matches( diff --git a/src/server/communication/request_handler.py b/src/server/communication/request_handler.py index 9ae2136..0c59f4c 100644 --- a/src/server/communication/request_handler.py +++ b/src/server/communication/request_handler.py @@ -73,6 +73,11 @@ def load_verbose_config(): return _get_settings().get("verbose", False) +def load_ema_alpha() -> float: + """Load 
EMA alpha for smoothing inference times from cached settings.""" + return _get_settings().get("offloading_algo", {}).get("ema_alpha", 0.5) + + # ── Background I/O writer ─────────────────────────────────────────────────── # A single daemon thread drains a queue of callables, so that debug-JSON, # simulation-CSV, and evaluation-CSV writes never block the inference path. @@ -144,10 +149,10 @@ def __init__(self): # Print header once if not RequestHandler.header_printed: - print( + logger.info( "\nDevice | Offload | Acq Time (ms) | Device Comp (ms) | Edge Comp (ms) | Net Time (ms) | Total (ms)" ) - print("-" * 100) + logger.info("-" * 100) RequestHandler.header_printed = True # Empty the debug folder every time the server starts @@ -434,7 +439,7 @@ def handle_device_inference_result(self, body, received_timestamp): device_inference_times = RequestHandler.device_profiles[device_id]["device_inference_times"] edge_inference_times = RequestHandler.device_profiles[device_id]["edge_inference_times"] - alpha = 0.5 + alpha = load_ema_alpha() for l_id, inference_time in enumerate(message_data.device_layers_inference_time): layer_key = f"layer_{l_id}" if layer_key in device_inference_times: @@ -559,7 +564,9 @@ def handle_device_inference_result(self, body, received_timestamp): decision_candidates = offloading_algo.candidate_evaluations # Stampiamo la tabella SOLO se il calcolo è andato a buon fine! - print(f"{device_id:13s} | {message_data.offloading_layer_index:7d} | {acq_time:13.2f} | {device_comp_time:16.2f} | {edge_comp_time:14.2f} | {network_time:13.2f} | {total_time:10.2f}") + logger.info( + f"{device_id:13s} | {message_data.offloading_layer_index:7d} | {acq_time:13.2f} | {device_comp_time:16.2f} | {edge_comp_time:14.2f} | {network_time:13.2f} | {total_time:10.2f}" + ) except IndexError: # Se mancano i file restituiamo il layer massimo usando la variabile corretta. 
@@ -612,7 +619,7 @@ def handle_device_inference_result(self, body, received_timestamp): self.profiler.stop_cprofile("server_deep_analysis") # Lo riavviamo per catturare i prossimi 50 self.profiler.start_cprofile() - print(f"📊 [PROFILER SERVER] Dati macro e micro (cProfile) esportati.") + logger.info("📊 [PROFILER SERVER] Dati macro e micro (cProfile) esportati.") return best_offloading_layer, device_id, prediction @@ -710,8 +717,8 @@ def build_model_registry(cls, models_config: dict): "last_offloading_layer": model_config["last_offloading_layer"], "num_layers": model_config["last_offloading_layer"] + 1, } - print( + logger.info( f"Registered model '{model_name}' (dir: {model_dir}) with hash {model_hash}" ) except Exception as e: - print(f"Warning: could not register model {model_name}: {e}") + logger.warning(f"Could not register model {model_name}: {e}") diff --git a/src/server/edge/run_edge.py b/src/server/edge/run_edge.py index eb98df0..dac12aa 100644 --- a/src/server/edge/run_edge.py +++ b/src/server/edge/run_edge.py @@ -85,6 +85,7 @@ def create_enabled_transports( input_width=model_config["input_width"], last_offloading_layer=model_config["last_offloading_layer"], request_handler=request_handler, + max_queue_size=mqtt_config.get("max_queue_size", 1000), ), ) ) diff --git a/src/server/settings.yaml b/src/server/settings.yaml index 1eadf0c..4d6b378 100644 --- a/src/server/settings.yaml +++ b/src/server/settings.yaml @@ -22,6 +22,7 @@ communication: client_id: edge model: fomo_96x96 ntp_server: 0.it.pool.ntp.org + max_queue_size: 1000 topics: device_inference_result: device_01/model_inference_result device_input: device_01/input_data @@ -81,3 +82,6 @@ model: # Impostazioni di Telemetria e Profilazione, setta a true per avere report di analisi, setta a false altrimenti debug_cprofiler: false + +offloading_algo: + ema_alpha: 0.5 diff --git a/src/server/web/dashboard_data.py b/src/server/web/dashboard_data.py index 11f8b4c..418453f 100644 --- 
a/src/server/web/dashboard_data.py +++ b/src/server/web/dashboard_data.py @@ -153,6 +153,37 @@ def load_inference_times(path: str | Path) -> tuple[list[float], list[int]]: return values, layers +def load_phase_timing(path: str | Path) -> pd.DataFrame: + """Load phase timing data from profiler JSON export. + + Expected format: {"hardware": "Edge_Server", "phases": {"phase_name": {...}}} + """ + json_path = Path(path) + if not json_path.exists() or json_path.stat().st_size == 0: + return pd.DataFrame(columns=["phase", "count", "mean_ms", "min_ms", "max_ms", "total_ms"]) + + try: + with json_path.open("r") as file: + data = json.load(file) + except (OSError, json.JSONDecodeError): + return pd.DataFrame(columns=["phase", "count", "mean_ms", "min_ms", "max_ms", "total_ms"]) + + phases = data.get("phases", {}) + rows = [] + for phase_name, stats in phases.items(): + if isinstance(stats, dict) and "count" in stats: + rows.append({ + "phase": phase_name, + "count": stats.get("count", 0), + "mean_ms": stats.get("wall_time_ms", {}).get("mean", 0), + "min_ms": stats.get("wall_time_ms", {}).get("min", 0) if "min" in stats.get("wall_time_ms", {}) else stats.get("min_ms", 0), + "max_ms": stats.get("wall_time_ms", {}).get("p95", 0) if "p95" in stats.get("wall_time_ms", {}) else stats.get("max_ms", 0), + "total_ms": stats.get("wall_time_ms", {}).get("mean", 0) * stats.get("count", 0), + }) + + return pd.DataFrame(rows) + + def build_inference_frame(inference_times: list[float], layers: list[int]) -> pd.DataFrame: return pd.DataFrame( { diff --git a/src/server/web/webpage.py b/src/server/web/webpage.py index 582cc13..375aeec 100644 --- a/src/server/web/webpage.py +++ b/src/server/web/webpage.py @@ -15,6 +15,7 @@ load_decision_summaries, load_evaluations, load_inference_times, + load_phase_timing, non_empty_values, ) @@ -160,3 +161,15 @@ st.image(str(input_image_path)) else: st.info("No captured image is available yet.") + +st.header('Time Breakdown (Phase Timing)') +profiler_path = 
Path("logs/server_stats.json") +if profiler_path.exists(): + phase_df = load_phase_timing(profiler_path) + if not phase_df.empty: + st.bar_chart(phase_df.set_index('phase')['mean_ms']) + st.dataframe(phase_df, use_container_width=True) + else: + st.info("Profiler data available but no phase timing recorded yet.") +else: + st.info("Profiler data not available yet. Enable debug_cprofiler in settings.yaml to collect phase timing.")