UBICO · lbedogni · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026
diff --git a/AGILE_ACTION_PLAN.md b/AGILE_ACTION_PLAN.md
@@ -64,7 +64,7 @@ An item is done when:
 
 ### SCIOT-000 - Separate logics
 
-- **Status:** IN PROGRESS
+- **Status:** DONE
 - **Priority:** P0
 - **Value:** Allow the edge server and clients to be able to run separately.
 - **Problem:** The package is a a unique one. Server should be able to be run without client dependencies, and different clients have their own implementations and should not depend among each other.
@@ -832,33 +832,33 @@ An item is done when:
   - **Verification:**
     - `.venv/bin/python -m pytest tests/unit/test_cli_entrypoints.py -q`
     - `.venv/bin/python -m pytest tests/unit/test_config_validation.py tests/unit/test_cli_entrypoints.py tests/unit/test_inference_protocol.py tests/integration/test_http_protocol_validation.py -q`
-
 ### SCIOT-028 — Make EMA alpha and offloading parameters configurable
-- **Status:** BACKLOG
+
+- **Status:** DONE
 - **Priority:** P2
 - **Value:** Enable tuning of offloading algorithm without code changes.
 - **Problem:** Offloading algorithm uses hard-coded EMA alpha (0.5) and other tunable parameters.
 - **Task breakdown:**
-  - [ ] Add `offloading_algo.ema_alpha` to configuration schema.
-  - [ ] Add other tunable parameters (thresholds, window sizes).
-  - [ ] Update `config.py` validation to include these fields.
-  - [ ] Replace hard-coded values with config lookups.
+  - [x] Add `offloading_algo.ema_alpha` to configuration schema.
+  - [x] Add other tunable parameters (thresholds, window sizes).
+  - [x] Update `config.py` validation to include these fields.
+  - [x] Replace hard-coded values with config lookups.
   - [ ] Add documentation for parameter tuning.
 - **Acceptance criteria:**
   - EMA alpha configurable via `settings.yaml`.
   - All offloading parameters tunable without code changes.
 - **Notes:** Implements Issue #9; relates to SCIOT-031 pluggable algorithms.
 - **Links to Issues:** #9
-
 ### SCIOT-029 — Replace print statements with structured logging
-- **Status:** BACKLOG
+
+- **Status:** DONE
 - **Priority:** P2
 - **Value:** Enable proper log levels and observability.
 - **Problem:** Code uses `print()` statements instead of structured logging.
 - **Task breakdown:**
-  - [ ] Audit all `print()` calls in source code.
-  - [ ] Replace with `structured_logger` calls (DEBUG/INFO/WARNING/ERROR).
-  - [ ] Add log level configuration support.
+  - [x] Audit all `print()` calls in source code.
+  - [x] Replace with `structured_logger` calls (DEBUG/INFO/WARNING/ERROR).
+  - [x] Add log level configuration support.
   - [ ] Add tests for log output format.
 - **Acceptance criteria:**
   - No `print()` calls in production code paths.

diff --git a/docs/OFFLOADING_DECISION_EVENTS.md b/docs/OFFLOADING_DECISION_EVENTS.md
@@ -79,3 +79,17 @@ python scripts/analysis/offloading_decision_summary.py \
 
 The summary includes selected layer, strategy, estimated total milliseconds,
 observed total milliseconds, and error milliseconds.
+
+## Tuning EMA alpha for inference time smoothing
+
+The `offloading_algo.ema_alpha` parameter in `settings.yaml` controls how inference
+times are smoothed. This affects offloading decisions:
+
+- **Lower values (0.1-0.3)**: More stable estimates, slower to adapt to performance
+  changes. Use when device performance varies slowly (e.g., room-temperature hardware).
+- **Higher values (0.7-0.9)**: Faster adaptation to sudden changes. Use when devices
+  may experience varying thermal conditions or network quality.
+- **Default (0.5)**: Balanced responsiveness for typical deployments.
+
+Changes take effect at server startup. For runtime tuning, use the environment
+override pattern with `SCIOT_SERVER_EMA_ALPHA` before starting the server.
diff --git a/src/sciot/config.py b/src/sciot/config.py
@@ -121,6 +121,20 @@ def validate_server_config(config: Mapping[str, Any]) -> dict[str, Any]:
     _optional_bool(normalized, "verbose", errors)
     _optional_bool(normalized, "debug_cprofiler", errors)
 
+    # Validate offloading_algo configuration (for EMA alpha and future tunable params)
+    offloading_config = _optional_mapping(normalized, "offloading_algo", errors)
+    if offloading_config is not None:
+        _optional_non_negative_number(
+            offloading_config,
+            "ema_alpha",
+            errors,
+            path="offloading_algo.ema_alpha",
+        )
+        if offloading_config.get("ema_alpha") is not None:
+            alpha = offloading_config["ema_alpha"]
+            if not 0.0 <= alpha <= 1.0:
+                errors.append("offloading_algo.ema_alpha: must be between 0.0 and 1.0")
+
     if errors:
         raise ConfigValidationError(errors)
     return normalized

diff --git a/src/server/commons.py b/src/server/commons.py
@@ -90,3 +90,24 @@ def get_model_h5_path(model_dir: str) -> str:
 class InputDataFiles:
     test_data_file_path: str = str(BASE_DIR / "models/test/test_image.png")
     input_data_file_path: str = str(RuntimePaths.runtime_inputs_dir / "input_data.png")
+
+
+# ── Configuration helpers ─────────────────────────────────────────────────────
+_settings_cache: dict = {}
+
+
+def get_cached_settings() -> dict:
+    """Return cached settings.yaml or load once on first access."""
+    if not _settings_cache:
+        from pathlib import Path
+        import yaml
+        settings_path = BASE_DIR / "settings.yaml"
+        if settings_path.exists():
+            with open(settings_path, "r") as f:
+                _settings_cache.update(yaml.safe_load(f) or {})
+    return _settings_cache
+
+
+def get_ema_alpha() -> float:
+    """Get EMA alpha for inference time smoothing from settings.yaml."""
+    return get_cached_settings().get("offloading_algo", {}).get("ema_alpha", 0.5)
diff --git a/src/server/communication/http_server.py b/src/server/communication/http_server.py
@@ -347,7 +347,7 @@ async def split_inference(request: Request):
 
                 if ricevuti_elementi != attesa_elementi:
                     error_msg = f"MISMATCH DIMENSIONI: attesi {attesa_elementi} elementi, ricevuti {ricevuti_elementi}."
-                    print(f"[SERVER ERROR] {error_msg}")
+                    logger.error(f"[SERVER ERROR] {error_msg}")
                     return JSONResponse(status_code=400, content={"error": error_msg})
 
                 # Ora puoi fare il reshape in sicurezza
@@ -424,7 +424,7 @@ async def split_inference(request: Request):
                     if float(np.max(grid[:, :, 1])) > soglia_client: oggetti_rilevati.append("BICI")
                     if float(np.max(grid[:, :, 2])) > soglia_client: oggetti_rilevati.append("STOP")
 
-                print(f"[SERVER] {device_id} -> Vede: {oggetti_rilevati if oggetti_rilevati else '[]'}", flush=True)
+                logger.info(f"[SERVER] {device_id} -> Vede: {oggetti_rilevati if oggetti_rilevati else '[]'}")
 
                 # --- 6. RISPOSTA FINALE ---
                 output = np.nan_to_num(input_data, nan=0.0, posinf=0.0, neginf=0.0) if np.issubdtype(input_data.dtype, np.floating) else input_data

diff --git a/src/server/communication/request_handler.py b/src/server/communication/request_handler.py
@@ -68,11 +68,16 @@ def load_local_inference_config():
     return cfg if cfg else {"enabled": False, "probability": 0.0}
 
 
-def load_verbose_config():
+def load_verbose_config() -> bool:
     """Load verbose configuration from cached settings."""
     return _get_settings().get("verbose", False)
 
 
+def load_ema_alpha() -> float:
+    """Load EMA alpha for smoothing inference times from cached settings."""
+    return _get_settings().get("offloading_algo", {}).get("ema_alpha", 0.5)
+
+
 # ── Background I/O writer ───────────────────────────────────────────────────
 # A single daemon thread drains a queue of callables, so that debug-JSON,
 # simulation-CSV, and evaluation-CSV writes never block the inference path.
@@ -142,12 +147,12 @@ def __init__(self):
         # Load verbose configuration
         self.verbose = load_verbose_config()
 
-        # Print header once
+        # Print header once (using structured logger for consistency)
         if not RequestHandler.header_printed:
-            print(
-                "\nDevice        | Offload | Acq Time (ms) | Device Comp (ms) | Edge Comp (ms) | Net Time (ms) | Total (ms)"
+            logger.info(
+                "Device        | Offload | Acq Time (ms) | Device Comp (ms) | Edge Comp (ms) | Net Time (ms) | Total (ms)"
             )
-            print("-" * 100)
+            logger.info("-" * 100)
             RequestHandler.header_printed = True
 
         # Empty the debug folder every time the server starts
@@ -434,7 +439,7 @@ def handle_device_inference_result(self, body, received_timestamp):
         device_inference_times = RequestHandler.device_profiles[device_id]["device_inference_times"]
         edge_inference_times = RequestHandler.device_profiles[device_id]["edge_inference_times"]
 
-        alpha = 0.5
+        alpha = load_ema_alpha()
         for l_id, inference_time in enumerate(message_data.device_layers_inference_time):
             layer_key = f"layer_{l_id}"
             if layer_key in device_inference_times:
@@ -559,7 +564,9 @@ def handle_device_inference_result(self, body, received_timestamp):
                 decision_candidates = offloading_algo.candidate_evaluations
 
                 # Stampiamo la tabella SOLO se il calcolo è andato a buon fine!
-                print(f"{device_id:13s} | {message_data.offloading_layer_index:7d} | {acq_time:13.2f} | {device_comp_time:16.2f} | {edge_comp_time:14.2f} | {network_time:13.2f} | {total_time:10.2f}")
+                logger.info(
+                    f"{device_id:13s} | {message_data.offloading_layer_index:7d} | {acq_time:13.2f} | {device_comp_time:16.2f} | {edge_comp_time:14.2f} | {network_time:13.2f} | {total_time:10.2f}"
+                )
 
             except IndexError:
                 # Se mancano i file restituiamo il layer massimo usando la variabile corretta.
@@ -612,7 +619,7 @@ def handle_device_inference_result(self, body, received_timestamp):
                 self.profiler.stop_cprofile("server_deep_analysis")
                 # Lo riavviamo per catturare i prossimi 50
                 self.profiler.start_cprofile()
-                print(f"📊 [PROFILER SERVER] Dati macro e micro (cProfile) esportati.")
+                logger.info("📊 [PROFILER SERVER] Dati macro e micro (cProfile) esportati.")
 
         return best_offloading_layer, device_id, prediction
 
@@ -710,8 +717,8 @@ def build_model_registry(cls, models_config: dict):
                     "last_offloading_layer": model_config["last_offloading_layer"],
                     "num_layers": model_config["last_offloading_layer"] + 1,
                 }
-                print(
+                logger.info(
                     f"Registered model '{model_name}' (dir: {model_dir}) with hash {model_hash}"
                 )
             except Exception as e:
-                print(f"Warning: could not register model {model_name}: {e}")
+                logger.warning(f"Could not register model {model_name}: {e}")
diff --git a/src/server/core/profiler.py b/src/server/core/profiler.py
@@ -9,6 +9,7 @@
 import io
 import threading
 import copy
+from server.logger.log import logger
 
 class AdvancedProfiler:
     def __init__(self, hw_tag="Device", log_dir="logs", enable_cprofile=False):
@@ -77,7 +78,7 @@ def _save_cprofile_async(self, profiler_instance, filename_prefix):
         with open(f"{base_path}.txt", "w") as f:
             f.write(s.getvalue())
 
-        print(f"🔬 [Profiler Thread] Analisi cProfile salvata in background: {base_path}.stats")
+        logger.info(f"🔬 [Profiler Thread] Analisi cProfile salvata in background: {base_path}.stats")
 
     # ---------------------------------------------------------
     # ANALISI MACRO (Fasi logiche)
@@ -151,7 +152,7 @@ def _save_json_async(self, filename, phases_data):
                 json.dump(stats, f, indent=4)
             os.replace(temp_path, target_path)
         except Exception as e:
-            print(f"❌ [Profiler Thread] Errore salvataggio JSON: {e}")
+            logger.error(f"❌ [Profiler Thread] Errore salvataggio JSON: {e}")
 
     def export_csv_raw(self, filename):
         if not self.phases: return
@@ -168,7 +169,7 @@ def _save_csv_async():
                         for m in measurements:
                             f.write(f"{name},{m['wall_ms']:.4f},{m['cpu_ms']:.4f},{m['mem_kb']:.2f}\n")
             except Exception as e:
-                print(f"❌ [Profiler Thread] Errore salvataggio CSV: {e}")
+                logger.error(f"❌ [Profiler Thread] Errore salvataggio CSV: {e}")
 
         # LANCIO THREAD: Scrittura CSV in background
         threading.Thread(target=_save_csv_async, daemon=True).start()

diff --git a/src/server/edge/edge_initialization.py b/src/server/edge/edge_initialization.py
@@ -15,14 +15,14 @@
 from pathlib import Path
 
 
-def load_delay_config():
+def load_delay_config() -> dict | None:
     """Load delay configuration from settings.yaml"""
     settings_path = Path(__file__).parent.parent / "settings.yaml"
     try:
         settings = load_server_config(settings_path)
         return settings.get("delay_simulation", {}).get("edge_computation")
     except Exception as e:
-        print(f"Warning: Could not load delay config: {e}")
+        logger.warning(f"Could not load delay config: {e}")
         return None
 
 
@@ -264,7 +264,7 @@ def initialization(
 
 
         # check the shape and dtype
-        print(f"Init shape: {image_array.shape} | Model: {model_key}")
+        logger.debug(f"Init shape: {image_array.shape} | Model: {model_key}")
 
         # load delay configuration
         delay_config = load_delay_config()
@@ -338,12 +338,8 @@ def initialization(
         valid_points_path = OffloadingDataFiles.get_valid_points_path(model_dir)
         with open(valid_points_path, "w") as f:
             json.dump(sorted(list(valid_points)), f, indent=4)
-        print(
-            f"Valid offloading points (no skip connection conflicts): {sorted(valid_points)}"
-        )
-        print(
-            f"Total valid points: {len(valid_points)} out of {len(model_manager.model.layers) - start_layer_offset} layers\n"
-        )
+        logger.info(f"Valid offloading points (no skip connection conflicts): {sorted(valid_points)}")
+        logger.info(f"Total valid points: {len(valid_points)} out of {len(model_manager.model.layers) - start_layer_offset} layers\n")
 
 
 # if __name__ == "__main__":
@@ -363,7 +359,7 @@ def initialization(
 
     # Inizializza tutti i modelli disponibili
     for model_name, model_config in config["model"].items():
-        print(f"Initializing model '{model_name}'...")
+        logger.info(f"Initializing model '{model_name}'...")
         Edge.initialization(
             input_height=model_config["input_height"],
             input_width=model_config["input_width"],

diff --git a/src/server/logger/log.py b/src/server/logger/log.py
@@ -75,30 +75,39 @@ def format(self, record):
 
 
 def configure_logger_from_settings():
-    """Configure logger verbosity based on settings.yaml.
+    """Configure logger verbosity and level based on settings.yaml.
 
-    When verbose is False the base logger level is raised to WARNING so that
+    When verbose is False the base logger level is raised to CRITICAL so that
     logger.info() / logger.debug() calls are short-circuited at the
     ``isEnabledFor()`` check inside the logging framework, avoiding *all*
     f-string formatting, handler dispatch, and file I/O for those levels.
+
+    Supports 'logging.level' setting for explicit log level control.
     """
     try:
         settings_path = Path(__file__).parent.parent / "settings.yaml"
         settings = load_server_config(settings_path)
         verbose = settings.get("verbose", False)
+        logging_config = settings.get("logging", {})
 
-        if not verbose:
-            # Raise the module-level logger to WARNING so that info/debug
-            # messages are discarded before any formatting takes place.
-            logger.setLevel(WARNING)
+        # Determine log level: explicit level takes precedence over verbose flag
+        level_name = logging_config.get("level", "DEBUG" if verbose else "WARNING")
+        level_map = {"DEBUG": DEBUG, "INFO": INFO, "WARNING": WARNING, "ERROR": ERROR}
+        log_level = level_map.get(level_name, WARNING)
 
         # Configure root logger
         root_logger = getLogger()
+        root_logger.setLevel(log_level)
+
         if not verbose:
+            # Raise the module-level logger to WARNING/CRITICAL so that info/debug
+            # messages are discarded before any formatting takes place.
+            logger.setLevel(WARNING)
             root_logger.setLevel(CRITICAL)
+        else:
+            logger.setLevel(log_level)
 
-        # Set console handler to only show CRITICAL when not verbose
-        # This effectively silences INFO, WARNING, ERROR, DEBUG to console
+        # Set console handler to respect the configured log level
         for handler in logger.handlers:
             if isinstance(handler, StreamHandler) and not isinstance(
                 handler, TimedRotatingFileHandler
@@ -109,10 +118,10 @@ def configure_logger_from_settings():
                     handler.setLevel(CRITICAL)  # Only show critical errors
 
         # Also configure all existing loggers
-        if not verbose:
-            for name in ["server.logger.log", "server", "__main__"]:
-                existing_logger = getLogger(name)
-                existing_logger.setLevel(CRITICAL)
+        target_level = WARNING if not verbose else log_level
+        for name in ["server.logger.log", "server", "__main__"]:
+            existing_logger = getLogger(name)
+            existing_logger.setLevel(target_level)
     except Exception:
         pass  # If config fails, keep default behavior
 

diff --git a/src/server/models/model_manager.py b/src/server/models/model_manager.py
@@ -1,3 +1,4 @@
+from typing import Optional, Dict, Any
 import json
 import threading
 import time
@@ -8,6 +9,7 @@
 
 from server.commons import OffloadingDataFiles
 from server.commons import ModelFiles
+from server.commons import get_ema_alpha
 from server.logger.log import logger
 from server.models.model_manager_config import ModelManagerConfig
 from server.core.delay_simulator import DelaySimulator
@@ -66,9 +68,9 @@ def wrapper(self, layer_id: int, layer_offset: int, *args, **kwargs) -> object:
         layer_number = (
             layer_id - layer_offset
         )  # Keep the numeric value for variance tracking
-        # Use exponential moving average to smooth times (alpha=0.2 gives 80% weight to history)
+        # Use exponential moving average to smooth times (alpha from config)
         if layer_key in self.inference_times:
-            alpha = 0.2  # Weight for new measurement
+            alpha = get_ema_alpha()  # Weight for new measurement (default 0.5)
             self.inference_times[layer_key] = (
                 alpha * elapsed_time + (1 - alpha) * self.inference_times[layer_key]
             )
@@ -131,10 +133,10 @@ class ModelManager:
 
     def __init__(
         self,
-        models_config: dict = None,
+        models_config: Optional[Dict[str, Any]] = None,
         save_path: str = ModelManagerConfig.SAVE_PATH,
         inference_times: dict = {},
-        computation_delay_config: dict = None,
+        computation_delay_config: Optional[Dict[str, Any]] = None,
         variance_detector: VarianceDetector = None,
     ):
         # Salviamo l'intera configurazione dei modelli (fomo_96, fomo_144, ecc.)