Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 105 additions & 6 deletions osism/tasks/conductor/sonic/config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@
# VXLAN VTEP name used for VXLAN tunnel configuration
VXLAN_VTEP_NAME = "vtepServ"

# Default listen port of the SONiC telemetry/gNMI container, used for the
# GNMI_ONLY control-plane ACL rule when TELEMETRY|gnmi does not set a port.
DEFAULT_GNMI_PORT = "8080"

# Top-level scaffold keys that the orchestrator and downstream helpers index
# into directly. Kept as a single source of truth so that the production code
# and the test helpers cannot drift apart when keys are added or removed.
Expand Down Expand Up @@ -105,11 +109,11 @@
# difference being only that the generator depends on this one as an input.
# Distinct from inherited tables, which the generator also writes selected
# fields into; the distinction is read-only vs. read-and-update, not the access
# syntax (inherited tables are read via config.get() too). Empty for now; the
# first consumer is the gNMI listen port, read from TELEMETRY. Must stay
# disjoint from OWNED_TABLE_KEYS -- an image-consumed table dropped up front
# would always read back empty.
IMAGE_CONSUMED_TABLE_KEYS = ()
# syntax (inherited tables are read via config.get() too). For example, the
# gNMI listen port is read from TELEMETRY. Must stay disjoint from
# OWNED_TABLE_KEYS -- an image-consumed table dropped up front would always
# read back empty.
IMAGE_CONSUMED_TABLE_KEYS = ("TELEMETRY",)
Comment thread
berendt marked this conversation as resolved.

# Owned tables that are also scaffolded: every scaffold key except the
# inherited ones. The orchestrator setdefault-creates these up front, so
Expand All @@ -126,6 +130,8 @@
# defaults (location "Data Center", contact "info@example.com") even when the
# device has no SNMP data in NetBox.
ON_DEMAND_OWNED_TABLE_KEYS = (
"ACL_RULE",
"ACL_TABLE",
"ROUTE_REDISTRIBUTE",
"SNMP_SERVER",
"SNMP_AGENT_ADDRESS_CONFIG",
Expand Down Expand Up @@ -194,7 +200,8 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version=
base config but never modified by the generator. The defining
property is behavioural (read-only), not the access syntax:
inherited tables are also read via config.get(), so .get() alone
does not distinguish the two. Empty for now.
does not distinguish the two. IMAGE_CONSUMED_TABLE_KEYS lists the
current members.
- Pass-through: every table the generator never references. Left
untouched and unmanaged; not a supported place for operator
customizations either.
Expand Down Expand Up @@ -424,6 +431,8 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version=
config["MGMT_INTERFACE"][f"eth0|{oob_ip}/{prefix_len}"] = {}
metalbox_ip = _get_metalbox_ip_for_device(device)
config["STATIC_ROUTE"]["mgmt|0.0.0.0/0"] = {"nexthop": metalbox_ip}
# Restrict control-plane services (SNMP, gNMI) to the OOB network
_add_ctrlplane_acls(config, oob_ip, prefix_len)
else:
oob_ip = None

Expand Down Expand Up @@ -2326,3 +2335,93 @@ def _add_snmp_configuration(config, device, oob_ip):
counter += 1

logger.debug(f"Added snmp_server_target {host}")


def _get_gnmi_port(config):
"""Return the gNMI server port for the GNMI_ONLY control-plane ACL rule.

The telemetry/gNMI container reads its listen port from
TELEMETRY|gnmi|port and falls back to 8080 when unset, so the ACL rule
follows the same lookup against the (image-consumed) TELEMETRY table,
treating a present-but-empty value (null, "") as unset like the
container does. A malformed value (non-numeric or outside 1-65535)
also falls back to the default, with a warning, rather than
propagating: the rule must always carry a bindable L4_DST_PORT --
caclmgrd otherwise skips the whole EXTERNAL_CLIENT table and gNMI
would silently stay unrestricted -- and the container cannot bind such
a value either, so the substituted default cannot mismatch a live gNMI
listener.
"""
Comment thread
sourcery-ai[bot] marked this conversation as resolved.
port = config.get("TELEMETRY", {}).get("gnmi", {}).get("port")
if port is None or str(port).strip() == "":
return DEFAULT_GNMI_PORT
try:
port_number = int(str(port).strip())
except ValueError:
port_number = -1
if not 1 <= port_number <= 65535:
logger.warning(
f"TELEMETRY|gnmi|port value {port!r} is not a usable TCP port; "
f"using default {DEFAULT_GNMI_PORT} for the GNMI_ONLY ACL rule"
)
return DEFAULT_GNMI_PORT
return str(port_number)


def _add_ctrlplane_acls(config, oob_ip, prefix_len):
"""Add control-plane ACLs restricting SNMP and gNMI to the OOB network.

Emits ACL_TABLE entries of type CTRLPLANE bound to the SNMP and
EXTERNAL_CLIENT (gNMI/telemetry) caclmgrd services, plus one ACL_RULE
per table accepting only the device's OOB management subnet (the
network-normalised oob_ip/prefix_len). caclmgrd installs an implicit
default-drop for every service bound in a CTRLPLANE table, so sources
outside the OOB subnet can no longer reach SNMP or gNMI. The SSH_ONLY
table (#2329) belongs here as well once implemented.

caclmgrd's EXTERNAL_CLIENT service has no built-in destination port
(verified against sonic-host-services 202211 through master): the rule
must carry it in L4_DST_PORT, otherwise caclmgrd skips the whole table
and the gNMI restriction would silently not exist.

ACL_TABLE and ACL_RULE are generator-owned (ON_DEMAND_OWNED_TABLE_KEYS),
so they are rebuilt from scratch on every regen and stay absent when the
device has no OOB IP. They are also multi-owner
(MULTI_OWNER_OWNED_TABLE_KEYS): the SSH_ONLY table (#2329) belongs here as
well once implemented, so this helper merges only its own SNMP_ONLY /
GNMI_ONLY keys per key rather than rebinding the table wholesale -- the
central owned-table drop in generate_sonic_config clears stale entries up
front, and per-key merge lets coexisting control-plane helpers compose. The
rules are IPv4 (SRC_IP); a non-IPv4 OOB IP logs a warning and emits nothing
rather than failing the whole config generation.
"""
network = ipaddress.ip_network(f"{oob_ip}/{prefix_len}", strict=False)
if network.version != 4:
Comment thread
berendt marked this conversation as resolved.
logger.warning(
f"OOB IP {oob_ip}/{prefix_len} is not IPv4; skipping control-plane ACLs"
)
return
Comment on lines +2398 to +2403

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue: Guard against invalid OOB IPs raising ValueError during ip_network() construction.

If oob_ip or prefix_len are malformed, ipaddress.ip_network will raise ValueError and abort config generation. Consider wrapping this call in a try/except ValueError, logging a warning (similar to the non-IPv4 case), and returning early so bad input only skips ACL generation rather than breaking the whole config.


accept_from_oob = {
"PRIORITY": "9999",
"PACKET_ACTION": "ACCEPT",
"SRC_IP": str(network),
"IP_TYPE": "IP",
}
config.setdefault("ACL_TABLE", {})
config["ACL_TABLE"]["SNMP_ONLY"] = {
"policy_desc": "SNMP_ONLY",
"type": "CTRLPLANE",
"services": ["SNMP"],
}
config["ACL_TABLE"]["GNMI_ONLY"] = {
"policy_desc": "GNMI_ONLY",
"type": "CTRLPLANE",
"services": ["EXTERNAL_CLIENT"],
}
config.setdefault("ACL_RULE", {})
config["ACL_RULE"]["SNMP_ONLY|RULE_1"] = dict(accept_from_oob)
config["ACL_RULE"]["GNMI_ONLY|RULE_1"] = {
**accept_from_oob,
"L4_DST_PORT": _get_gnmi_port(config),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# SPDX-License-Identifier: Apache-2.0

"""Unit tests for ``_add_ctrlplane_acls`` in ``config_generator`` (#2330)."""

import pytest

from osism.tasks.conductor.sonic.config_generator import (
ON_DEMAND_OWNED_TABLE_KEYS,
TOP_LEVEL_SCAFFOLD_KEYS,
_add_ctrlplane_acls,
)


def test_add_ctrlplane_acls_emits_snmp_and_gnmi_ctrlplane_tables():
config = {}

_add_ctrlplane_acls(config, "10.42.0.5", 24)

assert config["ACL_TABLE"] == {
"SNMP_ONLY": {
"policy_desc": "SNMP_ONLY",
"type": "CTRLPLANE",
"services": ["SNMP"],
},
"GNMI_ONLY": {
"policy_desc": "GNMI_ONLY",
"type": "CTRLPLANE",
"services": ["EXTERNAL_CLIENT"],
},
}


def test_add_ctrlplane_acls_rules_accept_network_normalised_oob_subnet():
"""SRC_IP must carry the network address, not the device's host IP."""
config = {}

_add_ctrlplane_acls(config, "10.42.0.5", 24)

assert config["ACL_RULE"]["SNMP_ONLY|RULE_1"] == {
"PRIORITY": "9999",
"PACKET_ACTION": "ACCEPT",
"SRC_IP": "10.42.0.0/24",
"IP_TYPE": "IP",
}
assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"] == {
"PRIORITY": "9999",
"PACKET_ACTION": "ACCEPT",
"SRC_IP": "10.42.0.0/24",
"IP_TYPE": "IP",
"L4_DST_PORT": "8080",
}


def test_add_ctrlplane_acls_gnmi_rule_requires_dst_port_snmp_does_not():
"""caclmgrd's EXTERNAL_CLIENT service has no built-in destination port;
without L4_DST_PORT in the rule it skips the whole table and the gNMI
restriction would silently not exist. SNMP has a fixed service port
(161) in caclmgrd's ACL_SERVICES, so its rule must not pin one."""
config = {}

_add_ctrlplane_acls(config, "10.42.0.5", 24)

assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"]["L4_DST_PORT"] == "8080"
assert "L4_DST_PORT" not in config["ACL_RULE"]["SNMP_ONLY|RULE_1"]


def test_add_ctrlplane_acls_gnmi_port_follows_telemetry_table():
Comment thread
berendt marked this conversation as resolved.
"""A TELEMETRY|gnmi|port from the base config wins over the 8080 default
(and non-string values are normalised to the string ConfigDB expects)."""
config = {"TELEMETRY": {"gnmi": {"port": 50051}}}

_add_ctrlplane_acls(config, "10.42.0.5", 24)

assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"]["L4_DST_PORT"] == "50051"


@pytest.mark.parametrize(
"port",
[None, "", " ", "not-a-port", "8080.5", 0, 70000, -1],
ids=[
"null",
"empty",
"blank",
"non-numeric",
"float",
"zero",
"too-big",
"negative",
],
)
def test_add_ctrlplane_acls_unusable_gnmi_port_falls_back_to_default(port):
"""A present-but-unusable TELEMETRY|gnmi|port must not leak into the rule:
caclmgrd cannot bind it and would skip the whole EXTERNAL_CLIENT table,
silently leaving gNMI unrestricted. The helper substitutes the 8080
default instead -- safe, because the gNMI container cannot bind such a
value either, so there is no live listener the rule could mismatch."""
config = {"TELEMETRY": {"gnmi": {"port": port}}}

_add_ctrlplane_acls(config, "10.42.0.5", 24)

assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"]["L4_DST_PORT"] == "8080"


def test_add_ctrlplane_acls_merges_into_co_owned_tables_per_key():
"""ACL_TABLE / ACL_RULE are multi-owner (SSH, SNMP, gNMI): the helper must
merge only its own keys, never rebind the table wholesale, so a sibling
control-plane helper's entries survive. Stale carry-over from a prior regen
is cleared by the central owned-table drop in generate_sonic_config, not
here (see the ownership model and MULTI_OWNER_OWNED_TABLE_KEYS)."""
config = {
"ACL_TABLE": {"SSH_ONLY": {"type": "CTRLPLANE", "services": ["SSH"]}},
"ACL_RULE": {"SSH_ONLY|RULE_1": {"PRIORITY": "9999"}},
}

_add_ctrlplane_acls(config, "10.42.0.5", 24)

# A sibling helper's entries are left untouched ...
assert config["ACL_TABLE"]["SSH_ONLY"] == {"type": "CTRLPLANE", "services": ["SSH"]}
assert config["ACL_RULE"]["SSH_ONLY|RULE_1"] == {"PRIORITY": "9999"}
# ... and this helper's own entries are added alongside them.
assert set(config["ACL_TABLE"]) == {"SSH_ONLY", "SNMP_ONLY", "GNMI_ONLY"}
assert set(config["ACL_RULE"]) == {
"SSH_ONLY|RULE_1",
"SNMP_ONLY|RULE_1",
"GNMI_ONLY|RULE_1",
}


def test_add_ctrlplane_acls_non_ipv4_oob_ip_emits_nothing():
Comment thread
berendt marked this conversation as resolved.
"""The rules are IPv4-only (SRC_IP); an IPv6 OOB IP must not produce
half-correct tables — and must not break config generation either."""
config = {}

_add_ctrlplane_acls(config, "2001:db8::5", 64)

assert "ACL_TABLE" not in config
assert "ACL_RULE" not in config


def test_ctrlplane_acl_tables_are_on_demand_owned():
"""#2330 requires ACL_TABLE/ACL_RULE to be rebuilt from scratch on every
regen and absent without an OOB IP. Both guarantees hang on the keys
being on-demand owned: owned tables are dropped from the base config up
front, and on-demand (unlike scaffolded) tables are not re-created as
empty dicts."""
for key in ("ACL_TABLE", "ACL_RULE"):
assert key in ON_DEMAND_OWNED_TABLE_KEYS
assert key not in TOP_LEVEL_SCAFFOLD_KEYS
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from osism.tasks.conductor.sonic.config_generator import (
OWNED_TABLE_KEYS,
TOP_LEVEL_SCAFFOLD_KEYS,
_add_ctrlplane_acls,
clear_all_caches,
clear_metalbox_devices_cache,
clear_metalbox_ip_cache,
Expand Down Expand Up @@ -95,6 +96,7 @@ def patch(name, **kw):
_add_loopback_configuration=patch("_add_loopback_configuration"),
_add_log_server_configuration=patch("_add_log_server_configuration"),
_add_snmp_configuration=patch("_add_snmp_configuration"),
_add_ctrlplane_acls=patch("_add_ctrlplane_acls"),
Comment thread
berendt marked this conversation as resolved.
_add_vrf_configuration=patch("_add_vrf_configuration"),
_add_portchannel_configuration=patch("_add_portchannel_configuration"),
get_cached_device_interfaces=patch(
Expand Down Expand Up @@ -395,6 +397,74 @@ def test_generate_sonic_config_no_oob_ip_leaves_mgmt_empty_and_passes_none(
assert snmp_oob is None


def test_generate_sonic_config_oob_ip_wires_ctrlplane_acls(
mocker, patch_orchestrator_helpers, make_orchestrator_device
):
"""With an OOB IP the orchestrator delegates the control-plane ACLs
(#2330) to ``_add_ctrlplane_acls`` with the raw OOB IP and prefix —
network normalisation is the helper's job."""
patch_base_config(mocker)
patch_orchestrator_helpers.get_device_oob_ip.return_value = ("10.42.0.5", 24)
device = make_orchestrator_device()

config = generate_sonic_config(device, "HWSKU")

patch_orchestrator_helpers._add_ctrlplane_acls.assert_called_once_with(
config, "10.42.0.5", 24
)


def test_generate_sonic_config_gnmi_port_survives_owned_table_drop_end_to_end(
mocker, patch_orchestrator_helpers, make_orchestrator_device
):
"""End-to-end with the real ACL helper: a TELEMETRY|gnmi|port from the
base config must reach the generated GNMI_ONLY rule. TELEMETRY is
image-consumed while ACL_TABLE/ACL_RULE are owned and dropped up front,
so this pins the whole flow -- the port is read after the drop and
emitted into the final output -- which the wiring tests above (helper
mocked) and the helper unit tests (no orchestrator) only cover
compositionally."""
base = make_base_config()
base["TELEMETRY"] = {"gnmi": {"port": 50051}}
patch_base_config(mocker, base_config=base)
patch_orchestrator_helpers.get_device_oob_ip.return_value = ("10.42.0.5", 24)
# Pass the wired call through to the real helper instead of the fixture
# mock, keeping every other orchestrator dependency patched.
patch_orchestrator_helpers._add_ctrlplane_acls.side_effect = _add_ctrlplane_acls
device = make_orchestrator_device()

config = generate_sonic_config(device, "HWSKU")

assert config["ACL_TABLE"]["GNMI_ONLY"]["services"] == ["EXTERNAL_CLIENT"]
assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"] == {
"PRIORITY": "9999",
"PACKET_ACTION": "ACCEPT",
"SRC_IP": "10.42.0.0/24",
"IP_TYPE": "IP",
"L4_DST_PORT": "50051",
}


def test_generate_sonic_config_no_oob_ip_skips_ctrlplane_acls(
mocker, patch_orchestrator_helpers, make_orchestrator_device
):
"""Without an OOB IP no control-plane ACLs are wired and the owned
ACL_TABLE / ACL_RULE tables stay absent — stale base-config content is
removed by the up-front owned-table drop, not re-created."""
base = make_base_config()
base["ACL_TABLE"] = {"SNMP_ONLY": {"type": "CTRLPLANE"}}
base["ACL_RULE"] = {"SNMP_ONLY|RULE_1": {"PRIORITY": "9999"}}
patch_base_config(mocker, base_config=base)
patch_orchestrator_helpers.get_device_oob_ip.return_value = None
device = make_orchestrator_device()

config = generate_sonic_config(device, "HWSKU")

patch_orchestrator_helpers._add_ctrlplane_acls.assert_not_called()
assert "ACL_TABLE" not in config
assert "ACL_RULE" not in config


# ---------------------------------------------------------------------------
# generate_sonic_config — breakout merge
# ---------------------------------------------------------------------------
Expand Down