#!/usr/bin/env python3
# Copyright (c) 2026 Umber
#
# Licensed under the Apache License, Version 2.0; see LICENSE.
#
# Lab fronthaul (PCIe) hot-swap harness — not part of pytest; run from repo root with an editable install.
# Examples:
#   python3 scripts/system/pcie_hotswap_harness.py --dry-run
#   python3 scripts/system/pcie_hotswap_harness.py --fabric-json configs/my-fabric.json --lab-ini configs/default.ini --dry-run
#   FIWI_REMOTE_IP=192.168.1.39 python3 scripts/system/pcie_hotswap_harness.py --dry-run --paths 2
#
# This script is meant to reveal design gaps. See DESIGN_GAPS below.
"""PCIe fronthaul hot-swap exercise (async, optional SSH + future Power binding).
|
|
|
|
DESIGN_GAPS (fill as the stack grows)
|
|
--------------------------------------
|
|
1. **Port routing** — With ``--fabric-json``, Acroname port per ``radio_id`` is in the file and
|
|
copied to ``Fabric.rrh_power_ports``; ``one_cycle`` still does not call ``Power.on/off``. Placeholder
|
|
mode (``--paths`` only) has no port map.
|
|
2. **Shared ``Power`` + lock** — Fields exist on ``Fabric``; hot-swap harness must still call
|
|
``power`` only under ``power_lock`` (not wired in ``one_cycle`` yet).
|
|
3. **Enumeration truth** — We only shell out placeholders (``lspci``, ``true``). Real
|
|
checks need agreed sysfs / ``pciutils`` / driver contracts and pass/fail criteria.
|
|
4. **Telemetry ingest** — ``FrontHaulTelemetry`` has no parser from Adnacom JSON/CLI yet.
|
|
5. **INI → targets** — With ``--fabric-json``, lab INI is merged **after** JSON load (``[fabric]``,
|
|
``[fabric.rrh.*]``) unless ``--no-lab-ini``. INI-only JSON without interactive bind:
|
|
``scripts/system/fabric_realize.py`` or ``python3 -m fiwicontrol.fabric build`` / ``bind``.
|
|
6. **SPC hook** — After KPI extraction, ``fiwicontrol.spc`` charts belong in a separate reporting
|
|
step, not inside this harness.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Repo root on path when run as ``python3 scripts/system/pcie_hotswap_harness.py``.
# BUG FIX: the guard previously checked ``_REPO_ROOT not in sys.path`` but inserted
# ``_REPO_ROOT + "/src"`` — the tested string never matched the inserted one, so the
# src path was re-inserted on every import. Guard on the exact path we insert.
_REPO_ROOT = __file__.rsplit("/scripts/system/", 1)[0]
_SRC_DIR = _REPO_ROOT + "/src"
if _SRC_DIR not in sys.path:
    sys.path.insert(0, _SRC_DIR)
|
|
|
from fiwicontrol.commands import ssh_node
|
|
from fiwicontrol.fabric import Fabric
|
|
from fiwicontrol.fabric.fabric import FabricBindingStatus, FabricDefinition
|
|
from fiwicontrol.lab.inventory_config import default_lab_ini_path
|
|
from fiwicontrol.fronthaul import FrontHaul
|
|
from fiwicontrol.radio import RadioHead
|
|
|
|
|
|
async def _remote_smoke(node: ssh_node, *, label: str) -> str:
|
|
"""Minimal SSH proof; extend with lspci/sysfs once contracts exist."""
|
|
session = await node.rexec(cmd="set -e; uname -n; lspci -nn 2>/dev/null | head -n 3 || true")
|
|
out = ""
|
|
if session.results is not None:
|
|
out = session.results.decode("utf-8", errors="replace").strip()
|
|
logging.info("[%s] remote smoke:\n%s", label, out or "(no stdout)")
|
|
return out
|
|
|
|
|
|
async def one_cycle(
    *,
    label: str,
    rrh: RadioHead,
    node: ssh_node | None,
    dry_run: bool,
    settle_s: float,
    acroname_port: int | None = None,
) -> None:
    """One conceptual remove → wait → restore → verify (placeholders)."""
    # Start banner; include the Acroname port only when a mapping exists.
    if acroname_port is None:
        logging.info("[%s] cycle start radio_id=%s", label, rrh.radio_id)
    else:
        logging.info("[%s] cycle start radio_id=%s acroname_port=%s", label, rrh.radio_id, acroname_port)

    if not dry_run:
        # No power actuation wired yet — settle delay stands in for the cycle.
        await asyncio.sleep(settle_s)
    else:
        logging.info("[%s] DRY-RUN: would drop VBUS / assert link down", label)
        await asyncio.sleep(settle_s)
        logging.info("[%s] DRY-RUN: would restore power / wait for training", label)

    if node is None:
        logging.warning("[%s] no SSH node; skip remote checks", label)
    else:
        await _remote_smoke(node, label=label)

    logging.info("[%s] cycle end", label)
|
|
|
|
|
|
async def run_campaign(
    *,
    fabric: Fabric,
    dry_run: bool,
    iterations: int,
    settle_s: float,
) -> None:
    """Run *iterations* sequential sweeps; within each, cycle every RRH concurrently."""
    ssh = fabric.concentrator
    for sweep in range(iterations):
        logging.info("=== iteration %s / %s (fabric_id=%s) ===", sweep + 1, iterations, fabric.fabric_id)
        # One TaskGroup per sweep: all RRH cycles of a sweep run in parallel,
        # and the sweep completes only when every cycle has finished.
        async with asyncio.TaskGroup() as tg:
            for rrh in fabric.rrhs:
                tg.create_task(
                    one_cycle(
                        label="{}#{}".format(rrh.radio_id, sweep),
                        rrh=rrh,
                        node=ssh,
                        dry_run=dry_run,
                        settle_s=settle_s,
                        # May be None when no Acroname port mapping is known.
                        acroname_port=fabric.rrh_power_ports.get(rrh.radio_id),
                    )
                )
|
|
|
|
|
|
def _build_placeholder_rrhs(n: int) -> list[RadioHead]:
    """Fabricate *n* placeholder PCIe radio heads (ids rrh-01.., patch ports from 100)."""
    return [
        RadioHead(
            radio_id="rrh-{:02d}".format(idx + 1),
            patch_panel_port=100 + idx,
            fronthaul=FrontHaul(medium="pcie", vendor_id=None, device_id=None, link_states=()),
        )
        for idx in range(n)
    ]
|
|
|
|
|
|
def main() -> int:
|
|
p = argparse.ArgumentParser(description=__doc__)
|
|
p.add_argument("--dry-run", action="store_true", help="log only; no Acroname calls (none wired yet)")
|
|
p.add_argument("--rig-ip", default=os.environ.get("FIWI_REMOTE_IP"), help="SSH target (default: $FIWI_REMOTE_IP)")
|
|
p.add_argument(
|
|
"--fabric-json",
|
|
metavar="PATH",
|
|
help="Load RRH bindings + Acroname port map from JSON (see docs/fabric-builder.md; python -m fiwicontrol.fabric build -o …)",
|
|
)
|
|
p.add_argument(
|
|
"--strict-fabric-ready",
|
|
action="store_true",
|
|
help="Exit 2 unless Fabric.binding_cache_status is READY (needs live Acroname discovery)",
|
|
)
|
|
p.add_argument(
|
|
"--lab-ini",
|
|
metavar="PATH",
|
|
default=None,
|
|
help="Lab inventory INI merged over fabric JSON (default: FIWI_LAB_INI or configs/default.ini)",
|
|
)
|
|
p.add_argument(
|
|
"--no-lab-ini",
|
|
action="store_true",
|
|
help="Do not merge [fabric] / [fabric.rrh.*] from lab INI (JSON only)",
|
|
)
|
|
p.add_argument("--paths", type=int, default=1, metavar="N", help="placeholder RRH count (ignored with --fabric-json)")
|
|
p.add_argument("--iterations", type=int, default=1, help="sequential outer iterations")
|
|
p.add_argument("--settle", type=float, default=0.5, help="seconds between placeholder phases")
|
|
args = p.parse_args()
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
|
|
|
if args.fabric_json:
|
|
st = Fabric.binding_cache_status(args.fabric_json)
|
|
logging.info("Fabric JSON binding_cache_status=%s", st.value)
|
|
if args.strict_fabric_ready and st is not FabricBindingStatus.READY:
|
|
logging.error("Strict fabric check failed (status is not READY)")
|
|
return 2
|
|
if args.no_lab_ini:
|
|
definition = FabricDefinition.load(args.fabric_json)
|
|
else:
|
|
ini_p = Path(args.lab_ini) if args.lab_ini else default_lab_ini_path()
|
|
if ini_p.is_file():
|
|
logging.info("Merging lab INI %s over fabric JSON", ini_p)
|
|
definition = FabricDefinition.load_json_merged_with_ini(args.fabric_json, ini_p)
|
|
else:
|
|
logging.warning("Lab INI not found (%s); loading fabric JSON only", ini_p)
|
|
definition = FabricDefinition.load(args.fabric_json)
|
|
fabric = Fabric.from_definition(definition, power_lock=asyncio.Lock())
|
|
if args.rig_ip:
|
|
fabric = fabric.with_concentrator_override(name="rig", ipaddr=args.rig_ip)
|
|
if fabric.concentrator is None and not args.rig_ip:
|
|
logging.warning("No concentrator after JSON/INI merge and no --rig-ip; SSH checks skipped")
|
|
else:
|
|
rrhs = _build_placeholder_rrhs(max(1, args.paths))
|
|
if not args.rig_ip:
|
|
logging.warning("No --rig-ip or FIWI_REMOTE_IP: SSH checks skipped")
|
|
concentrator = ssh_node(name="rig", ipaddr=args.rig_ip) if args.rig_ip else None
|
|
fabric = Fabric(
|
|
fabric_id="pcie-hotswap-harness",
|
|
rrhs=tuple(rrhs),
|
|
concentrator=concentrator,
|
|
power_lock=asyncio.Lock(),
|
|
)
|
|
|
|
exit_code = 0
|
|
try:
|
|
asyncio.run(
|
|
run_campaign(
|
|
fabric=fabric,
|
|
dry_run=args.dry_run,
|
|
iterations=max(1, args.iterations),
|
|
settle_s=args.settle,
|
|
)
|
|
)
|
|
except* Exception as eg:
|
|
for e in eg.exceptions:
|
|
logging.error("campaign failed: %s", e)
|
|
exit_code = 1
|
|
return exit_code
|
|
|
|
|
|
if __name__ == "__main__":
|
|
raise SystemExit(main())
|