From 2034da7185fcdab9672c60a2c4b7deafda7dfbbd Mon Sep 17 00:00:00 2001 From: Robert McMahon Date: Thu, 23 Apr 2026 19:01:21 -0700 Subject: [PATCH] Add RRH mapping and power-control workflows for local and relay Acronames. This updates the fabric builder and system scripts so operators can isolate ports, power local+remote RRHs, discover PCI chip candidates, and write radio-to-BDF mappings with documented commands. Made-with: Cursor --- configs/clubhouse-uax-24.json | 154 ++++++++ docs/system-test-scripts.md | 116 ++++-- scripts/system/assign_rrh_pcie_bdf.py | 153 ++++++++ scripts/system/discover_wifi_pci.py | 343 +++++++++++++++++- scripts/system/map_fiber_ports.py | 151 ++++++++ scripts/system/power_only_7915.py | 100 +----- scripts/system/rrh_power_control.py | 128 +++++++ src/fiwicontrol/fabric/builder.py | 496 ++++++++++++++++++++++---- src/fiwicontrol/lab/discovery.py | 16 +- 9 files changed, 1454 insertions(+), 203 deletions(-) create mode 100644 configs/clubhouse-uax-24.json create mode 100644 scripts/system/assign_rrh_pcie_bdf.py create mode 100644 scripts/system/map_fiber_ports.py create mode 100644 scripts/system/rrh_power_control.py diff --git a/configs/clubhouse-uax-24.json b/configs/clubhouse-uax-24.json new file mode 100644 index 0000000..d50dd85 --- /dev/null +++ b/configs/clubhouse-uax-24.json @@ -0,0 +1,154 @@ +{ + "schema_version": 1, + "fabric_id": "Clubhouse24", + "discovery_fingerprint": "99d9ce818902c2f63d0f309d87821020648d0cbed7cf427bd24a9d5571d8c3f6", + "concentrator_name": "fedora42", + "concentrator_ipaddr": "192.168.1.101", + "rrhs": [ + { + "radio_id": "rh-04", + "acroname_port": 0, + "acroname_module_serial": 295883981, + "patch_panel_port": 4 + }, + { + "radio_id": "rh-02", + "acroname_port": 1, + "acroname_module_serial": 295883981, + "patch_panel_port": 2 + }, + { + "radio_id": "rh-03", + "acroname_port": 2, + "acroname_module_serial": 295883981, + "patch_panel_port": 3 + }, + { + "radio_id": "rh-01", + "acroname_port": 3, + 
"acroname_module_serial": 295883981, + "patch_panel_port": 1 + }, + { + "radio_id": "rh-17", + "acroname_port": 0, + "acroname_module_serial": 882238458, + "patch_panel_port": 17 + }, + { + "radio_id": "rh-11", + "acroname_port": 1, + "acroname_module_serial": 882238458, + "patch_panel_port": 11 + }, + { + "radio_id": "rh-10", + "acroname_port": 2, + "acroname_module_serial": 882238458, + "patch_panel_port": 10 + }, + { + "radio_id": "rh-07", + "acroname_port": 3, + "acroname_module_serial": 882238458, + "patch_panel_port": 7 + }, + { + "radio_id": "rh-08", + "acroname_port": 4, + "acroname_module_serial": 882238458, + "patch_panel_port": 8 + }, + { + "radio_id": "rh-18", + "acroname_port": 5, + "acroname_module_serial": 882238458, + "patch_panel_port": 18 + }, + { + "radio_id": "rh-19", + "acroname_port": 6, + "acroname_module_serial": 882238458, + "patch_panel_port": 19 + }, + { + "radio_id": "rh-12", + "acroname_port": 7, + "acroname_module_serial": 882238458, + "patch_panel_port": 12 + }, + { + "radio_id": "rh-24", + "acroname_port": 0, + "acroname_module_serial": 3346268699, + "patch_panel_port": 24 + }, + { + "radio_id": "rh-21", + "acroname_port": 1, + "acroname_module_serial": 3346268699, + "patch_panel_port": 21 + }, + { + "radio_id": "rh-09", + "acroname_port": 2, + "acroname_module_serial": 3346268699, + "patch_panel_port": 9 + }, + { + "radio_id": "rh-23", + "acroname_port": 3, + "acroname_module_serial": 3346268699, + "patch_panel_port": 23 + }, + { + "radio_id": "rh-20", + "acroname_port": 0, + "acroname_module_serial": 1960815024, + "patch_panel_port": 20 + }, + { + "radio_id": "rh-15", + "acroname_port": 1, + "acroname_module_serial": 1960815024, + "patch_panel_port": 15 + }, + { + "radio_id": "rh-22", + "acroname_port": 2, + "acroname_module_serial": 1960815024, + "patch_panel_port": 22 + }, + { + "radio_id": "rh-06", + "acroname_port": 3, + "acroname_module_serial": 1960815024, + "patch_panel_port": 6 + }, + { + "radio_id": "rh-13", + 
"acroname_port": 4, + "acroname_module_serial": 1960815024, + "patch_panel_port": 13 + }, + { + "radio_id": "rh-16", + "acroname_port": 5, + "acroname_module_serial": 1960815024, + "patch_panel_port": 16 + }, + { + "radio_id": "rh-14", + "acroname_port": 6, + "acroname_module_serial": 1960815024, + "patch_panel_port": 14 + }, + { + "radio_id": "rh-05", + "acroname_port": 7, + "acroname_module_serial": 1960815024, + "patch_panel_port": 5 + } + ], + "lab_ini": "/home/rjmcmahon/Code/FiWiControl/configs/default.ini" +} diff --git a/docs/system-test-scripts.md b/docs/system-test-scripts.md index 3830341..b82a428 100644 --- a/docs/system-test-scripts.md +++ b/docs/system-test-scripts.md @@ -25,10 +25,10 @@ Keep **pytest** strict and deterministic. Keep **system scripts** explicit about `**scripts/system/pcie_hotswap_harness.py`** models a **fronthaul (PCIe) hot-swap campaign**: -1. Build a `**Fabric`**: either load `**--fabric-json**` (`**FabricDefinition**` from disk → `**Fabric.rrhs**`, `**rrh_power_ports**`, fingerprint) or build **N placeholder** `**RadioHead`** instances (each with a `**FrontHaul**`) via `**--paths**` and wrap them in `**Fabric**` (optional concentrator `**ssh_node**`, `**power_lock**`). -2. For each **iteration**, run `**asyncio.TaskGroup`**: every RRH runs `**one_cycle**` **concurrently** (stressing shared-resource design: one BrainStem, one rig SSH target, and so on). -3. Each cycle: **log** remove/restore phases ( `**--dry-run`** ) or placeholders for future `**Power**` calls, then optionally **SSH** to the concentrator for a minimal **smoke** command (`uname`, sample `lspci` output). -4. Exit **non-zero** if the async campaign raises (including `**TaskGroup`** child failures), using `**except* Exception**` so `**ExceptionGroup**` surfaces every underlying error. +1. 
Build a `**Fabric`**: either load `**--fabric-json`** (`**FabricDefinition**` from disk → `**Fabric.rrhs**`, `**rrh_power_ports**`, fingerprint) or build N placeholder `**RadioHead**` instances (each with a `**FrontHaul**`) via `**--paths**` and wrap them in `**Fabric**` (optional concentrator `**ssh_node**`, `**power_lock**`). +2. For each **iteration**, run `**asyncio.TaskGroup`**: every RRH runs `**one_cycle`** **concurrently** (stressing shared-resource design: one BrainStem, one rig SSH target, and so on). +3. Each cycle: **log** remove/restore phases ( `**--dry-run`** ) or placeholders for future `**Power`** calls, then optionally **SSH** to the concentrator for a minimal **smoke** command (`uname`, sample `lspci` output). +4. Exit **non-zero** if the async campaign raises (including `**TaskGroup`** child failures), using `**except* Exception`** so `**ExceptionGroup**` surfaces every underlying error. The script’s module docstring lists **DESIGN_GAPS** — known extension points so harness scope stays explicit. @@ -38,9 +38,9 @@ The script’s module docstring lists **DESIGN_GAPS** — known extension points Full workflow (INI → discovery → prompts → JSON): `**docs/fabric-builder.md`**. -`**pip install -e ".[power]"**` on the workstation that sees the Acroname hub. +`**pip install -e ".[power]"`** on the workstation that sees the Acroname hub. -1. **Fabric builder** — use `**build`** when a lab INI must be loaded first; `**bind**` is the same with INI optional if the default path is missing: +1. 
**Fabric builder** — use `**build`** when a lab INI must be loaded first; `**bind`** is the same with INI optional if the default path is missing: ```bash python3 -m fiwicontrol.fabric build -o configs/my-fabric.json -c configs/default.ini python3 -m fiwicontrol.fabric bind -o configs/my-fabric.json -c configs/default.ini @@ -49,7 +49,7 @@ Full workflow (INI → discovery → prompts → JSON): `**docs/fabric-builder.m ```bash python3 -m fiwicontrol.fabric status -f configs/my-fabric.json ``` -3. **Harness** — load that graph (optional `**--strict-fabric-ready`** to require `**READY**` status): +3. **Harness** — load that graph (optional `**--strict-fabric-ready`** to require `**READY`** status): ```bash python3 scripts/system/pcie_hotswap_harness.py --fabric-json configs/my-fabric.json --dry-run ``` @@ -60,9 +60,9 @@ Types live under `**fiwicontrol.fabric**` (`**FabricDefinition**`, `**FabricRRHB ## Concentrator dump (`scripts/system/dump_concentrator.py`) -**Purpose:** capture **this machine’s** concentrator-relevant facts in one place: CPU summary from `**/proc/cpuinfo`**, and (by default) a **local host probe** — `**lspci -tv`**, `**/sys/bus/pci/devices/*/current_link_width**` (and related link fields), and `**dmidecode -t baseboard**` when the binary succeeds (often after `**sudo**`, because SMBIOS is not always readable as a normal user). +**Purpose:** capture **this machine’s** concentrator-relevant facts in one place: CPU summary from `**/proc/cpuinfo`**, and (by default) a local host probe — `**lspci -tv`**, `**/sys/bus/pci/devices/*/current_link_width**` (and related link fields), and `**dmidecode -t baseboard**` when the binary succeeds (often after `**sudo**`, because SMBIOS is not always readable as a normal user). 
-**Default output is human text**, not JSON: a short CPU block; one line with the **total count** of sysfs PCI devices that expose negotiated link width/speed; a **Wi‑Fi / wireless-only** table (`**K of N`**) for PCI class `**0x028…**` (network + wireless) with `**w`/`W**` lanes, **GT/s** current/max, `**class`**, and a **chip** column from `**lspci -nn`** (preferred) or sysfs `**vendor**` / `**device**` hex pair (long chip strings are truncated); a **peek** at the first `**--lspci-lines`** rows of `**lspci -tv**` (default **18**, remainder summarized); and the **first 14 lines** of `**dmidecode -t baseboard`** when that command succeeds (often requires `**sudo**` on Fedora). +**Default output is human text**, not JSON: a short CPU block; one line with the **total count** of sysfs PCI devices that expose negotiated link width/speed; a **Wi‑Fi / wireless-only** table (`**K of N`**) for PCI class `**0x028…`** (network + wireless) with `**w`/`W**` lanes, GT/s current/max, `**class**`, and a **chip** column from `**lspci -nn`** (preferred) or sysfs `**vendor`** / `**device**` hex pair (long chip strings are truncated); a **peek** at the first `**--lspci-lines`** rows of `**lspci -tv`** (default **18**, remainder summarized); and the **first 14 lines** of `**dmidecode -t baseboard`** when that command succeeds (often requires `**sudo`** on Fedora). | Flag | Meaning | @@ -71,10 +71,10 @@ Types live under `**fiwicontrol.fabric**` (`**FabricDefinition**`, `**FabricRRHB | `**--no-host-probe**` | CPU-only; skip `**lspci**`, sysfs PCI enumeration, and `**dmidecode**`. | | `**--pci-sysdir DIR**` | Override `**/sys/bus/pci/devices**` (testing or nonstandard roots). | | `**--pci-all**` | After the Wi‑Fi table, append a second table of **other** “interesting” non-wireless links (wide ports / downgrades), still capped by `**--pci-max-rows`**. | -| `**--pci-max-rows N**` | Cap for the optional second table (default **40**). 
| -| `**--lspci-lines N`** | Lines of `**lspci -tv**` in human output (**0** = omit that block; default **18**). | +| `**--pci-max-rows N`** | Cap for the optional second table (default **40**). | +| `**--lspci-lines N`** | Lines of `**lspci -tv`** in human output (**0** = omit that block; default **18**). | | `**--label NAME`** | Shown in the human header only. | -| `**--proc-cpuinfo PATH**` | Override `**/proc/cpuinfo**` (tests or chroots). | +| `**--proc-cpuinfo PATH`** | Override `**/proc/cpuinfo**` (tests or chroots). | **Examples:** @@ -90,15 +90,15 @@ sudo python3 scripts/system/dump_concentrator.py python3 scripts/system/dump_concentrator.py --json > /tmp/concentrator.json ``` -**Python API:** `**fiwicontrol.concentrator.ConcentratorPlatform`**, `**ConcentratorPlatformSnapshot**`, `**PciDeviceLinkSnapshot**`, `**format_concentrator_platform_snapshot_human()**` (same layout as the script’s default text; optional `**lspci_nn_by_bdf=**` for tests). Implementation lives in `**src/fiwicontrol/concentrator/host.py**` (package `**fiwicontrol.concentrator**` — local workstation facts, parallel to `**fiwicontrol.radio**` for RRH aggregates; not part of fabric JSON). +**Python API:** `**fiwicontrol.concentrator.ConcentratorPlatform`**, `**ConcentratorPlatformSnapshot`**, `**PciDeviceLinkSnapshot**`, `**format_concentrator_platform_snapshot_human()**` (same layout as the script’s default text; optional `**lspci_nn_by_bdf=**` for tests). Implementation lives in `**src/fiwicontrol/concentrator/host.py**` (package `**fiwicontrol.concentrator**` — local workstation facts, parallel to `**fiwicontrol.radio**` for RRH aggregates; not part of fabric JSON). -When the harness (or your script) loads `**--fabric-json**`, it **merges lab INI by default** (same file as `**fiwicontrol.lab`**: `**FIWI_LAB_INI**`, else `**configs/default.ini**` if present). Pass `**--lab-ini PATH**` to point at another file. 
Merged keys include optional `**[fabric]**` (`**fabric_id**`, `**concentrator**` → `**[machine.*]**` SSH target) and optional `**[fabric.rrh.]**` to override Acroname port / patch panel / module serial for rows already present in the JSON. Use `**--no-lab-ini**` to skip. JSON supplies `**discovery_fingerprint**` and the RRH binding list (key `**rrhs**`; Python: `**FabricDefinition.rrhs**`) from `**fabric build**` / `**bind**` or `**fabric_realize.py --json**`. +When the harness (or your script) loads `**--fabric-json**`, it **merges lab INI by default** (same file as `**fiwicontrol.lab`**: `**FIWI_LAB_INI`**, else `**configs/default.ini**` if present). Pass `**--lab-ini PATH**` to point at another file. Merged keys include optional `**[fabric]**` (`**fabric_id**`, `**concentrator**` → `**[machine.*]**` SSH target) and optional `**[fabric.rrh.]**` to override Acroname port / patch panel / module serial for rows already present in the JSON. Use `**--no-lab-ini**` to skip. JSON supplies `**discovery_fingerprint**` and the RRH binding list (key `**rrhs**`; Python: `**FabricDefinition.rrhs**`) from `**fabric build**` / `**bind**` or `**fabric_realize.py --json**`. --- ## Acroname discovery smoke test (`scripts/system/test_acroname_usb_discovery.py`) -Runs BrainStem USB enumeration **per `[machine.*]` row** in the lab INI: `**usb=local`** on the workstation you run from, `**usb=remote**` over SSH (same interpreter contract as `**fiwicontrol.power --discovery-json**`). Prints a short table per machine, `**brainstem_version**` from discovery JSON (with an SSH fallback pip probe when the remote build omits that field), and a **total module count** across hosts. +Runs BrainStem USB enumeration **per `[machine.*]` row** in the lab INI: `**usb=local`** on the workstation you run from, `**usb=remote`** over SSH (same interpreter contract as `**fiwicontrol.power --discovery-json**`). 
Prints a short table per machine, `**brainstem_version**` from discovery JSON (with an SSH fallback pip probe when the remote build omits that field), and a **total module count** across hosts. ```bash python3 scripts/system/test_acroname_usb_discovery.py @@ -110,11 +110,71 @@ Use `**--local-only**` to skip the INI and probe only this machine’s USB. See --- +## Wi-Fi PCI chip discovery (`scripts/system/discover_wifi_pci.py`) + +Prints Wi-Fi / MediaTek-looking `**lspci -nn**` lines with suggested `**pcie_bdf**` values. + +```bash +# Just discover chips/BDFs visible right now +python3 scripts/system/discover_wifi_pci.py + +# JSON output for tooling +python3 scripts/system/discover_wifi_pci.py --json +``` + +### Optional: power all RRHs ON first + +When radios are currently off, add `**--power-all-on**` so PCI devices are present before discovery: + +```bash +python3 scripts/system/discover_wifi_pci.py --power-all-on -f configs/clubhouse-uax-24.json +``` + +`**--power-all-on**` powers both local and relay-host RRH ports (using `**-c/--lab-ini**` for relay resolution): + +```bash +python3 scripts/system/discover_wifi_pci.py --power-all-on -f configs/clubhouse-uax-24.json -c configs/default.ini +``` + +Safety preview (no toggles): + +```bash +python3 scripts/system/discover_wifi_pci.py --power-all-on -f configs/clubhouse-uax-24.json -c configs/default.ini --dry-run +``` + +`**--power-all-on**` uses RRH bindings from the fabric JSON (`**acroname_module_serial**` + `**acroname_port**`) and drives Acroname ports locally via BrainStem and remotely via SSH (relay hosts from the lab INI). 
+ +--- + +## Interactive RRH -> PCI mapping (`scripts/system/assign_rrh_pcie_bdf.py`) + +Use this when you want to assign `**pcie_bdf**` values to each `**radio_id**` and write them to the lab INI: + +```bash +python3 scripts/system/assign_rrh_pcie_bdf.py --power-on-first -f configs/clubhouse-uax-24.json -c configs/default.ini +``` + +Behavior: + +- Powers local + relay RRHs on first (same logic as `**discover_wifi_pci.py --power-all-on**`). +- Lists Wi-Fi PCI candidates from local `**lspci -nn**`. +- Prompts once per `**radio_id**` to pick a candidate index or type a BDF directly. +- Writes/updates `**[fabric.rrh.] pcie_bdf = ...**` in the INI. + +Prompt shortcuts: + +- `**Enter**` — keep existing value +- `**-**` — clear `pcie_bdf` +- `****` — select from candidate list +- `****` — set explicit value (e.g. `**03:00.0**`) + +--- + ## Fabric compose + realize (`scripts/system/fabric_realize.py --realize`) -Loads the lab INI, runs **local** Acroname discovery, `**compose_definition`**, builds `**Fabric**`, then `**await fab.realize()**` (strict fingerprint check against live USB). Default **stdout** is an **OK** line plus `**print(fabric)`** (human `**Fabric.__str__**` summary). Pass `**--json**` for **stdout**-only `**FabricDefinition`** JSON after a successful realize. `**-v**` adds discovery / pre-realize fabric lines on **stderr**; `**--no-strict`** passes `**strict=False**` into `**Fabric.realize()**`. `**--realize-discovery-timeout SEC**` bounds Acroname discovery during `**--realize**` (default **120**). **Exit codes** and FDIR semantics: `**docs/fdir.md`** and `**fabric_realize.py --help**` (epilog). +Loads the lab INI, runs **local** Acroname discovery, `**compose_definition`**, builds `**Fabric`**, then `**await fab.realize()**` (strict fingerprint check against live USB). Default stdout is an OK line plus `**print(fabric)**` (human `**Fabric.__str__**` summary). Pass `**--json**` for **stdout**-only `**FabricDefinition`** JSON after a successful realize. 
`**-v`** adds discovery / pre-realize fabric lines on **stderr**; `**--no-strict`** passes `**strict=False`** into `**Fabric.realize()**`. `**--realize-discovery-timeout SEC**` bounds Acroname discovery during `**--realize**` (default **120**). **Exit codes** and FDIR semantics: `**docs/fdir.md`** and `**fabric_realize.py --help`** (epilog). -Without `**--realize**`, `**fabric_realize.py**` only composes the definition and prints a **human** workstation report (or `**--json`** / `**-o**` for definition JSON **without** calling `**Fabric.realize()`**). The human report can merge patch-panel labels into the Wi‑Fi PCIe table when `**--patch-panel-json PATH**` is set or when `**_panel.json**` exists beside the lab INI (see `**fiwicontrol.fabric.patch_panel_json**`). +Without `**--realize**`, `**fabric_realize.py**` only composes the definition and prints a **human** workstation report (or `**--json`** / `**-o`** for definition JSON **without** calling `**Fabric.realize()`**). The human report can merge patch-panel labels into the Wi‑Fi PCIe table when `**--patch-panel-json PATH`** is set or when `**_panel.json**` exists beside the lab INI (see `**fiwicontrol.fabric.patch_panel_json**`). --- @@ -125,15 +185,15 @@ Without `**--realize**`, `**fabric_realize.py**` only composes the definition an cd ~/Code/FiWiControl python3 -m pip install -e ".[dev]" ``` -2. **Python 3.11+** — the example uses `**asyncio.TaskGroup`** and `**except* Exception**`. -3. **Optional SSH to the rig** — same contract as elsewhere: passwordless `**root@`** for `**sshtype="ssh"**`. Optional `**FIWI_SSH_CONFIG**` is documented in `**docs/node-control-asyncio-design.md**`. -4. **Power / Acroname** — not wired in the example yet. When you add `**fiwicontrol.power`**, use `**pip install -e ".[power]"**` and follow `**docs/power-control-and-inventory.md**`. +2. **Python 3.11+** — the example uses `**asyncio.TaskGroup`** and `**except* Exception`**. +3. 
**Optional SSH to the rig** — same contract as elsewhere: passwordless `**root@`** for `**sshtype="ssh"`**. Optional `**FIWI_SSH_CONFIG**` is documented in `**docs/node-control-asyncio-design.md**`. +4. **Power / Acroname** — not wired in the example yet. When you add `**fiwicontrol.power`**, use `**pip install -e ".[power]"`** and follow `**docs/power-control-and-inventory.md**`. --- ## How to run the example -From the **repository root** (the script prepends `**src`** to `**sys.path**` if needed): +From the **repository root** (the script prepends `**src`** to `**sys.path`** if needed): ```bash # Safe: no SSH, no hardware — exercises structure only @@ -154,7 +214,7 @@ python3 scripts/system/pcie_hotswap_harness.py --dry-run --paths 2 --rig-ip 192. | `**--fabric-json PATH**` | Load `**FabricDefinition**` from JSON; sets `**Fabric.rrhs**` and `**rrh_power_ports**`. Without it, uses `**--paths**` placeholders. | | `**--lab-ini PATH**` | Lab INI merged after JSON (default: `**FIWI_LAB_INI**`, else `**configs/default.ini**` if present). | | `**--no-lab-ini**` | Skip INI merge; JSON only. | -| `**--strict-fabric-ready**` | Exit **2** unless `**Fabric.binding_cache_status`** is `**READY**` (requires live Acroname discovery). Only meaningful with `**--fabric-json**`. | +| `**--strict-fabric-ready**` | Exit **2** unless `**Fabric.binding_cache_status`** is `**READY`** (requires live Acroname discovery). Only meaningful with `**--fabric-json**`. | | `**--dry-run**` | Log only; no programmable power (none hooked up in this skeleton). | | `**--paths N**` | Placeholder RRH count (ignored when `**--fabric-json**` is set). | | `**--iterations M**` | Outer loop: run `**M**` sequential `**TaskGroup**` rounds. | @@ -168,7 +228,7 @@ python3 scripts/system/pcie_hotswap_harness.py --dry-run --paths 2 --rig-ip 192. ### 1. Thin `main()` — parse, configure logging, call `asyncio.run` -Keep **I/O policy** (flags, env) in `**main()`**. 
Keep **async** logic in `**async def`** functions so tests or imports can reuse the coroutines without a second event loop. +Keep **I/O policy** (flags, env) in `**main()`**. Keep async logic in `**async def`** functions so tests or imports can reuse the coroutines without a second event loop. ### 2. One coroutine per “story”: `one_cycle`, `run_campaign` @@ -176,7 +236,7 @@ Name coroutines after **user-visible steps** (cycle, campaign, smoke). Pass **ex ### 3. Concurrency with `TaskGroup` -When multiple RRHs run together, `**async with asyncio.TaskGroup() as tg:`** + `**tg.create_task(...)**` fails fast and bundles errors in an `**ExceptionGroup**`. Catch with `**except* Exception**` at the boundary that owns `**asyncio.run**`, log each sub-exception, and return a process exit code. +When multiple RRHs run together, `**async with asyncio.TaskGroup() as tg:`** + `**tg.create_task(...)`** fails fast and bundles errors in an `**ExceptionGroup**`. Catch with `**except* Exception**` at the boundary that owns `**asyncio.run**`, log each sub-exception, and return a process exit code. ### 4. Dry-run first @@ -184,11 +244,11 @@ Always provide a path that **does not touch hardware** so engineers can validate ### 5. Domain types from the library -Attach `**FrontHaul`** to `**RadioHead**` even when fields are `**None**` — it documents **intent** and keeps the harness aligned with production models. Pass a `**Fabric`** into the async campaign so **shared** resources (concentrator SSH, bench `**Power`**, `**asyncio.Lock**`, `**rrh_power_ports**`) have one home. Prefer `**--fabric-json**` (bound once via `**python3 -m fiwicontrol.fabric bind**`) over ad hoc placeholders; reserve `**--paths**` for laptop-only smoke. +Attach `**FrontHaul`** to `**RadioHead`** even when fields are `**None**` — it documents **intent** and keeps the harness aligned with production models. 
Pass a `**Fabric`** into the async campaign so **shared** resources (concentrator SSH, bench `**Power`**, `**asyncio.Lock`**, `**rrh_power_ports**`) have one home. Prefer `**--fabric-json**` (bound once via `**python3 -m fiwicontrol.fabric bind**`) over ad hoc placeholders; reserve `**--paths**` for laptop-only smoke. ### 6. Remote checks via `ssh_node` -Use `**await node.rexec(cmd="...", ...)**` for one-shot remote work. For **periodic** sampling, prefer `**Command`** / `**CommandManager**` from `**fiwicontrol.commands**` (see `**docs/node-control-asyncio-design.md**`). +Use `**await node.rexec(cmd="...", ...)**` for one-shot remote work. For **periodic** sampling, prefer `**Command`** / `**CommandManager`** from `**fiwicontrol.commands**` (see `**docs/node-control-asyncio-design.md**`). ### 7. Document gaps in the script @@ -198,7 +258,7 @@ A short **DESIGN_GAPS** or **TODO** block at the top of the harness documents ho ## Checklist for a new system script -1. [ ] Lives under `**scripts/system/`** with a `**#!/usr/bin/env python3**` shebang. +1. [ ] Lives under `**scripts/system/`** with a `**#!/usr/bin/env python3`** shebang. 2. [ ] `**argparse**` (or equivalent) documents every assumption; `**--help**` is accurate. 3. [ ] `**--dry-run**` (or equivalent) when hardware is involved. 4. [ ] `**logging**` at INFO for operator visibility; avoid `**print**` for control flow. @@ -212,7 +272,7 @@ A short **DESIGN_GAPS** or **TODO** block at the top of the harness documents ho ## Related docs - `**docs/pcie-hotswap-setup.md`** — PCIe harness prerequisites and JSON generation. -- `**docs/fabric-builder.md**` — lab INI + `**python3 -m fiwicontrol.fabric build**` / `**bind**`. +- `**docs/fabric-builder.md`** — lab INI + `**python3 -m fiwicontrol.fabric build**` / `**bind**`. - `**docs/install.md**` — workstation and rig setup, `**pip install -e**`. - `**docs/node-control-asyncio-design.md**` — `**ssh_node**`, `**Command**`, timeouts, running tests. 
- `**docs/power-control-and-inventory.md**` — Acroname / Monsoon, INI, `**--verify-inventory**`. diff --git a/scripts/system/assign_rrh_pcie_bdf.py b/scripts/system/assign_rrh_pcie_bdf.py new file mode 100644 index 0000000..ea23a2d --- /dev/null +++ b/scripts/system/assign_rrh_pcie_bdf.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# Copyright (c) 2026 Umber +# +# Licensed under the Apache License, Version 2.0; see LICENSE. + +"""Assign PCI BDFs to RRH IDs and write [fabric.rrh.*] pcie_bdf in INI. + +Workflow: +1) Optionally power all RRHs on (local + remote) using discover_wifi_pci helpers. +2) Discover Wi-Fi/MediaTek PCI candidates from local lspci -nn. +3) Prompt for pcie_bdf per radio_id from fabric JSON. +4) Write/update [fabric.rrh.] pcie_bdf in lab INI. +""" + +from __future__ import annotations + +import argparse +import configparser +import re +from pathlib import Path + +from fiwicontrol.fabric.fabric import FabricDefinition +from fiwicontrol.lab.inventory_config import default_lab_ini_path + +from discover_wifi_pci import _main_async as _discover_wifi_main_async + +_REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _resolve(path_text: str) -> Path: + p = Path(path_text).expanduser() + if not p.is_absolute(): + p = (_REPO_ROOT / p).resolve() + return p + + +def _wifi_candidates_from_lspci() -> list[tuple[str, str]]: + import subprocess + + p = subprocess.run(["lspci", "-nn"], capture_output=True, text=True, check=False) + if p.returncode != 0: + raise SystemExit(p.stderr or "lspci failed") + pat = re.compile(r"^([0-9a-f:.]+)\s+", re.IGNORECASE) + want = re.compile( + r"mediatek|mt79|7915|7925|7921|wireless|network controller.*802\.11|wi[- ]?fi", + re.IGNORECASE, + ) + out: list[tuple[str, str]] = [] + for line in p.stdout.splitlines(): + t = line.strip() + if not want.search(t): + continue + m = pat.match(t) + if not m: + continue + out.append((m.group(1).lower(), t)) + return out + + +def _load_or_create_ini(path: Path) -> 
configparser.ConfigParser: + cp = configparser.ConfigParser() + cp.read(path, encoding="utf-8") + return cp + + +def _write_ini(path: Path, cp: configparser.ConfigParser) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as f: + cp.write(f) + + +def main() -> int: + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument("-f", "--fabric-json", default="configs/clubhouse-uax-24.json") + ap.add_argument("-c", "--lab-ini", default=str(default_lab_ini_path())) + ap.add_argument( + "--power-on-first", + action="store_true", + help="Power local and remote RRHs on before lspci discovery", + ) + ap.add_argument( + "--dry-run", + action="store_true", + help="With --power-on-first, print power actions without toggling", + ) + args = ap.parse_args() + + fabric_json = _resolve(args.fabric_json) + lab_ini = _resolve(args.lab_ini) + if not fabric_json.is_file(): + raise SystemExit("Fabric JSON not found: {}".format(fabric_json)) + if not lab_ini.is_file(): + raise SystemExit("Lab INI not found: {}".format(lab_ini)) + + if args.power_on_first: + ns = argparse.Namespace( + json=False, + power_all_on=True, + power_remote_on=True, + fabric_json=str(fabric_json), + lab_ini=lab_ini, + dry_run=bool(args.dry_run), + ) + import asyncio + + asyncio.run(_discover_wifi_main_async(ns)) + + fd = FabricDefinition.load(fabric_json) + rrhs = sorted(fd.rrhs, key=lambda r: r.radio_id) + if not rrhs: + raise SystemExit("No RRHs in {}".format(fabric_json)) + + cands = _wifi_candidates_from_lspci() + if not cands: + raise SystemExit("No Wi-Fi PCI candidates found from lspci -nn") + + print("\nPCI candidates:") + for i, (bdf, line) in enumerate(cands, start=1): + print(" [{:>2}] {:<12} {}".format(i, bdf, line)) + + cp = _load_or_create_ini(lab_ini) + print("\nAssign pcie_bdf per radio_id (Enter to keep existing, '-' to clear).") + for rrh in rrhs: + sec = "fabric.rrh.{}".format(rrh.radio_id) + existing = cp.get(sec, "pcie_bdf", 
fallback="").strip() if cp.has_section(sec) else "" + prompt = "radio_id={} current=[{}] choose index or bdf: ".format(rrh.radio_id, existing) + raw = input(prompt).strip() + if raw == "": + continue + if raw == "-": + if cp.has_section(sec): + cp.remove_option(sec, "pcie_bdf") + continue + if raw.isdigit(): + idx = int(raw) + if idx < 1 or idx > len(cands): + print(" skip: invalid index {}".format(raw)) + continue + bdf = cands[idx - 1][0] + else: + bdf = raw.lower() + if not cp.has_section(sec): + cp.add_section(sec) + cp.set(sec, "pcie_bdf", bdf) + print(" set {} pcie_bdf={}".format(sec, bdf)) + + _write_ini(lab_ini, cp) + print("\nUpdated {}".format(lab_ini)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/system/discover_wifi_pci.py b/scripts/system/discover_wifi_pci.py index 5cdff32..a007900 100644 --- a/scripts/system/discover_wifi_pci.py +++ b/scripts/system/discover_wifi_pci.py @@ -13,9 +13,24 @@ for hub ports and module serials. from __future__ import annotations import argparse +import asyncio +import configparser import re import subprocess import sys +from pathlib import Path + +from fiwicontrol.commands.node_control import ssh_node +from fiwicontrol.fabric.fabric import FabricDefinition +from fiwicontrol.lab.discovery import ( + discover_acroname_modules, + discover_devices_remote_async, + parse_remote_discovery_payload, +) +from fiwicontrol.lab.inventory_config import default_lab_ini_path, load_inventory_ini +from fiwicontrol.fabric.ini_merge import parse_fabric_ini_overlay + +_REPO_ROOT = Path(__file__).resolve().parents[2] def _run_lspci() -> str: @@ -35,14 +50,261 @@ def _run_lspci() -> str: return p.stdout -def main() -> int: - ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - ap.add_argument( - "--json", - action="store_true", - help="Emit one JSON object per match: bdf, line", +def _resolve_json_path(fabric_json: str) -> Path: + p = 
Path(fabric_json).expanduser() + if not p.is_absolute(): + p = (_REPO_ROOT / p).resolve() + if p.is_file(): + return p + raise SystemExit("Fabric JSON not found: {}".format(p)) + + +async def _power_all_rrh_ports_on( + *, + fabric_json: str, + dry_run: bool, + show_skips: bool = True, + chip_by_radio_id: dict[str, str] | None = None, +) -> None: + path = _resolve_json_path(fabric_json) + fd = FabricDefinition.load(path) + rrhs = list(fd.rrhs) + if not rrhs: + raise SystemExit("No RRHs in {}".format(path)) + modules = discover_acroname_modules() + by_serial = {int(m.serial_number): m for m in modules} + done: set[tuple[int, int]] = set() + missing_serials: set[int] = set() + for rrh in rrhs: + if rrh.acroname_module_serial is None: + raise RuntimeError("{} missing acroname_module_serial in {}".format(rrh.radio_id, path)) + serial = int(rrh.acroname_module_serial) + port = int(rrh.acroname_port) + key = (serial, port) + if key in done: + continue + mod = by_serial.get(serial) + if mod is None: + missing_serials.add(serial) + if show_skips: + print( + "SKIP: radio_id={} module={} port={} (module not on local USB host)".format( + rrh.radio_id, serial, port + ), + file=sys.stderr, + ) + continue + if dry_run: + chip = (chip_by_radio_id or {}).get(rrh.radio_id, "unknown") + print( + "DRY-RUN ON: radio_id={} module={} port={} chip={}".format( + rrh.radio_id, serial, port, chip + ) + ) + else: + await asyncio.to_thread(_set_port_enabled_by_serial_sync, serial=serial, port=port, enabled=True) + chip = (chip_by_radio_id or {}).get(rrh.radio_id, "unknown") + print("ON: radio_id={} module={} port={} chip={}".format(rrh.radio_id, serial, port, chip)) + done.add(key) + if missing_serials and show_skips: + msg = ", ".join(str(s) for s in sorted(missing_serials)) + print( + "Note: skipped remote/non-local Acroname serial(s): {}. 
" + "Only modules attached to this machine can be powered here.".format(msg), + file=sys.stderr, + ) + + +async def _discover_remote_serial_hosts(*, lab_ini: Path) -> dict[int, tuple[str, str, str]]: + """Map module serial -> (host_name, ipaddr, sshtype) for relay hosts in lab INI.""" + doc = load_inventory_ini(lab_ini) + out: dict[int, tuple[str, str, str]] = {} + for host in doc.hosts: + if host.mode != "relay" or not host.ipaddr: + continue + node = ssh_node( + name=host.name, + ipaddr=host.ipaddr, + ssh_controlmaster=True, + sshtype=host.sshtype, + silent_mode=True, + ) + payload = await discover_devices_remote_async(node) + mods, _ = parse_remote_discovery_payload(payload) + for m in mods: + out[int(m.serial_number)] = (host.name, host.ipaddr, host.sshtype) + return out + + +async def _set_port_enabled_remote( + *, + host_name: str, + ipaddr: str, + sshtype: str, + serial: int, + port: int, + enabled: bool, +) -> None: + node = ssh_node( + name=host_name, + ipaddr=ipaddr, + ssh_controlmaster=True, + sshtype=sshtype, + silent_mode=True, ) - args = ap.parse_args() + py_bool = "True" if enabled else "False" + cmd = ( + "python3 -c 'import brainstem.discover as d; " + "from brainstem import stem; " + "from brainstem.result import Result; " + "serial={serial}; port={port}; enabled={enabled}; " + "specs=d.findAllModules(d.Spec.USB, buffer_length=128); " + "spec=next((s for s in specs if int(s.serial_number)==serial), None); " + "assert spec is not None, f\"serial {{serial}} not found\"; " + "m={{17: stem.USBHub2x4, 19: stem.USBHub3p, 24: stem.USBHub3c}}.get(int(spec.model)); " + "assert m is not None, f\"unsupported model {{spec.model}}\"; " + "h=m(); " + "err=int(h.connectFromSpec(spec)); " + "assert err==Result.NO_ERROR, f\"connectFromSpec err={{err}}\"; " + "err=int(h.hub.port[port].setEnabled(enabled)); " + "h.disconnect(); " + "assert err==Result.NO_ERROR, f\"setEnabled err={{err}}\"'" + ).format(serial=int(serial), port=int(port), enabled=py_bool) + session = 
await node.rexec(cmd=cmd, IO_TIMEOUT=30.0, CMD_TIMEOUT=90, CONNECT_TIMEOUT=30.0) + out = session.results.decode("utf-8", errors="replace") + if "Traceback" in out or "AssertionError" in out: + raise RuntimeError("remote setEnabled failed on {}: {}".format(ipaddr, out.strip())) + + +def _set_port_enabled_by_serial_sync(*, serial: int, port: int, enabled: bool) -> None: + import brainstem.discover as discover + from brainstem import stem + from brainstem.result import Result + + specs = discover.findAllModules(discover.Spec.USB, buffer_length=128) + spec = next((s for s in specs if int(s.serial_number) == int(serial)), None) + if spec is None: + raise RuntimeError("module serial {} not found on local USB".format(serial)) + model_map = {17: stem.USBHub2x4, 19: stem.USBHub3p, 24: stem.USBHub3c} + cls = model_map.get(int(spec.model)) + if cls is None: + raise RuntimeError("unsupported Acroname model {} for serial {}".format(spec.model, serial)) + hub = cls() + err = int(hub.connectFromSpec(spec)) + if err != Result.NO_ERROR: + raise RuntimeError("connectFromSpec failed for serial {}: {}".format(serial, err)) + try: + err = int(hub.hub.port[int(port)].setEnabled(bool(enabled))) + if err != Result.NO_ERROR: + raise RuntimeError( + "setEnabled failed for serial {} port {} enabled {}: {}".format(serial, port, enabled, err) + ) + finally: + try: + hub.disconnect() + except Exception: + pass + + +def _bdf_to_chip_line_map() -> dict[str, str]: + text = _run_lspci() + pat = re.compile(r"^([0-9a-f:.]+)\s+(.+)$", re.IGNORECASE) + out: dict[str, str] = {} + for line in text.splitlines(): + t = line.strip() + m = pat.match(t) + if not m: + continue + out[m.group(1).lower()] = m.group(2).strip() + return out + + +def _radio_id_chip_map_from_ini(lab_ini: Path) -> dict[str, str]: + cp = configparser.ConfigParser() + cp.read(lab_ini, encoding="utf-8") + by_bdf = _bdf_to_chip_line_map() + out: dict[str, str] = {} + prefix = "fabric.rrh." 
+ for sec in cp.sections(): + if not sec.startswith(prefix): + continue + rid = sec[len(prefix) :].strip() + if not rid: + continue + bdf = cp.get(sec, "pcie_bdf", fallback="").strip().lower() + if not bdf: + continue + out[rid] = by_bdf.get(bdf, "unknown") + return out + + +async def _main_async(args: argparse.Namespace) -> int: + chip_by_radio_id: dict[str, str] = {} + if args.lab_ini is not None: + ini_for_chip = args.lab_ini.expanduser().resolve() + else: + ini_for_chip = default_lab_ini_path().expanduser().resolve() + if ini_for_chip.is_file(): + try: + chip_by_radio_id = _radio_id_chip_map_from_ini(ini_for_chip) + except Exception: + chip_by_radio_id = {} + + if args.power_all_on: + await _power_all_rrh_ports_on( + fabric_json=args.fabric_json, + dry_run=args.dry_run, + show_skips=False, + chip_by_radio_id=chip_by_radio_id, + ) + lab_ini = (args.lab_ini if args.lab_ini is not None else default_lab_ini_path()).expanduser().resolve() + if not lab_ini.is_file(): + raise SystemExit("Lab INI not found for remote power-on: {}".format(lab_ini)) + path = _resolve_json_path(args.fabric_json) + fd = FabricDefinition.load(path) + remote_map = await _discover_remote_serial_hosts(lab_ini=lab_ini) + done: set[tuple[int, int]] = set() + for rrh in fd.rrhs: + if rrh.acroname_module_serial is None: + continue + serial = int(rrh.acroname_module_serial) + port = int(rrh.acroname_port) + key = (serial, port) + if key in done: + continue + host = remote_map.get(serial) + if host is None: + continue + host_name, ipaddr, sshtype = host + if args.dry_run: + print( + "DRY-RUN REMOTE ON: radio_id={} host={} module={} port={} chip={}".format( + rrh.radio_id, + ipaddr, + serial, + port, + chip_by_radio_id.get(rrh.radio_id, "unknown"), + ) + ) + else: + await _set_port_enabled_remote( + host_name=host_name, + ipaddr=ipaddr, + sshtype=sshtype, + serial=serial, + port=port, + enabled=True, + ) + print( + "ON(remote): radio_id={} host={} module={} port={} chip={}".format( + rrh.radio_id, + 
ipaddr, + serial, + port, + chip_by_radio_id.get(rrh.radio_id, "unknown"), + ) + ) + done.add(key) text = _run_lspci() # Network controller (class 02xx) wireless often 0280; MediaTek / MT7915 etc. in product string. @@ -82,6 +344,34 @@ def main() -> int: ) return 0 + ini_for_map = (args.lab_ini if args.lab_ini is not None else default_lab_ini_path()).expanduser().resolve() + if ini_for_map.is_file(): + try: + overlay = parse_fabric_ini_overlay(ini_for_map) + if overlay.rrh_overrides: + print("--- Current RRH -> pcie_bdf mapping from INI ---\n") + any_row = False + cp = configparser.ConfigParser() + cp.read(ini_for_map, encoding="utf-8") + for rid in sorted(overlay.rrh_overrides.keys()): + row = overlay.rrh_overrides[rid] + if getattr(row, "patch_panel_port", None) is None and getattr(row, "acroname_port", None) is None: + # still show if pcie_bdf exists via raw read below + pass + sec = "fabric.rrh.{}".format(rid) + if cp.has_section(sec): + bdf = cp.get(sec, "pcie_bdf", fallback="").strip() + pp = cp.get(sec, "patch_panel_port", fallback="").strip() + if bdf: + any_row = True + pp_txt = pp if pp else "-" + print(" {:<12} pcie_bdf={:<12} patch_panel_port={}".format(rid, bdf, pp_txt)) + if not any_row: + print(" (no pcie_bdf entries in [fabric.rrh.*] yet)") + print() + except Exception as exc: + print("Warning: could not read INI mapping table from {}: {}".format(ini_for_map, exc), file=sys.stderr) + print("--- Wi‑Fi / MediaTek‑looking PCI devices (for optional [fabric.rrh.*] pcie_bdf=) ---\n") for bdf, ln in matches: print(ln) @@ -99,5 +389,44 @@ def main() -> int: return 0 +def main() -> int: + ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument( + "--json", + action="store_true", + help="Emit one JSON object per match: bdf, line", + ) + ap.add_argument( + "--power-all-on", + action="store_true", + help="Before lspci, power ON all RRH ports from --fabric-json (local + relay hosts)", + ) + 
ap.add_argument(
+        "--power-remote-on",
+        action="store_true",
+        help="Compatibility no-op: remote power-on is already included in --power-all-on",
+    )
+    ap.add_argument(
+        "-f",
+        "--fabric-json",
+        default="configs/clubhouse-uax-24.json",
+        help="FabricDefinition JSON path used with --power-all-on",
+    )
+    ap.add_argument(
+        "-c",
+        "--lab-ini",
+        type=Path,
+        default=None,
+        help="Lab INI used for chip labels and remote power-on with --power-all-on (default: configs/default.ini)",
+    )
+    ap.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="With --power-all-on, print ON actions without toggling ports",
+    )
+    args = ap.parse_args()
+    return asyncio.run(_main_async(args))
+
+
 if __name__ == "__main__":
     raise SystemExit(main())
diff --git a/scripts/system/map_fiber_ports.py b/scripts/system/map_fiber_ports.py
new file mode 100644
index 0000000..dbab56c
--- /dev/null
+++ b/scripts/system/map_fiber_ports.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+# Copyright (c) 2026 Umber
+#
+# Licensed under the Apache License, Version 2.0; see LICENSE.
+
+"""Interactive helper: all RRH ports off, then one-by-one power on for fiber mapping.
+
+Typical workflow:
+  1) Turn all RRH ports in a fabric JSON OFF.
+  2) For each RRH binding, turn ON only that one port.
+  3) Operator observes which fiber/link comes up and records mapping.
+  4) Script turns the port back OFF and moves to the next RRH.
+""" + +from __future__ import annotations + +import argparse +import asyncio +from pathlib import Path + +from fiwicontrol.fabric.fabric import FabricDefinition, FabricRRHBinding +from fiwicontrol.lab.discovery import discover_acroname_modules +from fiwicontrol.power.acroname import AcronamePower + +_REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _resolve_json_path(fabric_json: str) -> Path: + p = Path(fabric_json).expanduser() + if not p.is_absolute(): + p = (_REPO_ROOT / p).resolve() + if p.is_file(): + return p + cfg_dir = (_REPO_ROOT / "configs").resolve() + candidates = sorted(cfg_dir.glob("*.json")) if cfg_dir.is_dir() else [] + if len(candidates) == 1: + print("Using fabric JSON: {}".format(candidates[0])) + return candidates[0] + msg = ["Fabric JSON not found: {}".format(p)] + if candidates: + msg.append("Found candidates under configs/:") + msg.extend(" - {}".format(c) for c in candidates) + else: + msg.append("No *.json files found under configs/.") + msg.append("Pass --fabric-json PATH") + raise SystemExit("\n".join(msg)) + + +def _module_map(): + modules = discover_acroname_modules() + return {int(m.serial_number): m for m in modules} + + +def _key(rrh: FabricRRHBinding) -> tuple[int, int]: + if rrh.acroname_module_serial is None: + raise RuntimeError("RRH {} missing acroname_module_serial".format(rrh.radio_id)) + return int(rrh.acroname_module_serial), int(rrh.acroname_port) + + +async def _all_off(rrhs: list[FabricRRHBinding]) -> None: + by_serial = _module_map() + done: set[tuple[int, int]] = set() + for rrh in rrhs: + serial, port = _key(rrh) + k = (serial, port) + if k in done: + continue + mod = by_serial.get(serial) + if mod is None: + raise RuntimeError("Acroname module serial {} not found on local USB".format(serial)) + await AcronamePower(mod).port_off(port) + done.add(k) + print("OFF: module={} port={}".format(serial, port)) + + +async def _step_map(rrhs: list[FabricRRHBinding], dwell: float) -> None: + by_serial = _module_map() + 
for idx, rrh in enumerate(rrhs, start=1): + serial, port = _key(rrh) + mod = by_serial.get(serial) + if mod is None: + raise RuntimeError("Acroname module serial {} not found on local USB".format(serial)) + ap = AcronamePower(mod) + + print( + "\n[{}/{}] radio_id={} module={} port={} patch_panel_port={}".format( + idx, + len(rrhs), + rrh.radio_id, + serial, + port, + rrh.patch_panel_port if rrh.patch_panel_port is not None else "", + ) + ) + input("Press Enter to power ON this port...") + await ap.port_on(port) + print("ON: module={} port={}".format(serial, port)) + input("Observe fiber/link, then press Enter to power OFF this port...") + await ap.port_off(port) + print("OFF: module={} port={}".format(serial, port)) + if dwell > 0: + await asyncio.sleep(dwell) + + +async def _run(fabric_json: str, dwell: float, skip_all_off: bool) -> None: + path = _resolve_json_path(fabric_json) + fd = FabricDefinition.load(path) + rrhs = list(fd.rrhs) + if not rrhs: + raise SystemExit("No RRHs in {}".format(path)) + print("Loaded {} RRH bindings from {}".format(len(rrhs), path)) + if not skip_all_off: + print("\nPhase 1: powering OFF all RRH ports...") + await _all_off(rrhs) + print("\nPhase 2: one-by-one port mapping...") + await _step_map(rrhs, dwell=dwell) + print("\nDone.") + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument( + "-f", + "--fabric-json", + default="configs/my-fabric.json", + help="Path to FabricDefinition JSON (default: configs/my-fabric.json)", + ) + p.add_argument( + "--dwell-seconds", + type=float, + default=0.0, + help="Optional pause after each OFF transition (default: 0)", + ) + p.add_argument( + "--skip-all-off", + action="store_true", + help="Skip initial global power-off phase", + ) + args = p.parse_args() + asyncio.run( + _run( + fabric_json=args.fabric_json, + dwell=max(0.0, float(args.dwell_seconds)), + skip_all_off=bool(args.skip_all_off), + ) + ) + return 0 + + +if __name__ == "__main__": + raise 
SystemExit(main()) diff --git a/scripts/system/power_only_7915.py b/scripts/system/power_only_7915.py index 150f29b..e55bd58 100644 --- a/scripts/system/power_only_7915.py +++ b/scripts/system/power_only_7915.py @@ -3,105 +3,9 @@ # # Licensed under the Apache License, Version 2.0; see LICENSE. -"""Power down all RRHs except one target radio_id (default: 7915).""" +"""Deprecated wrapper for ``rrh_power_control.py``.""" -from __future__ import annotations - -import argparse -import asyncio -from pathlib import Path - -from fiwicontrol.fabric.fabric import FabricDefinition -from fiwicontrol.lab.discovery import discover_acroname_modules -from fiwicontrol.power.acroname import AcronamePower - -_REPO_ROOT = Path(__file__).resolve().parents[2] - - -async def _power_only_one(*, fabric_json: str, keep_radio_id: str, dry_run: bool) -> None: - json_path = Path(fabric_json).expanduser() - if not json_path.is_absolute(): - json_path = (_REPO_ROOT / json_path).resolve() - if not json_path.is_file(): - cfg_dir = (_REPO_ROOT / "configs").resolve() - candidates = sorted(cfg_dir.glob("*.json")) if cfg_dir.is_dir() else [] - if len(candidates) == 1: - json_path = candidates[0] - print("Using fabric JSON: {}".format(json_path)) - else: - msg = ["Fabric JSON not found: {}".format(json_path)] - if candidates: - msg.append("Found candidates under configs/:") - msg.extend(" - {}".format(p) for p in candidates) - else: - msg.append("No *.json files found under configs/.") - msg.append("Pass --fabric-json PATH") - raise SystemExit("\n".join(msg)) - fd = FabricDefinition.load(json_path) - rrhs = {h.radio_id: h for h in fd.rrhs} - if keep_radio_id not in rrhs: - raise SystemExit("radio_id {!r} not found in {}".format(keep_radio_id, json_path)) - - modules = discover_acroname_modules() - by_serial = {int(m.serial_number): m for m in modules} - - keep = rrhs[keep_radio_id] - keep_key = (keep.acroname_module_serial, keep.acroname_port) - print( - "Keeping ON: radio_id={} module={} 
port={}".format( - keep_radio_id, keep_key[0], keep_key[1] - ) - ) - - for h in fd.rrhs: - key = (h.acroname_module_serial, h.acroname_port) - if key == keep_key: - continue - if h.acroname_module_serial is None: - raise RuntimeError( - "{}: missing acroname_module_serial in {}".format(h.radio_id, json_path) - ) - serial = int(h.acroname_module_serial) - mod = by_serial.get(serial) - if mod is None: - raise RuntimeError( - "{}: module serial {} not found on local USB".format(h.radio_id, serial) - ) - if dry_run: - print("DRY-RUN OFF: radio_id={} module={} port={}".format(h.radio_id, serial, h.acroname_port)) - continue - ap = AcronamePower(mod) - await ap.port_off(h.acroname_port) - print("OFF: radio_id={} module={} port={}".format(h.radio_id, serial, h.acroname_port)) - - -def main() -> int: - p = argparse.ArgumentParser(description=__doc__) - p.add_argument( - "-f", - "--fabric-json", - default="configs/my-fabric.json", - help="Path to FabricDefinition JSON (default: configs/my-fabric.json)", - ) - p.add_argument( - "--keep-radio-id", - default="7915", - help="radio_id to keep powered on (default: 7915)", - ) - p.add_argument( - "--dry-run", - action="store_true", - help="Print actions without toggling Acroname ports", - ) - args = p.parse_args() - asyncio.run( - _power_only_one( - fabric_json=args.fabric_json, - keep_radio_id=args.keep_radio_id, - dry_run=args.dry_run, - ) - ) - return 0 +from rrh_power_control import main if __name__ == "__main__": diff --git a/scripts/system/rrh_power_control.py b/scripts/system/rrh_power_control.py new file mode 100644 index 0000000..218f355 --- /dev/null +++ b/scripts/system/rrh_power_control.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# Copyright (c) 2026 Umber +# +# Licensed under the Apache License, Version 2.0; see LICENSE. + +"""Power control for RRHs in a FabricDefinition via Acroname ports. + +Default behavior keeps one target ``radio_id`` ON and powers OFF all others. 
+Use ``--all-off`` to power OFF every RRH port in the fabric JSON. +""" + +from __future__ import annotations + +import argparse +import asyncio +from pathlib import Path + +from fiwicontrol.fabric.fabric import FabricDefinition +from fiwicontrol.lab.discovery import discover_acroname_modules +from fiwicontrol.power.acroname import AcronamePower + +_REPO_ROOT = Path(__file__).resolve().parents[2] + + +async def _power_only_one( + *, + fabric_json: str, + keep_radio_id: str | None, + all_off: bool, + dry_run: bool, +) -> None: + json_path = Path(fabric_json).expanduser() + if not json_path.is_absolute(): + json_path = (_REPO_ROOT / json_path).resolve() + if not json_path.is_file(): + cfg_dir = (_REPO_ROOT / "configs").resolve() + candidates = sorted(cfg_dir.glob("*.json")) if cfg_dir.is_dir() else [] + if len(candidates) == 1: + json_path = candidates[0] + print("Using fabric JSON: {}".format(json_path)) + else: + msg = ["Fabric JSON not found: {}".format(json_path)] + if candidates: + msg.append("Found candidates under configs/:") + msg.extend(" - {}".format(p) for p in candidates) + else: + msg.append("No *.json files found under configs/.") + msg.append("Pass --fabric-json PATH") + raise SystemExit("\n".join(msg)) + fd = FabricDefinition.load(json_path) + rrhs = {h.radio_id: h for h in fd.rrhs} + if not all_off and keep_radio_id not in rrhs: + raise SystemExit("radio_id {!r} not found in {}".format(keep_radio_id, json_path)) + + modules = discover_acroname_modules() + by_serial = {int(m.serial_number): m for m in modules} + + keep_key: tuple[int | None, int] | None = None + if all_off: + print("Mode: all-off (powering OFF every RRH port in fabric JSON)") + else: + keep = rrhs[keep_radio_id] + keep_key = (keep.acroname_module_serial, keep.acroname_port) + print( + "Keeping ON: radio_id={} module={} port={}".format( + keep_radio_id, keep_key[0], keep_key[1] + ) + ) + + for h in fd.rrhs: + key = (h.acroname_module_serial, h.acroname_port) + if keep_key is not None 
and key == keep_key: + continue + if h.acroname_module_serial is None: + raise RuntimeError( + "{}: missing acroname_module_serial in {}".format(h.radio_id, json_path) + ) + serial = int(h.acroname_module_serial) + mod = by_serial.get(serial) + if mod is None: + raise RuntimeError( + "{}: module serial {} not found on local USB".format(h.radio_id, serial) + ) + if dry_run: + print("DRY-RUN OFF: radio_id={} module={} port={}".format(h.radio_id, serial, h.acroname_port)) + continue + ap = AcronamePower(mod) + await ap.port_off(h.acroname_port) + print("OFF: radio_id={} module={} port={}".format(h.radio_id, serial, h.acroname_port)) + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument( + "-f", + "--fabric-json", + default="configs/my-fabric.json", + help="Path to FabricDefinition JSON (default: configs/my-fabric.json)", + ) + p.add_argument( + "--keep-radio-id", + default="7915", + help="radio_id to keep powered on (default: 7915, ignored with --all-off)", + ) + p.add_argument( + "--all-off", + action="store_true", + help="Power off all RRH ports defined in the fabric JSON", + ) + p.add_argument( + "--dry-run", + action="store_true", + help="Print actions without toggling Acroname ports", + ) + args = p.parse_args() + asyncio.run( + _power_only_one( + fabric_json=args.fabric_json, + keep_radio_id=args.keep_radio_id if not args.all_off else None, + all_off=args.all_off, + dry_run=args.dry_run, + ) + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/fiwicontrol/fabric/builder.py b/src/fiwicontrol/fabric/builder.py index ae3e1d1..2047ad5 100644 --- a/src/fiwicontrol/fabric/builder.py +++ b/src/fiwicontrol/fabric/builder.py @@ -10,6 +10,7 @@ cross-file “builder_*” sprawl. 
I/O is stdin/stdout/stderr (injectable for te from __future__ import annotations +import asyncio import sys from dataclasses import dataclass, field from pathlib import Path @@ -37,6 +38,20 @@ def _prompt_line(stdin: TextIO, stdout: TextIO, label: str) -> str: return line.rstrip("\r\n") +def _downstream_slot_to_hub_port_index(mod: AcronameModuleInfo, slot: int) -> int: + """Map downstream slot ``0..N-1`` to ``hub.port[]`` index for VBUS / ``setEnabled``. + + For USB **Hub** products in typical FiWiControl benches, the first ``N`` user-facing + downstream J ports are controlled as ``hub.port[0]`` … ``hub.port[N-1]``. + + (``hub_port_entities`` can exceed ``N``; extra slots are not always valid targets for + ``setEnabled`` — e.g. USBHub2x4 has 6 port entities but only ``hub.port[0..3]`` are + the four switchable downstream ports; indexing ``hub.port[4]`` can fail with + a BrainStem error such as 21.) + """ + return int(slot) + + def fabric_identity_from_inventory( document: InventoryDocument, overlay: FabricIniOverlay, @@ -99,6 +114,66 @@ def _print_lab_inventory_summary( print(f" fabric.rrh.* radio_id: {', '.join(sorted(o.rrh_overrides))}", file=stdout) +async def _discover_relay_modules_async( + *, + document: InventoryDocument, + ssh_controlmaster: bool, + stdout: TextIO, + stderr: TextIO, +) -> tuple[list[AcronameModuleInfo], dict[int, "_ModuleSource"]]: + """Discover Acroname modules on INI relay hosts (usb=remote / mode=relay).""" + from fiwicontrol.commands.node_control import ssh_node + from fiwicontrol.lab.discovery import discover_devices_remote_async, parse_remote_discovery_payload + + relay_hosts = [h for h in document.hosts if h.mode == "relay" and h.ipaddr] + if not relay_hosts: + return [], {} + + all_modules: list[AcronameModuleInfo] = [] + source_by_serial: dict[int, _ModuleSource] = {} + print("\n=== USB discovery (relay hosts from lab INI) ===", file=stdout) + for host in relay_hosts: + try: + node = ssh_node( + name=host.name, + 
ipaddr=host.ipaddr, + ssh_controlmaster=ssh_controlmaster, + sshtype=host.sshtype, + silent_mode=True, + ) + payload = await discover_devices_remote_async(node) + mods, _ = parse_remote_discovery_payload(payload) + except Exception as exc: + print(f" [{host.name}] remote discovery failed: {exc}", file=stderr) + continue + print( + f" [{host.name}] discovered {len(mods)} module(s) via SSH {host.ipaddr}", + file=stdout, + ) + for m in mods: + print( + f" serial={m.serial_number} model={m.model_name} " + f"hub_port_entities={m.hub_port_entities} downstream_usb_ports={m.downstream_usb_ports}", + file=stdout, + ) + source_by_serial[int(m.serial_number)] = _ModuleSource( + mode="relay", + host_name=host.name, + ipaddr=host.ipaddr, + sshtype=host.sshtype, + ) + all_modules.extend(mods) + return all_modules, source_by_serial + + +@dataclass(frozen=True) +class _ModuleSource: + mode: str + host_name: str | None = None + ipaddr: str | None = None + sshtype: str = "ssh" + + @dataclass class _BindProgress: """Mutable state for one interactive bind pass (kept in one struct, not ten loose attributes).""" @@ -107,8 +182,7 @@ class _BindProgress: document: InventoryDocument | None = None overlay: FabricIniOverlay | None = None modules: list[AcronameModuleInfo] = field(default_factory=list) - selected: AcronameModuleInfo | None = None - hub_port_count: int = 0 + module_source_by_serial: dict[int, _ModuleSource] = field(default_factory=dict) class FabricInteractiveSession: @@ -147,10 +221,6 @@ class FabricInteractiveSession: return code if (code := self._phase_usb_discovery()) != 0: return code - if (code := self._phase_pick_module()) != 0: - return code - if (code := self._phase_hub_port_count()) != 0: - return code fabric_id, conc_name, conc_ip = self._phase_prompt_fabric_metadata() rrhs = self._phase_prompt_rrh_rows() if not rrhs: @@ -218,71 +288,207 @@ class FabricInteractiveSession: print("Install Acroname support: pip install -e '.[power]'", file=self._stderr) return 1 try: - 
self._p.modules = discover_acroname_modules() + local_modules = discover_acroname_modules() + for m in local_modules: + self._p.module_source_by_serial[int(m.serial_number)] = _ModuleSource(mode="local") except Exception as exc: print(f"Discovery failed: {exc}", file=self._stderr) return 1 + relay_modules: list[AcronameModuleInfo] = [] + if self._p.document is not None: + try: + relay_modules, relay_sources = asyncio.run( + _discover_relay_modules_async( + document=self._p.document, + ssh_controlmaster=self._ssh_controlmaster, + stdout=self._stdout, + stderr=self._stderr, + ) + ) + self._p.module_source_by_serial.update(relay_sources) + except Exception as exc: + print(f"Relay-host discovery failed: {exc}", file=self._stderr) + return 1 + # Merge local + relay by serial number (stable order: local first, then new relay serials). + merged_by_serial: dict[int, AcronameModuleInfo] = {} + for m in local_modules: + merged_by_serial[int(m.serial_number)] = m + for m in relay_modules: + merged_by_serial.setdefault(int(m.serial_number), m) + self._p.modules = list(merged_by_serial.values()) if not self._p.modules: - print("No Acroname / BrainStem modules found on USB.", file=self._stderr) + print( + "No Acroname / BrainStem modules found on local USB or relay hosts from INI.", + file=self._stderr, + ) return 1 fp = acroname_modules_fingerprint(self._p.modules) - print("\n=== USB discovery (this machine) ===", file=self._stdout) + print("\n=== USB discovery (active module set) ===", file=self._stdout) print( f"Discovered {len(self._p.modules)} module(s); fingerprint prefix {fp[:16]}…", file=self._stdout, ) for idx, m in enumerate(self._p.modules): + src = self._p.module_source_by_serial.get(int(m.serial_number), _ModuleSource(mode="local")) + if src.mode == "relay" and src.ipaddr: + src_txt = f"relay:{src.ipaddr}" + else: + src_txt = "local" print( f" [{idx}] serial={m.serial_number} model={m.model_name} " - f"hub_port_entities={m.hub_port_entities} 
downstream_usb_ports={m.downstream_usb_ports}", + f"hub_port_entities={m.hub_port_entities} downstream_usb_ports={m.downstream_usb_ports} " + f"source={src_txt}", file=self._stdout, ) return 0 - def _phase_pick_module(self) -> int: - assert self._p.modules - if len(self._p.modules) == 1: - self._p.selected = self._p.modules[0] - return 0 - raw = ( - _prompt_line( - self._stdin, - self._stdout, - f"Select module index [0-{len(self._p.modules) - 1}] (default 0): ", - ).strip() - or "0" + def _hub_port_count_for_module(self, mod: AcronameModuleInfo) -> int | None: + # Prefer downstream USB port count for prompts. hub_port_entities (BrainStem + # NUMBER_OF_PORTS) can exceed downstream count — e.g. USBHub2x4 reports 6 vs 4. + return ( + mod.downstream_usb_ports + if mod.downstream_usb_ports is not None + else mod.hub_port_entities + ) + + async def _connect_local_hub( + self, + *, + spec: object, + stem_cls: type, + serial: int, + ) -> object: + """``connectFromSpec`` with retries. Error 25 (common) = USB busy / not released yet.""" + from brainstem.result import Result + + last_err: int | None = None + hub: object | None = None + for attempt in range(12): + h = stem_cls() + err = int(h.connectFromSpec(spec)) + if err == Result.NO_ERROR: + hub = h + return hub + last_err = err + try: + h.disconnect() + except Exception: + pass + await asyncio.sleep(min(1.0, 0.06 * (1.55**attempt))) + raise RuntimeError( + "connectFromSpec failed for serial {}: {} (after 12 attempts; if this persists, close other " + "BrainStem clients and retry)".format(serial, last_err) + ) + + async def _set_module_ports_local(self, mod: AcronameModuleInfo, *, active_port: int | None) -> None: + from brainstem import discover, stem + from brainstem.result import Result + + await asyncio.sleep(0.05) + specs = discover.findAllModules(discover.Spec.USB, buffer_length=128) + spec = next((s for s in specs if int(s.serial_number) == int(mod.serial_number)), None) + if spec is None: + raise 
RuntimeError("local module serial {} not found".format(mod.serial_number)) + model_map = {17: stem.USBHub2x4, 19: stem.USBHub3p, 24: stem.USBHub3c} + cls = model_map.get(int(spec.model)) + if cls is None: + raise RuntimeError("unsupported model {} for serial {}".format(spec.model, mod.serial_number)) + hub = await self._connect_local_hub( + spec=spec, stem_cls=cls, serial=int(mod.serial_number) ) try: - sel = int(raw) - except ValueError: - print("Invalid module index.", file=self._stderr) - return 1 - if sel < 0 or sel >= len(self._p.modules): - print("Invalid module index.", file=self._stderr) - return 1 - self._p.selected = self._p.modules[sel] - return 0 - - def _phase_hub_port_count(self) -> int: - assert self._p.selected is not None - mod = self._p.selected - n = mod.hub_port_entities or mod.downstream_usb_ports - if n is None or n < 1: - raw = _prompt_line( - self._stdin, - self._stdout, - "Could not infer port count; enter hub port count: ", - ).strip() + n = self._hub_port_count_for_module(mod) + if n is None: + raise RuntimeError("cannot infer port count for serial {}".format(mod.serial_number)) + for li in range(int(n)): + phy = _downstream_slot_to_hub_port_index(mod, li) + enabled = bool(active_port is not None and int(active_port) == int(li)) + err = int(hub.hub.port[phy].setEnabled(enabled)) + if err != Result.NO_ERROR: + raise RuntimeError( + "setEnabled failed serial {} hub.port[{}] (downstream slot {}) enabled {}: {}".format( + mod.serial_number, phy, li, enabled, err + ) + ) + finally: try: - n = int(raw) if raw else 0 - except ValueError: - print("Need a positive port count.", file=self._stderr) - return 1 - if n < 1: - print("Need a positive port count.", file=self._stderr) - return 1 - self._p.hub_port_count = n - return 0 + hub.disconnect() + except Exception: + pass + + async def _set_module_ports_remote( + self, + mod: AcronameModuleInfo, + src: _ModuleSource, + *, + active_port: int | None, + ) -> None: + from 
fiwicontrol.commands.node_control import ssh_node + + if not src.ipaddr: + raise RuntimeError("remote source missing ipaddr for serial {}".format(mod.serial_number)) + node = ssh_node( + name=src.host_name or "relay", + ipaddr=src.ipaddr, + ssh_controlmaster=self._ssh_controlmaster, + sshtype=src.sshtype, + silent_mode=True, + ) + n = self._hub_port_count_for_module(mod) + if n is None: + raise RuntimeError("cannot infer port count for serial {}".format(mod.serial_number)) + active = "None" if active_port is None else str(int(active_port)) + cmd = ( + "python3 -c 'import brainstem.discover as d; " + "from brainstem import stem; " + "from brainstem.result import Result; " + "serial={serial}; n={n}; active={active}; " + "specs=d.findAllModules(d.Spec.USB, buffer_length=128); " + "spec=next((s for s in specs if int(s.serial_number)==serial), None); " + "assert spec is not None, f\"serial {{serial}} not found\"; " + "m={{17: stem.USBHub2x4, 19: stem.USBHub3p, 24: stem.USBHub3c}}.get(int(spec.model)); " + "assert m is not None, f\"unsupported model {{spec.model}}\"; " + "h=m(); " + "err=int(h.connectFromSpec(spec)); " + "assert err==Result.NO_ERROR, f\"connectFromSpec err={{err}}\"; " + "errs=[int(h.hub.port[li].setEnabled((active is not None and li==active))) for li in range(n)]; " + "assert all(e==Result.NO_ERROR for e in errs), f\"setEnabled errs={{errs}}\"; " + "h.disconnect()'" + ).format(serial=int(mod.serial_number), n=int(n), active=active) + out = "" + for attempt in range(12): + session = await node.rexec(cmd=cmd, IO_TIMEOUT=30.0, CMD_TIMEOUT=90, CONNECT_TIMEOUT=30.0) + out = session.results.decode("utf-8", errors="replace") + if "Traceback" not in out and "AssertionError" not in out: + return + await asyncio.sleep(min(1.0, 0.06 * (1.55**attempt))) + if "Traceback" in out or "AssertionError" in out: + raise RuntimeError( + "remote bulk setEnabled failed on {} for serial {}: {}".format( + src.ipaddr, mod.serial_number, out.strip() + ) + ) + + async def 
_set_all_ports_off_except(self, *, active_serial: int | None, active_port: int | None) -> None: + for mod in self._p.modules: + serial = int(mod.serial_number) + n = self._hub_port_count_for_module(mod) + if n is None or n < 1: + continue + src = self._p.module_source_by_serial.get(serial, _ModuleSource(mode="local")) + target = int(active_port) if active_serial == serial and active_port is not None else None + print( + "Preparing module {} (mode={}) active_port={}".format( + serial, + src.mode, + "" if target is None else target, + ), + file=self._stdout, + ) + if src.mode == "relay": + await self._set_module_ports_remote(mod, src, active_port=target) + else: + await self._set_module_ports_local(mod, active_port=target) def _phase_prompt_fabric_metadata(self) -> tuple[str, str | None, str | None]: inp, out = self._stdin, self._stdout @@ -309,28 +515,182 @@ class FabricInteractiveSession: return fabric_id, conc_name, conc_ip def _phase_prompt_rrh_rows(self) -> tuple[FabricRRHBinding, ...]: - assert self._p.selected is not None - mod = self._p.selected inp, out = self._stdin, self._stdout rows: list[FabricRRHBinding] = [] - for port in range(self._p.hub_port_count): - rid = _prompt_line( + used_radio_ids: set[str] = set() + modules = self._p.modules + if len(modules) == 1: + selected = modules + else: + bind_all_raw = _prompt_line( inp, out, - f"radio_id for Acroname port {port} (blank to skip): ", - ).strip() - if not rid: - continue - pp_raw = _prompt_line(inp, out, " Optional patch_panel_port (blank for none): ").strip() - pp = int(pp_raw) if pp_raw else None - rows.append( - FabricRRHBinding( - radio_id=rid, - acroname_port=port, - acroname_module_serial=mod.serial_number, - patch_panel_port=pp, + "Bind all discovered modules in this run? 
[Y/n]: ", + ).strip().lower() + if bind_all_raw in ("", "y", "yes"): + selected = modules + else: + raw = ( + _prompt_line( + inp, + out, + f"Select module index [0-{len(modules) - 1}] (default 0): ", + ).strip() + or "0" ) - ) + try: + sel = int(raw) + except ValueError: + print("Invalid module index.", file=self._stderr) + return () + if sel < 0 or sel >= len(modules): + print("Invalid module index.", file=self._stderr) + return () + selected = [modules[sel]] + + try: + # Establish a clean baseline once before interactive per-port mapping. + asyncio.run(self._set_all_ports_off_except(active_serial=None, active_port=None)) + except Exception as exc: + print("Failed to initialize all ports OFF state: {}".format(exc), file=self._stderr) + return () + + for mod_idx, mod in enumerate(selected): + n = self._hub_port_count_for_module(mod) + if n is None or n < 1: + raw_n = _prompt_line( + inp, + out, + "Could not infer port count for module serial {}. Enter hub port count: ".format( + mod.serial_number + ), + ).strip() + try: + n = int(raw_n) if raw_n else 0 + except ValueError: + print("Need a positive port count.", file=self._stderr) + return () + if n < 1: + print("Need a positive port count.", file=self._stderr) + return () + + if len(selected) > 1: + print( + "\nBinding module [{idx}] serial={serial} model={model} ports={ports}".format( + idx=mod_idx, + serial=mod.serial_number, + model=mod.model_name, + ports=n, + ), + file=out, + ) + for dslot in range(n): + phy = _downstream_slot_to_hub_port_index(mod, dslot) + try: + # One connect per module per step (all hubs in one pass). Avoids back-to-back + # connectFromSpec on the same serial (BrainStem may return e.g. error 25). 
+ asyncio.run( + self._set_all_ports_off_except( + active_serial=int(mod.serial_number), + active_port=int(dslot), + ) + ) + except Exception as exc: + print( + "Failed to isolate module {} downstream slot {} (hub.port[{}]) during mapping: {}".format( + mod.serial_number, dslot, phy, exc + ), + file=self._stderr, + ) + return () + print( + " VBUS ON: module {} serial={} downstream slot {} → BrainStem hub.port[{}] " + "(identify which RRH / link, then enter patch panel below)".format( + mod.model_name, mod.serial_number, dslot, phy + ), + file=out, + ) + proposed: str | None = None + proposed_pp: int | None = None + if self._p.overlay is not None and self._p.overlay.rrh_overrides: + exact = [] + loose = [] + overlay_map = self._p.overlay.rrh_overrides + for rid0, ov in self._p.overlay.rrh_overrides.items(): + if rid0 in used_radio_ids: + continue + ovp = ov.acroname_port + if ovp is not None and int(ovp) != int(dslot) and int(ovp) != int(phy): + continue + if ov.acroname_module_serial == int(mod.serial_number): + exact.append(rid0) + elif ov.acroname_module_serial is None: + loose.append(rid0) + candidates = sorted(exact) or sorted(loose) + if candidates: + proposed = candidates[0] + ov = overlay_map.get(proposed) + if ov is not None: + proposed_pp = ov.patch_panel_port + + if proposed_pp is not None: + pp_raw = _prompt_line( + inp, + out, + " patch_panel_port slot {} hub[{}] [{}] (Enter=accept, '-'=none): ".format( + dslot, phy, proposed_pp + ), + ).strip() + if pp_raw == "-": + pp = None + else: + pp = int(pp_raw) if pp_raw else int(proposed_pp) + else: + pp_raw = _prompt_line( + inp, + out, + " patch_panel_port (slot {} hub[{}], blank for none): ".format( + dslot, phy + ), + ).strip() + pp = int(pp_raw) if pp_raw else None + + derived = "rh-{:02d}".format(pp) if pp is not None else None + rid_default = proposed or derived + if rid_default is not None: + rid_raw = _prompt_line( + inp, + out, + "radio_id slot {} hub[{}] [{}] (Enter=accept, '-'=skip): ".format( + 
dslot, phy, rid_default + ), + ).strip() + if rid_raw == "-": + continue + rid = rid_raw or rid_default + else: + rid = _prompt_line( + inp, + out, + "radio_id slot {} hub[{}] (blank to skip): ".format( + dslot, phy + ), + ).strip() + if not rid: + continue + rows.append( + FabricRRHBinding( + radio_id=rid, + acroname_port=phy, + acroname_module_serial=mod.serial_number, + patch_panel_port=pp, + ) + ) + used_radio_ids.add(rid) + try: + asyncio.run(self._set_all_ports_off_except(active_serial=None, active_port=None)) + except Exception as exc: + print("Warning: failed to return all ports to OFF state: {}".format(exc), file=self._stderr) return tuple(rows) def _phase_write_json( diff --git a/src/fiwicontrol/lab/discovery.py b/src/fiwicontrol/lab/discovery.py index fdd1cd8..0bdbb97 100644 --- a/src/fiwicontrol/lab/discovery.py +++ b/src/fiwicontrol/lab/discovery.py @@ -351,12 +351,24 @@ async def discover_devices_remote_async( raise RuntimeError("empty discovery output from remote host") try: return json.loads(text) - except json.JSONDecodeError as exc: + except json.JSONDecodeError: + # Some SSH setups print warnings (e.g. first-time known_hosts notices) before the + # JSON payload. Attempt to recover by finding the first decodable JSON object. + decoder = json.JSONDecoder() + for idx, ch in enumerate(text): + if ch != "{": + continue + try: + payload, _end = decoder.raw_decode(text[idx:]) + except json.JSONDecodeError: + continue + if isinstance(payload, dict): + return payload preview = text if len(text) <= 4000 else text[:4000] + "\n… (truncated)" raise RuntimeError( "remote host did not return JSON for {!r} (SSH failure, wrong interpreter, or " "non-discovery output on stdout). Raw output:\n{}".format(cmd, preview) - ) from exc + ) def monsoon_port_info_from_mapping(row: dict[str, Any]) -> MonsoonPortInfo: