Bug ID 722657: Mcpd and bigd monitor states are intermittently out-of-sync

Last Modified: Dec 18, 2024

Affected Product(s):
BIG-IP LTM (all modules)

Known Affected Versions:
12.0.0, 12.0.0 HF1, 12.0.0 HF2, 12.0.0 HF3, 12.0.0 HF4, 12.1.0, 12.1.0 HF1, 12.1.0 HF2, 12.1.1, 12.1.1 HF1, 12.1.1 HF2, 12.1.2, 12.1.2 HF1, 12.1.2 HF2, 12.1.3, 12.1.3.1, 12.1.3.2, 12.1.3.3, 12.1.3.4, 12.1.3.5, 12.1.3.6, 12.1.3.7, 12.1.4, 12.1.4.1, 12.1.5, 12.1.5.1, 12.1.5.2, 12.1.5.3, 12.1.6, 13.0.0, 13.0.0 HF1, 13.0.0 HF2, 13.0.0 HF3, 13.0.1, 13.1.0, 13.1.0.1, 13.1.0.2, 13.1.0.3, 13.1.0.4, 13.1.0.5, 13.1.0.6, 13.1.0.7, 13.1.0.8, 13.1.1, 13.1.1.2, 13.1.1.3, 13.1.1.4, 13.1.1.5, 13.1.3, 13.1.3.1, 13.1.3.2, 13.1.3.3, 13.1.3.4, 13.1.3.5, 13.1.3.6, 13.1.4, 13.1.4.1, 13.1.5, 13.1.5.1, 14.0.0, 14.0.0.1, 14.0.0.2, 14.0.0.3, 14.0.0.4, 14.0.0.5, 14.0.1, 14.0.1.1, 14.1.0, 14.1.0.1, 14.1.0.2, 14.1.0.3, 14.1.0.5, 14.1.0.6, 14.1.2, 14.1.2.1, 14.1.2.2, 14.1.2.3, 14.1.2.4, 14.1.2.5, 14.1.2.6, 14.1.2.7, 14.1.2.8, 14.1.3, 14.1.3.1, 14.1.4, 14.1.4.1, 14.1.4.2, 14.1.4.3, 14.1.4.4, 14.1.4.5, 14.1.4.6, 14.1.5, 14.1.5.1, 14.1.5.2, 14.1.5.3, 14.1.5.4, 14.1.5.6, 15.0.0, 15.0.1, 15.0.1.1, 15.0.1.2, 15.0.1.3, 15.0.1.4, 15.1.0, 15.1.0.1, 15.1.0.2, 15.1.0.3, 15.1.0.4, 15.1.0.5, 15.1.1, 15.1.2, 15.1.2.1, 15.1.3, 15.1.3.1, 15.1.4, 15.1.4.1, 15.1.5, 15.1.5.1, 15.1.6, 15.1.6.1, 15.1.7, 15.1.8, 15.1.8.1, 15.1.8.2, 15.1.9, 15.1.9.1, 15.1.10, 15.1.10.2, 15.1.10.3, 15.1.10.4, 15.1.10.5, 15.1.10.6, 16.0.0, 16.0.0.1, 16.0.1, 16.0.1.1, 16.0.1.2, 16.1.0, 16.1.1, 16.1.2, 16.1.2.1, 16.1.2.2, 16.1.3, 16.1.3.1, 16.1.3.2, 16.1.3.3, 16.1.3.4, 16.1.3.5, 16.1.4, 16.1.4.1, 16.1.4.2, 16.1.4.3, 16.1.5, 16.1.5.1, 17.0.0, 17.0.0.1, 17.0.0.2, 17.1.0, 17.1.0.1, 17.1.0.2, 17.1.0.3, 17.1.1, 17.1.1.1, 17.1.1.2, 17.1.1.3, 17.1.1.4

Fixed In:
17.1.2

Opened: Jun 01, 2018

Severity: 3-Major

Symptoms

Bigd informs mcpd of the state of a node only when the state changes. If the pool member status happens to be incorrect, this can cause the following symptoms: -- Pool member status may be incorrect for a long time. -- Traffic may be directed to a pool member that is actually down.

Impact

-- False monitor status in UI/CLI. -- A large number of RST connections, as traffic is directed to a pool member that is actually down.

Conditions

-- A monitor is attached to a pool member, and in certain corner cases bigd does not inform mcpd of a state change event for a long time. -- No periodic events are sent from bigd to mcpd.

Workaround

None

Fix Information

Added new db variable, bigd.stateupdateinterval, to create additional messages that correct the pool member status in certain conditions.

Behavior Change

The bigd daemon can now create additional messages to inform mcpd of the status change for a monitored node or pool member, in case the message indicating the initial status change is not received or processed successfully by mcpd. This feature can be enabled for a BIG-IP system by configuring the following sys db variable to a non-zero value: sys db bigd.stateupdateinterval { default-value "0" scf-config "true" value "0" value-range "integer min:0 max:600" } This value represents the number of seconds after an initial status change that bigd will wait before beginning to send additional status-change messages to mcpd. The first such additional message will be sent approximately the configured number of seconds after the initial message triggered by the monitored object's initial status change. Subsequent such messages will be sent at intervals approximately equal to two (2) times and four (4) times the initial delay. This sequence of messages restarts after each change in the monitored object's status detected by bigd as a result of monitor pings. Since the processing of such messages triggers a modest amount of additional processing by mcpd, this value can be tuned for the desired balance between quick response and recovery from such conditions, and acceptable mcpd processing overhead.

Guides & references

K10134038: F5 Bug Tracker Filter Names and Tips