Add port flap and port down error tracking with faster ping
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
54
errors.go
54
errors.go
@@ -12,6 +12,8 @@ const (
|
|||||||
ErrorTypeNew = "new"
|
ErrorTypeNew = "new"
|
||||||
ErrorTypeUnreachable = "unreachable"
|
ErrorTypeUnreachable = "unreachable"
|
||||||
ErrorTypeHighUtilization = "high_utilization"
|
ErrorTypeHighUtilization = "high_utilization"
|
||||||
|
ErrorTypePortFlap = "port_flap"
|
||||||
|
ErrorTypePortDown = "port_down"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Error struct {
|
type Error struct {
|
||||||
@@ -139,6 +141,58 @@ func (e *ErrorTracker) AddUtilizationError(node *Node, portName string, utilizat
|
|||||||
e.t.NotifyUpdate()
|
e.t.NotifyUpdate()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *ErrorTracker) AddPortFlap(node *Node, portName string) {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
|
||||||
|
key := "flap:" + node.ID + ":" + portName
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
if existing, ok := e.errors[key]; ok {
|
||||||
|
existing.LastUpdated = now
|
||||||
|
e.t.NotifyUpdate()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
e.nextID++
|
||||||
|
e.errors[key] = &Error{
|
||||||
|
ID: fmt.Sprintf("err-%d", e.nextID),
|
||||||
|
NodeID: node.ID,
|
||||||
|
NodeName: node.DisplayName(),
|
||||||
|
Port: portName,
|
||||||
|
Type: ErrorTypePortFlap,
|
||||||
|
FirstSeen: now,
|
||||||
|
LastUpdated: now,
|
||||||
|
}
|
||||||
|
e.t.NotifyUpdate()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *ErrorTracker) AddPortDown(node *Node, portName string) {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
|
||||||
|
key := "down:" + node.ID + ":" + portName
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
if existing, ok := e.errors[key]; ok {
|
||||||
|
existing.LastUpdated = now
|
||||||
|
e.t.NotifyUpdate()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
e.nextID++
|
||||||
|
e.errors[key] = &Error{
|
||||||
|
ID: fmt.Sprintf("err-%d", e.nextID),
|
||||||
|
NodeID: node.ID,
|
||||||
|
NodeName: node.DisplayName(),
|
||||||
|
Port: portName,
|
||||||
|
Type: ErrorTypePortDown,
|
||||||
|
FirstSeen: now,
|
||||||
|
LastUpdated: now,
|
||||||
|
}
|
||||||
|
e.t.NotifyUpdate()
|
||||||
|
}
|
||||||
|
|
||||||
func (e *ErrorTracker) ClearError(errorID string) {
|
func (e *ErrorTracker) ClearError(errorID string) {
|
||||||
e.mu.Lock()
|
e.mu.Lock()
|
||||||
defer e.mu.Unlock()
|
defer e.mu.Unlock()
|
||||||
|
|||||||
2
nodes.go
2
nodes.go
@@ -246,7 +246,7 @@ func (n *Nodes) startNodePoller(node *Node) {
|
|||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
pollTicker := time.NewTicker(10 * time.Second)
|
pollTicker := time.NewTicker(10 * time.Second)
|
||||||
pingTicker := time.NewTicker(5 * time.Second)
|
pingTicker := time.NewTicker(3 * time.Second)
|
||||||
defer pollTicker.Stop()
|
defer pollTicker.Stop()
|
||||||
defer pingTicker.Stop()
|
defer pingTicker.Stop()
|
||||||
|
|
||||||
|
|||||||
2
ping.go
2
ping.go
@@ -172,7 +172,7 @@ func (t *Tendrils) pingNode(node *Node) {
|
|||||||
|
|
||||||
anyReachable := false
|
anyReachable := false
|
||||||
for _, ipStr := range ips {
|
for _, ipStr := range ips {
|
||||||
if t.ping.Ping(ipStr, 2*time.Second) {
|
if t.ping.Ping(ipStr, 1*time.Second) {
|
||||||
anyReachable = true
|
anyReachable = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|||||||
42
snmp.go
42
snmp.go
@@ -17,6 +17,7 @@ type ifaceCounters struct {
|
|||||||
outPkts uint64
|
outPkts uint64
|
||||||
inBytes uint64
|
inBytes uint64
|
||||||
outBytes uint64
|
outBytes uint64
|
||||||
|
uptime uint64
|
||||||
timestamp time.Time
|
timestamp time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -252,9 +253,15 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
|
|||||||
status, hasStatus := ifOperStatus[ifIndex]
|
status, hasStatus := ifOperStatus[ifIndex]
|
||||||
isUp := hasStatus && status == 1
|
isUp := hasStatus && status == 1
|
||||||
if !isUp {
|
if !isUp {
|
||||||
|
if iface.Up {
|
||||||
|
log.Printf("[ERROR] port down on %s %s", node.DisplayName(), name)
|
||||||
|
t.errors.AddPortDown(node, name)
|
||||||
|
}
|
||||||
|
iface.Up = false
|
||||||
iface.Stats = nil
|
iface.Stats = nil
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
iface.Up = true
|
||||||
|
|
||||||
stats := &InterfaceStats{}
|
stats := &InterfaceStats{}
|
||||||
|
|
||||||
@@ -282,11 +289,15 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
|
|||||||
inPkts := ifHCInUcastPkts[ifIndex] + ifHCInMcastPkts[ifIndex] + ifHCInBcastPkts[ifIndex]
|
inPkts := ifHCInUcastPkts[ifIndex] + ifHCInMcastPkts[ifIndex] + ifHCInBcastPkts[ifIndex]
|
||||||
outPkts := ifHCOutUcastPkts[ifIndex] + ifHCOutMcastPkts[ifIndex] + ifHCOutBcastPkts[ifIndex]
|
outPkts := ifHCOutUcastPkts[ifIndex] + ifHCOutMcastPkts[ifIndex] + ifHCOutBcastPkts[ifIndex]
|
||||||
|
|
||||||
if hasInBytes && hasOutBytes {
|
key := node.ID + ":" + name
|
||||||
key := node.ID + ":" + name
|
ifaceTracker.mu.Lock()
|
||||||
ifaceTracker.mu.Lock()
|
prev, hasPrev := ifaceTracker.counters[key]
|
||||||
prev, hasPrev := ifaceTracker.counters[key]
|
if hasPrev {
|
||||||
if hasPrev {
|
if prev.uptime > 0 && stats.Uptime > 0 && stats.Uptime < prev.uptime {
|
||||||
|
log.Printf("[ERROR] port flap on %s %s: uptime dropped from %d to %d seconds", node.DisplayName(), name, prev.uptime, stats.Uptime)
|
||||||
|
t.errors.AddPortFlap(node, name)
|
||||||
|
}
|
||||||
|
if hasInBytes && hasOutBytes {
|
||||||
elapsed := now.Sub(prev.timestamp).Seconds()
|
elapsed := now.Sub(prev.timestamp).Seconds()
|
||||||
if elapsed > 0 {
|
if elapsed > 0 {
|
||||||
stats.InPktsRate = float64(inPkts-prev.inPkts) / elapsed
|
stats.InPktsRate = float64(inPkts-prev.inPkts) / elapsed
|
||||||
@@ -307,15 +318,20 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ifaceTracker.counters[key] = &ifaceCounters{
|
|
||||||
inPkts: inPkts,
|
|
||||||
outPkts: outPkts,
|
|
||||||
inBytes: inBytes,
|
|
||||||
outBytes: outBytes,
|
|
||||||
timestamp: now,
|
|
||||||
}
|
|
||||||
ifaceTracker.mu.Unlock()
|
|
||||||
}
|
}
|
||||||
|
storedUptime := stats.Uptime
|
||||||
|
if storedUptime == 0 && hasPrev {
|
||||||
|
storedUptime = prev.uptime
|
||||||
|
}
|
||||||
|
ifaceTracker.counters[key] = &ifaceCounters{
|
||||||
|
inPkts: inPkts,
|
||||||
|
outPkts: outPkts,
|
||||||
|
inBytes: inBytes,
|
||||||
|
outBytes: outBytes,
|
||||||
|
uptime: storedUptime,
|
||||||
|
timestamp: now,
|
||||||
|
}
|
||||||
|
ifaceTracker.mu.Unlock()
|
||||||
|
|
||||||
if poe, ok := poeStats[name]; ok {
|
if poe, ok := poeStats[name]; ok {
|
||||||
stats.PoE = poe
|
stats.PoE = poe
|
||||||
|
|||||||
3
types.go
3
types.go
@@ -360,6 +360,7 @@ type Interface struct {
|
|||||||
Name string `json:"name,omitempty"`
|
Name string `json:"name,omitempty"`
|
||||||
MAC MAC `json:"mac"`
|
MAC MAC `json:"mac"`
|
||||||
IPs IPSet `json:"ips,omitempty"`
|
IPs IPSet `json:"ips,omitempty"`
|
||||||
|
Up bool `json:"up,omitempty"`
|
||||||
Stats *InterfaceStats `json:"stats,omitempty"`
|
Stats *InterfaceStats `json:"stats,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -368,6 +369,7 @@ func (i *Interface) MarshalJSON() ([]byte, error) {
|
|||||||
Name string `json:"name,omitempty"`
|
Name string `json:"name,omitempty"`
|
||||||
MAC MAC `json:"mac"`
|
MAC MAC `json:"mac"`
|
||||||
IPs []string `json:"ips,omitempty"`
|
IPs []string `json:"ips,omitempty"`
|
||||||
|
Up bool `json:"up,omitempty"`
|
||||||
Stats *InterfaceStats `json:"stats,omitempty"`
|
Stats *InterfaceStats `json:"stats,omitempty"`
|
||||||
}
|
}
|
||||||
var ips []string
|
var ips []string
|
||||||
@@ -378,6 +380,7 @@ func (i *Interface) MarshalJSON() ([]byte, error) {
|
|||||||
Name: i.Name,
|
Name: i.Name,
|
||||||
MAC: i.MAC,
|
MAC: i.MAC,
|
||||||
IPs: ips,
|
IPs: ips,
|
||||||
|
Up: i.Up,
|
||||||
Stats: i.Stats,
|
Stats: i.Stats,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user