diff --git a/errors.go b/errors.go
index f360850..1dabc8e 100644
--- a/errors.go
+++ b/errors.go
@@ -12,6 +12,8 @@ const (
 	ErrorTypeNew             = "new"
 	ErrorTypeUnreachable     = "unreachable"
 	ErrorTypeHighUtilization = "high_utilization"
+	ErrorTypePortFlap        = "port_flap"
+	ErrorTypePortDown        = "port_down"
 )
 
 type Error struct {
@@ -139,6 +141,46 @@ func (e *ErrorTracker) AddUtilizationError(node *Node, portName string, utilizat
 	e.t.NotifyUpdate()
 }
 
+// AddPortFlap records a port-flap error for portName on node. If that port
+// already has a tracked flap error, only its LastUpdated time is refreshed.
+func (e *ErrorTracker) AddPortFlap(node *Node, portName string) {
+	e.addPortError("flap:", ErrorTypePortFlap, node, portName)
+}
+
+// AddPortDown records a port-down error for portName on node. If that port
+// already has a tracked down error, only its LastUpdated time is refreshed.
+func (e *ErrorTracker) AddPortDown(node *Node, portName string) {
+	e.addPortError("down:", ErrorTypePortDown, node, portName)
+}
+
+// addPortError is the shared body of AddPortFlap and AddPortDown. Entries are
+// keyed by keyPrefix + node ID + ":" + port name, so each error kind has at
+// most one entry per port. NotifyUpdate is called for new and refreshed
+// entries alike.
+func (e *ErrorTracker) addPortError(keyPrefix, errType string, node *Node, portName string) {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	key := keyPrefix + node.ID + ":" + portName
+	now := time.Now()
+
+	if existing, ok := e.errors[key]; ok {
+		existing.LastUpdated = now
+	} else {
+		e.nextID++
+		e.errors[key] = &Error{
+			ID:          fmt.Sprintf("err-%d", e.nextID),
+			NodeID:      node.ID,
+			NodeName:    node.DisplayName(),
+			Port:        portName,
+			Type:        errType,
+			FirstSeen:   now,
+			LastUpdated: now,
+		}
+	}
+	e.t.NotifyUpdate()
+}
+
 func (e *ErrorTracker) ClearError(errorID string) {
 	e.mu.Lock()
 	defer e.mu.Unlock()
diff --git a/nodes.go b/nodes.go
index d7d0282..d9c0125 100644
--- a/nodes.go
+++ b/nodes.go
@@ -246,7 +246,7 @@ func (n *Nodes) startNodePoller(node *Node) {
 
 	go func() {
 		pollTicker := time.NewTicker(10 * time.Second)
-		pingTicker := time.NewTicker(5 * time.Second)
+		pingTicker := time.NewTicker(3 * time.Second)
 		defer pollTicker.Stop()
 		defer pingTicker.Stop()
 
diff --git a/ping.go b/ping.go
index 9c52025..e6b8b44 100644
--- a/ping.go
+++ b/ping.go
@@ -172,7 +172,7 @@ func (t *Tendrils) pingNode(node *Node) {
 
 	anyReachable := false
 	for _, ipStr := range ips {
-		if t.ping.Ping(ipStr, 2*time.Second) {
+		if t.ping.Ping(ipStr, 1*time.Second) {
 			anyReachable = true
 			break
 		}
diff --git a/snmp.go b/snmp.go
index 0bc6313..684f012 100644
--- a/snmp.go
+++ b/snmp.go
@@ -17,6 +17,7 @@ type ifaceCounters struct {
 	outPkts   uint64
 	inBytes   uint64
 	outBytes  uint64
+	uptime    uint64
 	timestamp time.Time
 }
 
@@ -252,9 +253,17 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
 		status, hasStatus := ifOperStatus[ifIndex]
 		isUp := hasStatus && status == 1
 		if !isUp {
+			// Report only the up-to-down transition; iface.Up is cleared
+			// below, so a port that stays down is not reported again.
+			if iface.Up {
+				log.Printf("[ERROR] port down on %s %s", node.DisplayName(), name)
+				t.errors.AddPortDown(node, name)
+			}
+			iface.Up = false
 			iface.Stats = nil
 			continue
 		}
+		iface.Up = true
 
 		stats := &InterfaceStats{}
 
@@ -282,11 +291,16 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
 		inPkts := ifHCInUcastPkts[ifIndex] + ifHCInMcastPkts[ifIndex] + ifHCInBcastPkts[ifIndex]
 		outPkts := ifHCOutUcastPkts[ifIndex] + ifHCOutMcastPkts[ifIndex] + ifHCOutBcastPkts[ifIndex]
 
-		if hasInBytes && hasOutBytes {
-			key := node.ID + ":" + name
-			ifaceTracker.mu.Lock()
-			prev, hasPrev := ifaceTracker.counters[key]
-			if hasPrev {
+		key := node.ID + ":" + name
+		ifaceTracker.mu.Lock()
+		prev, hasPrev := ifaceTracker.counters[key]
+		if hasPrev {
+			// Uptime going backwards between polls is treated as a port flap.
+			if prev.uptime > 0 && stats.Uptime > 0 && stats.Uptime < prev.uptime {
+				log.Printf("[ERROR] port flap on %s %s: uptime dropped from %d to %d seconds", node.DisplayName(), name, prev.uptime, stats.Uptime)
+				t.errors.AddPortFlap(node, name)
+			}
+			if hasInBytes && hasOutBytes {
 				elapsed := now.Sub(prev.timestamp).Seconds()
 				if elapsed > 0 {
 					stats.InPktsRate = float64(inPkts-prev.inPkts) / elapsed
@@ -307,15 +321,22 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
 					}
 				}
 			}
-			ifaceTracker.counters[key] = &ifaceCounters{
-				inPkts:    inPkts,
-				outPkts:   outPkts,
-				inBytes:   inBytes,
-				outBytes:  outBytes,
-				timestamp: now,
-			}
-			ifaceTracker.mu.Unlock()
 		}
+		// Keep the last known uptime when this poll reported none, so the
+		// flap check still has a baseline on the next poll.
+		storedUptime := stats.Uptime
+		if storedUptime == 0 && hasPrev {
+			storedUptime = prev.uptime
+		}
+		ifaceTracker.counters[key] = &ifaceCounters{
+			inPkts:    inPkts,
+			outPkts:   outPkts,
+			inBytes:   inBytes,
+			outBytes:  outBytes,
+			uptime:    storedUptime,
+			timestamp: now,
+		}
+		ifaceTracker.mu.Unlock()
 
 		if poe, ok := poeStats[name]; ok {
 			stats.PoE = poe
diff --git a/types.go b/types.go
index 11b4f93..04de840 100644
--- a/types.go
+++ b/types.go
@@ -360,6 +360,7 @@ type Interface struct {
 	Name  string          `json:"name,omitempty"`
 	MAC   MAC             `json:"mac"`
 	IPs   IPSet           `json:"ips,omitempty"`
+	Up    bool            `json:"up,omitempty"`
 	Stats *InterfaceStats `json:"stats,omitempty"`
 }
 
@@ -368,6 +369,7 @@ func (i *Interface) MarshalJSON() ([]byte, error) {
 		Name  string          `json:"name,omitempty"`
 		MAC   MAC             `json:"mac"`
 		IPs   []string        `json:"ips,omitempty"`
+		Up    bool            `json:"up,omitempty"`
 		Stats *InterfaceStats `json:"stats,omitempty"`
 	}
 	var ips []string
@@ -378,6 +380,7 @@ func (i *Interface) MarshalJSON() ([]byte, error) {
 		Name:  i.Name,
 		MAC:   i.MAC,
 		IPs:   ips,
+		Up:    i.Up,
 		Stats: i.Stats,
 	})
 }