Files
tendrils/errors.go

262 lines
5.8 KiB
Go
Raw Normal View History

package tendrils
import (
"fmt"
"log"
"sort"
"sync"
"time"
)
// Values stored in Error.Type to identify the kind of tracked condition.
const (
// ErrorTypeNew is assigned by AddPortError to entries for port error counters.
ErrorTypeNew = "new"
// ErrorTypeUnreachable is assigned by AddUnreachable.
ErrorTypeUnreachable = "unreachable"
// ErrorTypeHighUtilization is assigned by AddUtilizationError.
ErrorTypeHighUtilization = "high_utilization"
// ErrorTypePortFlap is assigned by AddPortFlap.
ErrorTypePortFlap = "port_flap"
// ErrorTypePortDown is assigned by AddPortDown.
ErrorTypePortDown = "port_down"
)
// Error is one tracked condition on a node, optionally scoped to a port.
// It is serialized to JSON using the field tags below.
type Error struct {
	// ID is the tracker-assigned identifier, formatted as "err-<n>".
	ID string `json:"id"`
	// NodeID and NodeName identify the node the condition was reported for.
	NodeID   string `json:"node_id"`
	NodeName string `json:"node_name"`
	// Type is one of the ErrorType* constants.
	Type string `json:"type"`
	// Port is the port name; empty for node-level conditions such as
	// unreachable.
	Port string `json:"port,omitempty"`
	// InErrors and OutErrors hold the most recently reported interface
	// error counters.
	InErrors  uint64 `json:"in_errors,omitempty"`
	OutErrors uint64 `json:"out_errors,omitempty"`
	// InDelta and OutDelta accumulate the per-report deltas since the entry
	// was created.
	InDelta  uint64 `json:"in_delta,omitempty"`
	OutDelta uint64 `json:"out_delta,omitempty"`
	// Utilization is the highest utilization value reported for the entry.
	Utilization float64 `json:"utilization,omitempty"`
	// FirstSeen is when the entry was created; LastSeen is when the
	// condition was most recently reported. Both are UTC.
	FirstSeen time.Time `json:"first_seen"`
	LastSeen  time.Time `json:"last_seen"`
	// LastUpdated is when the entry's data last changed. It stays zero for
	// entry types that never set it.
	LastUpdated time.Time `json:"last_updated,omitzero"`
}
// ErrorTracker holds the set of currently tracked errors, keyed by a
// per-condition string, and calls NotifyUpdate on its Tendrils whenever that
// set or one of its entries changes.
type ErrorTracker struct {
// mu is locked by every method before touching errors or nextID.
mu sync.RWMutex
// errors maps a condition key (for example "unreachable:<nodeID>") to its entry.
errors map[string]*Error
// nextID is incremented before each new entry and used to build its "err-<n>" ID.
nextID int
// t is the owner whose NotifyUpdate method is called after each change.
t *Tendrils
}
// NewErrorTracker returns an empty tracker that reports changes to t.
func NewErrorTracker(t *Tendrils) *ErrorTracker {
	tracker := new(ErrorTracker)
	tracker.errors = make(map[string]*Error)
	tracker.t = t
	return tracker
}
// AddUnreachable records that node is unreachable. The first report creates
// an entry; later reports only refresh its LastSeen time. NotifyUpdate is
// called in both cases.
func (e *ErrorTracker) AddUnreachable(node *Node) {
	e.mu.Lock()
	defer e.mu.Unlock()

	key := "unreachable:" + node.ID
	stamp := time.Now().UTC()

	entry, found := e.errors[key]
	if !found {
		e.nextID++
		entry = &Error{
			ID:        fmt.Sprintf("err-%d", e.nextID),
			NodeID:    node.ID,
			NodeName:  node.DisplayName(),
			Type:      ErrorTypeUnreachable,
			FirstSeen: stamp,
		}
		e.errors[key] = entry
	}
	// New and existing entries alike end up with LastSeen set to this report.
	entry.LastSeen = stamp
	e.t.NotifyUpdate()
}
// RemoveUnreachable drops the unreachable entry for node, if one exists, and
// calls NotifyUpdate only when something was actually removed.
func (e *ErrorTracker) RemoveUnreachable(node *Node) {
	e.mu.Lock()
	defer e.mu.Unlock()

	key := "unreachable:" + node.ID
	if _, tracked := e.errors[key]; !tracked {
		return
	}
	delete(e.errors, key)
	e.t.NotifyUpdate()
}
// AddPortError records interface errors for portName on node.
//
// The first report creates an entry seeded with the counters from stats and
// the given deltas, and logs it. Later reports overwrite the counters, add
// the deltas to the running totals, and refresh LastSeen and LastUpdated.
// NotifyUpdate is called in both cases.
func (e *ErrorTracker) AddPortError(node *Node, portName string, stats *InterfaceStats, inDelta, outDelta uint64) {
	e.mu.Lock()
	defer e.mu.Unlock()

	key := node.ID + ":" + portName
	stamp := time.Now().UTC()

	entry, found := e.errors[key]
	if !found {
		e.nextID++
		e.errors[key] = &Error{
			ID:          fmt.Sprintf("err-%d", e.nextID),
			NodeID:      node.ID,
			NodeName:    node.DisplayName(),
			Port:        portName,
			Type:        ErrorTypeNew,
			InErrors:    stats.InErrors,
			OutErrors:   stats.OutErrors,
			InDelta:     inDelta,
			OutDelta:    outDelta,
			FirstSeen:   stamp,
			LastSeen:    stamp,
			LastUpdated: stamp,
		}
		log.Printf("[ERROR] port errors on %s %s: in=%d out=%d", node.DisplayName(), portName, inDelta, outDelta)
		e.t.NotifyUpdate()
		return
	}

	// Counters are absolute values; deltas accumulate across reports.
	entry.InErrors = stats.InErrors
	entry.OutErrors = stats.OutErrors
	entry.InDelta += inDelta
	entry.OutDelta += outDelta
	entry.LastSeen = stamp
	entry.LastUpdated = stamp
	e.t.NotifyUpdate()
}
// AddUtilizationError records high utilization for portName on node.
//
// The first report creates and logs an entry. Later reports refresh LastSeen
// and raise the stored Utilization (and LastUpdated) only when the new value
// is higher than the stored one. NotifyUpdate is called in both cases.
func (e *ErrorTracker) AddUtilizationError(node *Node, portName string, utilization float64) {
	e.mu.Lock()
	defer e.mu.Unlock()

	key := "util:" + node.ID + ":" + portName
	stamp := time.Now().UTC()

	entry, found := e.errors[key]
	switch {
	case found:
		entry.LastSeen = stamp
		// Keep the peak value: only a strictly higher reading replaces it.
		if entry.Utilization < utilization {
			entry.Utilization = utilization
			entry.LastUpdated = stamp
		}
	default:
		e.nextID++
		e.errors[key] = &Error{
			ID:          fmt.Sprintf("err-%d", e.nextID),
			NodeID:      node.ID,
			NodeName:    node.DisplayName(),
			Port:        portName,
			Type:        ErrorTypeHighUtilization,
			Utilization: utilization,
			FirstSeen:   stamp,
			LastSeen:    stamp,
			LastUpdated: stamp,
		}
		log.Printf("[ERROR] high utilization on %s %s: %.0f%%", node.DisplayName(), portName, utilization)
	}
	e.t.NotifyUpdate()
}
// UpdateUtilizationLastSeen refreshes an existing utilization entry for
// portName on node. It never creates an entry: when none is tracked it does
// nothing and does not notify. Otherwise it updates LastSeen, raises the
// stored Utilization (and LastUpdated) if the new value is higher, and calls
// NotifyUpdate.
func (e *ErrorTracker) UpdateUtilizationLastSeen(node *Node, portName string, utilization float64) {
	e.mu.Lock()
	defer e.mu.Unlock()

	entry, found := e.errors["util:"+node.ID+":"+portName]
	if !found {
		return
	}

	stamp := time.Now().UTC()
	entry.LastSeen = stamp
	if entry.Utilization < utilization {
		entry.Utilization = utilization
		entry.LastUpdated = stamp
	}
	e.t.NotifyUpdate()
}
// AddPortFlap records a flap on portName of node. The first report creates
// an entry with FirstSeen and LastSeen set; later reports refresh LastSeen
// and LastUpdated. NotifyUpdate is called in both cases.
func (e *ErrorTracker) AddPortFlap(node *Node, portName string) {
	e.mu.Lock()
	defer e.mu.Unlock()

	key := "flap:" + node.ID + ":" + portName
	stamp := time.Now().UTC()

	if entry, found := e.errors[key]; found {
		entry.LastSeen = stamp
		entry.LastUpdated = stamp
	} else {
		e.nextID++
		// LastUpdated is deliberately left at its zero value on creation.
		e.errors[key] = &Error{
			ID:        fmt.Sprintf("err-%d", e.nextID),
			NodeID:    node.ID,
			NodeName:  node.DisplayName(),
			Port:      portName,
			Type:      ErrorTypePortFlap,
			FirstSeen: stamp,
			LastSeen:  stamp,
		}
	}
	e.t.NotifyUpdate()
}
// AddPortDown records that portName on node is down. The first report
// creates an entry with FirstSeen and LastSeen set; later reports refresh
// LastSeen and LastUpdated. NotifyUpdate is called in both cases.
func (e *ErrorTracker) AddPortDown(node *Node, portName string) {
	e.mu.Lock()
	defer e.mu.Unlock()

	stamp := time.Now().UTC()
	key := "down:" + node.ID + ":" + portName

	entry, found := e.errors[key]
	if !found {
		e.nextID++
		// LastUpdated is deliberately left at its zero value on creation.
		e.errors[key] = &Error{
			ID:        fmt.Sprintf("err-%d", e.nextID),
			NodeID:    node.ID,
			NodeName:  node.DisplayName(),
			Port:      portName,
			Type:      ErrorTypePortDown,
			FirstSeen: stamp,
			LastSeen:  stamp,
		}
		e.t.NotifyUpdate()
		return
	}

	entry.LastSeen = stamp
	entry.LastUpdated = stamp
	e.t.NotifyUpdate()
}
// ClearError removes the entry whose ID equals errorID and calls
// NotifyUpdate. It does nothing when no entry has that ID.
func (e *ErrorTracker) ClearError(errorID string) {
	e.mu.Lock()
	defer e.mu.Unlock()

	// The map is keyed by condition, not by ID, so scan for the match.
	for key, tracked := range e.errors {
		if tracked.ID != errorID {
			continue
		}
		delete(e.errors, key)
		e.t.NotifyUpdate()
		return
	}
}
// ClearAllErrors discards every tracked entry by swapping in a fresh map and
// calls NotifyUpdate. It does nothing when the tracker is already empty.
func (e *ErrorTracker) ClearAllErrors() {
	e.mu.Lock()
	defer e.mu.Unlock()

	if len(e.errors) == 0 {
		return
	}
	e.errors = make(map[string]*Error)
	e.t.NotifyUpdate()
}
// GetErrors returns a snapshot of all tracked errors, sorted by node name,
// then port, then type, then ID.
//
// Each element is a copy of the tracked entry, so callers can read (or
// serialize) the result after this method returns without racing against
// later updates made under the tracker's lock. The returned slice is never
// nil.
func (e *ErrorTracker) GetErrors() []*Error {
	e.mu.RLock()
	defer e.mu.RUnlock()

	snapshot := make([]*Error, 0, len(e.errors))
	for _, tracked := range e.errors {
		// Copy while the read lock is held; handing out the live pointer
		// would let callers read fields that writers mutate concurrently.
		cp := *tracked
		snapshot = append(snapshot, &cp)
	}

	// Map iteration order is random and sort.Slice is not stable, so the
	// comparison must be a total order to give the same result on every call.
	sort.Slice(snapshot, func(i, j int) bool {
		a, b := snapshot[i], snapshot[j]
		if a.NodeName != b.NodeName {
			return a.NodeName < b.NodeName
		}
		if a.Port != b.Port {
			return a.Port < b.Port
		}
		if a.Type != b.Type {
			return a.Type < b.Type
		}
		return a.ID < b.ID
	})
	return snapshot
}