// Package claude reads the weight off a scale photo using the Anthropic vision
// API. It owns credential resolution, prompting, the majority-vote logic, and
// response parsing — all the Claude-specific handling lives here.
package claude

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"image"
	"image/png"
	"io"
	"log/slog"
	"net/http"
	"os"
	"os/exec"
	"runtime"
	"strings"
	"sync"
	"time"

	xdraw "golang.org/x/image/draw"
)

// trace logs the start of an operation and, via the returned func, its end and
// elapsed time. The intended use is `defer trace("op")()`: the "begin" record
// is written when trace is called and the "end" record when the returned func
// runs. Both records go through slog.Debug, so they only show up when the
// default slog logger emits debug-level output (presumably enabled by the
// program's -v flag — confirm against the caller).
func trace(op string) func() {
	t0 := time.Now()
	slog.Debug("begin", "op", op)
	return func() { slog.Debug("end", "op", op, "dur", time.Since(t0)) }
}

// weightPrompt primes a general vision model to read the scale: it asks for
// the displayed value digit by digit, the unit, a low/medium/high
// self-confidence rating, and a final line containing only a JSON object.
//
// NOTE(review): an earlier version of this comment credited a hint about the
// housing clipping the tops of the digits with preventing a 7 from being
// misread as a 1. The prompt text below contains no such hint — confirm
// whether it was dropped intentionally.
//
// NOTE(review): the JSON template on the last line has empty value slots
// (`"weight": ,`). Placeholder text may have been lost; confirm against the
// intended prompt.
const weightPrompt = `This photo shows a 3D printer filament spool sitting on a digital kitchen scale. ` +
	`Read the weight on the scale's digital display, digit by digit, exactly as shown. ` +
	`Report the unit shown on the display (g, kg, lb, or oz), and rate your own confidence ` +
	`in the reading as "low", "medium", or "high". ` +
	`Reason briefly, then on the LAST line output ONLY JSON: ` +
	`{"weight": , "unit": "", "confidence": ""}.`

// Reading is the result of reading a scale photo.
type Reading struct {
	Weight           float64   // winning weight value
	Unit             string    // g, kg, lb or oz (lowercased)
	Confidence       float64   // vote agreement weighted by model self-confidence
	Weights          []float64 // each successful vote's numeric reading
	ModelConfidences []string  // each successful vote's self-rating, as the model worded it
}

// ReadWeight asks a vision model to read the display, taking the majority answer
// across a few independent reads for robustness.
No image regions or digit-font // geometry are assumed: the model does general OCR on the whole photo. The // overall confidence is the fraction of reads that agreed with the winning // answer, scaled by the average self-confidence the model reported for those // agreeing reads (low/medium/high -> 1/3, 2/3, 3/3). func ReadWeight(img image.Image, auth Auth) (Reading, error) { defer trace("claude.readWeight")() const votes = 3 type vote struct { w float64 u string conf string } type outcome struct { v vote err error } run := func() outcome { w, u, c, e := readOnce(img, auth) return outcome{vote{w, u, c}, e} } // Run the first call alone so it populates the prompt cache, then fire the // remaining (independent) votes concurrently to reuse that cache. outcomes := make([]outcome, votes) outcomes[0] = run() var wg sync.WaitGroup for i := 1; i < votes; i++ { wg.Add(1) go func(i int) { defer wg.Done() outcomes[i] = run() }(i) } wg.Wait() var got []vote var lastErr error for _, o := range outcomes { if o.err != nil { lastErr = o.err continue } got = append(got, o.v) } if len(got) == 0 { return Reading{}, lastErr } key := func(w float64, u string) string { return fmt.Sprintf("%g|%s", w, u) } tally := map[string]int{} for _, v := range got { tally[key(v.w, v.u)]++ } bestKey, bestN := "", -1 for k, n := range tally { if n > bestN { bestKey, bestN = k, n } } res := Reading{} var winnerMC []float64 for _, v := range got { res.Weights = append(res.Weights, v.w) res.ModelConfidences = append(res.ModelConfidences, v.conf) if key(v.w, v.u) == bestKey { res.Weight, res.Unit = v.w, v.u winnerMC = append(winnerMC, modelConfidence(v.conf)) } } agreement := float64(bestN) / float64(len(got)) res.Confidence = agreement * mean(winnerMC) return res, nil } func mean(xs []float64) float64 { if len(xs) == 0 { return 0 } var s float64 for _, x := range xs { s += x } return s / float64(len(xs)) } // modelConfidence maps the model's self-rating to a fraction. 
func modelConfidence(s string) float64 { switch strings.ToLower(strings.TrimSpace(s)) { case "high": return 3.0 / 3 case "medium": return 2.0 / 3 default: // "low" or anything unexpected -> treat conservatively return 1.0 / 3 } } func readOnce(img image.Image, auth Auth) (float64, string, string, error) { defer trace("claude.vote")() // Downscale only to satisfy the vision API's size limit — not a crop. small := downscale(img, 1568) var buf bytes.Buffer if err := png.Encode(&buf, small); err != nil { return 0, "", "", err } b64 := base64.StdEncoding.EncodeToString(buf.Bytes()) reqBody := map[string]interface{}{ "model": "claude-opus-4-8", "max_tokens": 600, "messages": []map[string]interface{}{{ "role": "user", "content": []map[string]interface{}{ {"type": "image", "source": map[string]interface{}{"type": "base64", "media_type": "image/png", "data": b64}}, // Cache the image+prompt prefix so the repeated majority-vote calls // (same image, same prompt) hit the cache instead of re-sending it. 
{"type": "text", "text": weightPrompt, "cache_control": map[string]string{"type": "ephemeral"}}, }, }}, } if auth.system != "" { reqBody["system"] = auth.system } body, _ := json.Marshal(reqBody) req, err := http.NewRequest("POST", "https://api.anthropic.com/v1/messages", bytes.NewReader(body)) if err != nil { return 0, "", "", err } req.Header.Set("content-type", "application/json") req.Header.Set("anthropic-version", "2023-06-01") auth.apply(req) resp, err := (&http.Client{Timeout: 90 * time.Second}).Do(req) if err != nil { return 0, "", "", err } defer resp.Body.Close() respBody, _ := io.ReadAll(resp.Body) if resp.StatusCode != 200 { return 0, "", "", fmt.Errorf("API %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody))) } var parsed struct { Content []struct { Text string `json:"text"` } `json:"content"` } if err := json.Unmarshal(respBody, &parsed); err != nil { return 0, "", "", err } var text string for _, c := range parsed.Content { text += c.Text } var wr struct { Weight float64 `json:"weight"` Unit string `json:"unit"` Confidence string `json:"confidence"` } if err := json.Unmarshal([]byte(lastJSONObject(text)), &wr); err != nil { return 0, "", "", fmt.Errorf("parse model reply %q: %v", text, err) } return wr.Weight, strings.ToLower(strings.TrimSpace(wr.Unit)), strings.TrimSpace(wr.Confidence), nil } // lastJSONObject returns the final {...} object in s (the model's answer line). 
func lastJSONObject(s string) string { j := strings.LastIndex(s, "}") if j < 0 { return s } depth := 0 for i := j; i >= 0; i-- { switch s[i] { case '}': depth++ case '{': depth-- if depth == 0 { return s[i : j+1] } } } return s } func downscale(img image.Image, max int) image.Image { b := img.Bounds() w, h := b.Dx(), b.Dy() if w <= max && h <= max { return img } scale := float64(max) / float64(w) if h > w { scale = float64(max) / float64(h) } dst := image.NewRGBA(image.Rect(0, 0, int(float64(w)*scale), int(float64(h)*scale))) xdraw.CatmullRom.Scale(dst, dst.Bounds(), img, b, xdraw.Over, nil) return dst } // Auth holds resolved Anthropic API credentials. type Auth struct { name, header, value, beta, system string } // Name describes the credential mechanism in use (for logging). func (a Auth) Name() string { return a.name } func (a Auth) apply(req *http.Request) { req.Header.Set(a.header, a.value) if a.beta != "" { req.Header.Set("anthropic-beta", a.beta) } } // ResolveAuth finds Anthropic credentials: an API key, a bearer token, or the // local Claude Code OAuth token (macOS keychain). 
func ResolveAuth() (Auth, error) { if k := os.Getenv("ANTHROPIC_API_KEY"); k != "" { return Auth{name: "ANTHROPIC_API_KEY (x-api-key)", header: "x-api-key", value: k}, nil } bearer := func(name, tok string) Auth { return Auth{ name: name, header: "authorization", value: "Bearer " + tok, beta: "oauth-2025-04-20", system: "You are Claude Code, Anthropic's official CLI for Claude.", } } if t := os.Getenv("ANTHROPIC_AUTH_TOKEN"); t != "" { return bearer("ANTHROPIC_AUTH_TOKEN (Bearer)", t), nil } if tok := claudeCodeOAuthToken(); tok != "" { return bearer("Claude Code OAuth, macOS keychain (Bearer)", tok), nil } return Auth{}, fmt.Errorf("no credentials: set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN") } func claudeCodeOAuthToken() string { if runtime.GOOS != "darwin" { return "" } out, err := exec.Command("security", "find-generic-password", "-s", "Claude Code-credentials", "-w").Output() if err != nil { return "" } var creds struct { ClaudeAiOauth struct { AccessToken string `json:"accessToken"` } `json:"claudeAiOauth"` } if err := json.Unmarshal(out, &creds); err != nil { return "" } return creds.ClaudeAiOauth.AccessToken }