// spoolweight/claude/claude.go

// Package claude reads the weight off a scale photo using the Anthropic vision
// API. It owns credential resolution, prompting, the majority-vote logic, and
// response parsing — all the Claude-specific handling lives here.
package claude

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"image"
	"image/png"
	"io"
	"log/slog"
	"net/http"
	"os"
	"os/exec"
	"runtime"
	"strings"
	"sync"
	"time"

	xdraw "golang.org/x/image/draw"
)

// trace emits a debug-level "begin" record for op and hands back a closure
// that emits the matching "end" record together with the elapsed time. The
// usual call shape is `defer trace("name")()`. Both records go through
// slog.Debug, so they are only visible when debug logging is enabled.
func trace(op string) func() {
	started := time.Now()
	slog.Debug("begin", "op", op)
	finish := func() {
		elapsed := time.Since(started)
		slog.Debug("end", "op", op, "dur", elapsed)
	}
	return finish
}

// weightPrompt primes a general vision model to read the scale. It describes
// the scene (a filament spool on a digital kitchen scale), asks for the display
// to be read digit by digit exactly as shown, asks for the unit and a
// low/medium/high self-rated confidence, and requires the final line of the
// reply to be a single JSON object with "weight", "unit" and "confidence" keys
// so the answer can be extracted from the end of the reply.
//
// NOTE(review): the prompt text below contains no hint about the display
// housing clipping the tops of the digits (the 7-read-as-1 failure mode); if
// that misread matters on this kind of LCD, the hint has to be added here.
const weightPrompt = `This photo shows a 3D printer filament spool sitting on a digital kitchen scale. ` +
	`Read the weight on the scale's digital display, digit by digit, exactly as shown. ` +
	`Report the unit shown on the display (g, kg, lb, or oz), and rate your own confidence ` +
	`in the reading as "low", "medium", or "high". ` +
	`Reason briefly, then on the LAST line output ONLY JSON: ` +
	`{"weight": <number>, "unit": "<g|kg|lb|oz>", "confidence": "<low|medium|high>"}.`

// Reading is the result of reading a scale photo: the winning (majority)
// answer plus the raw per-vote data it was derived from. Weights and
// ModelConfidences are parallel slices with one entry per successful vote, in
// vote order; failed votes contribute no entry.
type Reading struct {
	Weight           float64   // winning weight value, in the unit given by Unit
	Unit             string    // unit of the winning vote, trimmed and lowercased (the prompt asks for g, kg, lb or oz)
	Confidence       float64   // share of successful votes matching the winner, times the winners' mean self-confidence
	Weights          []float64 // each successful vote's numeric reading
	ModelConfidences []string  // each successful vote's self-rating, as the model worded it
}

// ReadWeight asks a vision model to read the display, taking the majority answer
// across a few independent reads for robustness. No image regions or digit-font
// geometry are assumed: the model does general OCR on the whole photo.
//
// Votes are grouped by their (weight, unit) pair. The group with the most votes
// wins; when several groups tie, the one whose first vote came earliest wins, so
// the result is deterministic for a given set of replies.
//
// The overall confidence is the fraction of successful reads that agreed with
// the winning answer, scaled by the average self-confidence the model reported
// for those agreeing reads (low/medium/high -> 1/3, 2/3, 3/3). Reads that failed
// are excluded from both the numerator and the denominator.
//
// An error is returned only when every read failed; it is the error of the last
// failed read. Individual failures are otherwise logged at debug level.
func ReadWeight(img image.Image, auth Auth) (Reading, error) {
	defer trace("claude.readWeight")()
	const votes = 3
	type vote struct {
		w    float64
		u    string
		conf string
	}
	type outcome struct {
		v   vote
		err error
	}
	run := func() outcome {
		w, u, c, e := readOnce(img, auth)
		return outcome{vote{w, u, c}, e}
	}

	// Run the first call alone so it populates the prompt cache, then fire the
	// remaining (independent) votes concurrently to reuse that cache. Each
	// goroutine writes only its own slot, so no extra locking is needed.
	outcomes := make([]outcome, votes)
	outcomes[0] = run()
	var wg sync.WaitGroup
	for i := 1; i < votes; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			outcomes[i] = run()
		}(i)
	}
	wg.Wait()

	got := make([]vote, 0, votes)
	var lastErr error
	for i, o := range outcomes {
		if o.err != nil {
			// Surface partial failures instead of dropping them silently; they
			// only become the returned error if no vote succeeds.
			slog.Debug("vote failed", "vote", i, "err", o.err)
			lastErr = o.err
			continue
		}
		got = append(got, o.v)
	}
	if len(got) == 0 {
		return Reading{}, lastErr
	}

	key := func(w float64, u string) string { return fmt.Sprintf("%g|%s", w, u) }
	tally := make(map[string]int, len(got))
	for _, v := range got {
		tally[key(v.w, v.u)]++
	}
	// Choose the winner by walking the votes in order rather than ranging over
	// the map: map iteration order is unspecified, which would make tie-breaks
	// (e.g. three different readings) vary from run to run.
	bestKey, bestN := "", 0
	for _, v := range got {
		k := key(v.w, v.u)
		if n := tally[k]; n > bestN {
			bestKey, bestN = k, n
		}
	}

	res := Reading{}
	var winnerMC []float64
	for _, v := range got {
		res.Weights = append(res.Weights, v.w)
		res.ModelConfidences = append(res.ModelConfidences, v.conf)
		if key(v.w, v.u) == bestKey {
			res.Weight, res.Unit = v.w, v.u
			winnerMC = append(winnerMC, modelConfidence(v.conf))
		}
	}
	agreement := float64(bestN) / float64(len(got))
	res.Confidence = agreement * mean(winnerMC)
	return res, nil
}

// mean returns the arithmetic mean of xs, or 0 when xs is empty.
func mean(xs []float64) float64 {
	n := len(xs)
	if n == 0 {
		return 0
	}
	total := 0.0
	for i := 0; i < n; i++ {
		total += xs[i]
	}
	return total / float64(n)
}

// modelConfidence converts the model's textual self-rating into a fraction:
// "high" -> 1, "medium" -> 2/3, and everything else (including "low", the empty
// string and unrecognised words) -> 1/3. Matching ignores case and surrounding
// whitespace.
func modelConfidence(s string) float64 {
	rating := strings.TrimSpace(s)
	rating = strings.ToLower(rating)
	if rating == "high" {
		return 3.0 / 3
	}
	if rating == "medium" {
		return 2.0 / 3
	}
	// "low" or anything unexpected: be conservative.
	return 1.0 / 3
}

// readOnce performs a single vote: it PNG-encodes the (possibly downscaled)
// photo, sends it with weightPrompt to the Anthropic Messages API using the
// credentials in auth, and parses the JSON object at the end of the model's
// reply.
//
// It returns the weight, the unit (trimmed and lowercased), and the model's
// self-rated confidence (trimmed, otherwise as worded by the model). A non-nil
// error is returned when encoding, the HTTP exchange, a non-200 status, or
// parsing of either the API envelope or the model's answer fails; the other
// results are zero values in that case.
func readOnce(img image.Image, auth Auth) (float64, string, string, error) {
	defer trace("claude.vote")()
	// Downscale only to satisfy the vision API's size limit — not a crop.
	small := downscale(img, 1568)
	var buf bytes.Buffer
	if err := png.Encode(&buf, small); err != nil {
		return 0, "", "", fmt.Errorf("encode image as png: %w", err)
	}
	b64 := base64.StdEncoding.EncodeToString(buf.Bytes())

	reqBody := map[string]any{
		"model":      "claude-opus-4-8",
		"max_tokens": 600,
		"messages": []map[string]any{{
			"role": "user",
			"content": []map[string]any{
				{"type": "image", "source": map[string]any{"type": "base64", "media_type": "image/png", "data": b64}},
				// Cache the image+prompt prefix so the repeated majority-vote calls
				// (same image, same prompt) hit the cache instead of re-sending it.
				{"type": "text", "text": weightPrompt, "cache_control": map[string]string{"type": "ephemeral"}},
			},
		}},
	}
	// Some credential kinds carry a system prompt that must accompany the request.
	if auth.system != "" {
		reqBody["system"] = auth.system
	}
	body, err := json.Marshal(reqBody)
	if err != nil {
		return 0, "", "", fmt.Errorf("encode request body: %w", err)
	}
	req, err := http.NewRequest(http.MethodPost, "https://api.anthropic.com/v1/messages", bytes.NewReader(body))
	if err != nil {
		return 0, "", "", fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("content-type", "application/json")
	req.Header.Set("anthropic-version", "2023-06-01")
	auth.apply(req)

	resp, err := (&http.Client{Timeout: 90 * time.Second}).Do(req)
	if err != nil {
		return 0, "", "", fmt.Errorf("call API: %w", err)
	}
	defer resp.Body.Close()
	// A failed or truncated read must not be mistaken for a malformed reply, so
	// report it directly rather than letting it surface as a JSON error below.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return 0, "", "", fmt.Errorf("read API response (status %d): %w", resp.StatusCode, err)
	}
	if resp.StatusCode != http.StatusOK {
		return 0, "", "", fmt.Errorf("API %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
	}
	var parsed struct {
		Content []struct {
			Text string `json:"text"`
		} `json:"content"`
	}
	if err := json.Unmarshal(respBody, &parsed); err != nil {
		return 0, "", "", fmt.Errorf("decode API response: %w", err)
	}
	// Concatenate every content block's text; the answer is the last JSON
	// object in the combined reply.
	var reply strings.Builder
	for _, c := range parsed.Content {
		reply.WriteString(c.Text)
	}
	text := reply.String()
	var wr struct {
		Weight     float64 `json:"weight"`
		Unit       string  `json:"unit"`
		Confidence string  `json:"confidence"`
	}
	if err := json.Unmarshal([]byte(lastJSONObject(text)), &wr); err != nil {
		return 0, "", "", fmt.Errorf("parse model reply %q: %w", text, err)
	}
	return wr.Weight, strings.ToLower(strings.TrimSpace(wr.Unit)), strings.TrimSpace(wr.Confidence), nil
}

// lastJSONObject extracts the final brace-balanced {...} span from s, which is
// where the model is asked to put its answer. It scans backwards from the last
// '}' and stops at the '{' that balances it. Braces are counted literally, with
// no awareness of JSON string contents. When s has no '}' or the braces never
// balance, s is returned unchanged.
func lastJSONObject(s string) string {
	end := strings.LastIndexByte(s, '}')
	if end < 0 {
		return s
	}
	open := 0
	for start := end; start >= 0; start-- {
		c := s[start]
		if c == '}' {
			open++
			continue
		}
		if c != '{' {
			continue
		}
		open--
		if open == 0 {
			return s[start : end+1]
		}
	}
	return s
}

// downscale returns img unchanged when both of its sides are at most max
// pixels. Otherwise it returns a new RGBA image, resampled with Catmull-Rom,
// whose longer side is exactly max and whose shorter side is scaled by the same
// ratio (rounded down, but never below one pixel). The whole image is scaled;
// nothing is cropped.
func downscale(img image.Image, max int) image.Image {
	b := img.Bounds()
	w, h := b.Dx(), b.Dy()
	if w <= max && h <= max {
		return img
	}
	// Integer arithmetic keeps the long side at exactly max; scaling through a
	// float ratio can truncate it to max-1.
	nw, nh := max, h*max/w
	if h > w {
		nw, nh = w*max/h, max
	}
	// Extreme aspect ratios would otherwise round the short side down to zero,
	// producing an empty image that the PNG encoder rejects.
	if nw < 1 {
		nw = 1
	}
	if nh < 1 {
		nh = 1
	}
	dst := image.NewRGBA(image.Rect(0, 0, nw, nh))
	xdraw.CatmullRom.Scale(dst, dst.Bounds(), img, b, xdraw.Over, nil)
	return dst
}

// Auth carries one resolved set of Anthropic API credentials together with the
// request decorations that credential type needs.
type Auth struct {
	// name is a human-readable label for logging. header and value are the
	// HTTP header pair that carries the credential. beta, when non-empty, is
	// sent as the "anthropic-beta" header. system, when non-empty, is sent as
	// the request's system prompt.
	name, header, value, beta, system string
}

// Name reports which credential mechanism is in use, for logging.
func (a Auth) Name() string {
	return a.name
}

// apply writes the credential header onto req and, when a beta flag is
// configured, the "anthropic-beta" header as well.
func (a Auth) apply(req *http.Request) {
	h := req.Header
	h.Set(a.header, a.value)
	if a.beta == "" {
		return
	}
	h.Set("anthropic-beta", a.beta)
}

// ResolveAuth looks for Anthropic credentials and returns the first source that
// yields a non-empty value, in this order:
//
//  1. the ANTHROPIC_API_KEY environment variable, sent as an x-api-key header;
//  2. the ANTHROPIC_AUTH_TOKEN environment variable, sent as a Bearer token;
//  3. the local Claude Code OAuth token (macOS keychain), sent as a Bearer token.
//
// Bearer credentials additionally carry the OAuth beta flag and a system
// prompt. An error is returned when no source yields a credential.
func ResolveAuth() (Auth, error) {
	apiKey := os.Getenv("ANTHROPIC_API_KEY")
	if apiKey != "" {
		return Auth{name: "ANTHROPIC_API_KEY (x-api-key)", header: "x-api-key", value: apiKey}, nil
	}

	// The bearer sources are consulted lazily and in order, so the keychain is
	// only queried when the environment variable is unset or empty.
	type source struct {
		label string
		token func() string
	}
	sources := []source{
		{"ANTHROPIC_AUTH_TOKEN (Bearer)", func() string { return os.Getenv("ANTHROPIC_AUTH_TOKEN") }},
		{"Claude Code OAuth, macOS keychain (Bearer)", claudeCodeOAuthToken},
	}
	for _, src := range sources {
		tok := src.token()
		if tok == "" {
			continue
		}
		return Auth{
			name:   src.label,
			header: "authorization",
			value:  "Bearer " + tok,
			beta:   "oauth-2025-04-20",
			system: "You are Claude Code, Anthropic's official CLI for Claude.",
		}, nil
	}
	return Auth{}, fmt.Errorf("no credentials: set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN")
}

// claudeCodeOAuthToken makes a best-effort attempt to read the Claude Code
// OAuth access token from the macOS keychain via the `security` command-line
// tool. It returns "" on any other OS, when the command fails, or when its
// output is not the expected JSON; a failure is never reported as an error.
func claudeCodeOAuthToken() string {
	const keychainService = "Claude Code-credentials"
	if runtime.GOOS != "darwin" {
		return ""
	}

	lookup := exec.Command("security", "find-generic-password", "-s", keychainService, "-w")
	raw, err := lookup.Output()
	if err != nil {
		return ""
	}

	// The keychain entry is a JSON document; only the access token is needed.
	var payload struct {
		ClaudeAiOauth struct {
			AccessToken string `json:"accessToken"`
		} `json:"claudeAiOauth"`
	}
	if json.Unmarshal(raw, &payload) != nil {
		return ""
	}
	return payload.ClaudeAiOauth.AccessToken
}