split out claude package, report weight breakdowns, add slog tracing
This commit is contained in:
@@ -0,0 +1,314 @@
|
||||
// Package claude reads the weight off a scale photo using the Anthropic vision
|
||||
// API. It owns credential resolution, prompting, the majority-vote logic, and
|
||||
// response parsing — all the Claude-specific handling lives here.
|
||||
package claude
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/png"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
xdraw "golang.org/x/image/draw"
|
||||
)
|
||||
|
||||
// trace emits a debug-level "begin" record for op and hands back a function
// that emits the matching "end" record together with the elapsed time. The
// intended use is `defer trace("name")()`. Both records are logged at debug
// level, so they are only visible when the default slog logger has debug
// output enabled.
func trace(op string) func() {
	started := time.Now()
	slog.Debug("begin", "op", op)
	finish := func() {
		elapsed := time.Since(started)
		slog.Debug("end", "op", op, "dur", elapsed)
	}
	return finish
}
|
||||
|
||||
// weightPrompt is the instruction text sent alongside the scale photo. It
// primes a general vision model to read the display; the hint about the housing
// clipping the tops of the digits is what stops a 7 (which has a top bar) from
// being misread as a 1 on this kind of LCD. The final sentence pins the answer
// format: a single JSON object on the last line of the reply.
const weightPrompt = "" +
	// What the photo shows and what to do with it.
	`This photo shows a 3D printer filament spool sitting on a digital kitchen scale. ` +
	`Read the weight on the scale's digital display, digit by digit, exactly as shown. ` +
	// The 7-versus-1 hint.
	`Note: the top edge of the display housing can come close to obscuring the TOPS of the digits, ` +
	`so a digit that really has a top horizontal bar (like a 7) can momentarily look like a 1 — look carefully for a faint top bar before deciding. ` +
	// Additional fields to report.
	`Report the unit shown on the display (g, kg, lb, or oz), and rate your own confidence in the reading as "low", "medium", or "high". ` +
	// Output contract.
	`Reason briefly, then on the LAST line output ONLY JSON: {"weight": <number>, "unit": "<g|kg|lb|oz>", "confidence": "<low|medium|high>"}.`
|
||||
|
||||
// Reading is what ReadWeight reports for one scale photo: the winning answer
// plus the raw per-vote data it was derived from.
type Reading struct {
	// Weight is the numeric value of the winning answer.
	Weight float64

	// Unit is the winning answer's unit: g, kg, lb or oz (lowercased).
	Unit string

	// Confidence is the vote agreement weighted by the model's
	// self-confidence.
	Confidence float64

	// Weights holds the numeric reading of each successful vote.
	Weights []float64

	// ModelConfidences holds each successful vote's self-rating, as the
	// model worded it.
	ModelConfidences []string
}
|
||||
|
||||
// ReadWeight asks a vision model to read the display, taking the majority answer
|
||||
// across a few independent reads for robustness. No image regions or digit-font
|
||||
// geometry are assumed: the model does general OCR on the whole photo. The
|
||||
// overall confidence is the fraction of reads that agreed with the winning
|
||||
// answer, scaled by the average self-confidence the model reported for those
|
||||
// agreeing reads (low/medium/high -> 1/3, 2/3, 3/3).
|
||||
func ReadWeight(img image.Image, auth Auth) (Reading, error) {
|
||||
defer trace("claude.readWeight")()
|
||||
const votes = 3
|
||||
type vote struct {
|
||||
w float64
|
||||
u string
|
||||
conf string
|
||||
}
|
||||
type outcome struct {
|
||||
v vote
|
||||
err error
|
||||
}
|
||||
run := func() outcome {
|
||||
w, u, c, e := readOnce(img, auth)
|
||||
return outcome{vote{w, u, c}, e}
|
||||
}
|
||||
|
||||
// Run the first call alone so it populates the prompt cache, then fire the
|
||||
// remaining (independent) votes concurrently to reuse that cache.
|
||||
outcomes := make([]outcome, votes)
|
||||
outcomes[0] = run()
|
||||
var wg sync.WaitGroup
|
||||
for i := 1; i < votes; i++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
outcomes[i] = run()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
var got []vote
|
||||
var lastErr error
|
||||
for _, o := range outcomes {
|
||||
if o.err != nil {
|
||||
lastErr = o.err
|
||||
continue
|
||||
}
|
||||
got = append(got, o.v)
|
||||
}
|
||||
if len(got) == 0 {
|
||||
return Reading{}, lastErr
|
||||
}
|
||||
|
||||
key := func(w float64, u string) string { return fmt.Sprintf("%g|%s", w, u) }
|
||||
tally := map[string]int{}
|
||||
for _, v := range got {
|
||||
tally[key(v.w, v.u)]++
|
||||
}
|
||||
bestKey, bestN := "", -1
|
||||
for k, n := range tally {
|
||||
if n > bestN {
|
||||
bestKey, bestN = k, n
|
||||
}
|
||||
}
|
||||
|
||||
res := Reading{}
|
||||
var winnerMC []float64
|
||||
for _, v := range got {
|
||||
res.Weights = append(res.Weights, v.w)
|
||||
res.ModelConfidences = append(res.ModelConfidences, v.conf)
|
||||
if key(v.w, v.u) == bestKey {
|
||||
res.Weight, res.Unit = v.w, v.u
|
||||
winnerMC = append(winnerMC, modelConfidence(v.conf))
|
||||
}
|
||||
}
|
||||
agreement := float64(bestN) / float64(len(got))
|
||||
res.Confidence = agreement * mean(winnerMC)
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// mean returns the arithmetic mean of xs, or 0 when xs is empty.
func mean(xs []float64) float64 {
	n := len(xs)
	if n == 0 {
		return 0
	}
	total := 0.0
	for i := 0; i < n; i++ {
		total += xs[i]
	}
	return total / float64(n)
}
|
||||
|
||||
// modelConfidence converts the model's textual self-rating into a fraction:
// "high" -> 3/3, "medium" -> 2/3, and everything else -> 1/3. Matching ignores
// case and surrounding whitespace. Unrecognised ratings are deliberately
// treated like "low" so an unexpected answer never inflates confidence.
func modelConfidence(s string) float64 {
	rating := strings.ToLower(strings.TrimSpace(s))
	if rating == "high" {
		return 3.0 / 3
	}
	if rating == "medium" {
		return 2.0 / 3
	}
	return 1.0 / 3
}
|
||||
|
||||
func readOnce(img image.Image, auth Auth) (float64, string, string, error) {
|
||||
defer trace("claude.vote")()
|
||||
// Downscale only to satisfy the vision API's size limit — not a crop.
|
||||
small := downscale(img, 1568)
|
||||
var buf bytes.Buffer
|
||||
if err := png.Encode(&buf, small); err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
b64 := base64.StdEncoding.EncodeToString(buf.Bytes())
|
||||
|
||||
reqBody := map[string]interface{}{
|
||||
"model": "claude-opus-4-8",
|
||||
"max_tokens": 600,
|
||||
"messages": []map[string]interface{}{{
|
||||
"role": "user",
|
||||
"content": []map[string]interface{}{
|
||||
{"type": "image", "source": map[string]interface{}{"type": "base64", "media_type": "image/png", "data": b64}},
|
||||
// Cache the image+prompt prefix so the repeated majority-vote calls
|
||||
// (same image, same prompt) hit the cache instead of re-sending it.
|
||||
{"type": "text", "text": weightPrompt, "cache_control": map[string]string{"type": "ephemeral"}},
|
||||
},
|
||||
}},
|
||||
}
|
||||
if auth.system != "" {
|
||||
reqBody["system"] = auth.system
|
||||
}
|
||||
body, _ := json.Marshal(reqBody)
|
||||
req, err := http.NewRequest("POST", "https://api.anthropic.com/v1/messages", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
req.Header.Set("content-type", "application/json")
|
||||
req.Header.Set("anthropic-version", "2023-06-01")
|
||||
auth.apply(req)
|
||||
|
||||
resp, err := (&http.Client{Timeout: 90 * time.Second}).Do(req)
|
||||
if err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
if resp.StatusCode != 200 {
|
||||
return 0, "", "", fmt.Errorf("API %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
||||
}
|
||||
var parsed struct {
|
||||
Content []struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"content"`
|
||||
}
|
||||
if err := json.Unmarshal(respBody, &parsed); err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
var text string
|
||||
for _, c := range parsed.Content {
|
||||
text += c.Text
|
||||
}
|
||||
var wr struct {
|
||||
Weight float64 `json:"weight"`
|
||||
Unit string `json:"unit"`
|
||||
Confidence string `json:"confidence"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(lastJSONObject(text)), &wr); err != nil {
|
||||
return 0, "", "", fmt.Errorf("parse model reply %q: %v", text, err)
|
||||
}
|
||||
return wr.Weight, strings.ToLower(strings.TrimSpace(wr.Unit)), strings.TrimSpace(wr.Confidence), nil
|
||||
}
|
||||
|
||||
// lastJSONObject extracts the final {...} object in s (the model's answer
// line). It starts at the last '}' in s and walks left, matching braces, until
// the opening '{' that balances it; the text between the two, inclusive, is
// returned. When s contains no '}' or the braces never balance, s is returned
// unchanged. Braces are counted literally, including any inside string values.
func lastJSONObject(s string) string {
	end := strings.LastIndex(s, "}")
	if end < 0 {
		return s
	}
	unmatched := 0 // closing braces seen so far without their opener
	for start := end; start >= 0; start-- {
		c := s[start]
		if c == '}' {
			unmatched++
			continue
		}
		if c != '{' {
			continue
		}
		unmatched--
		if unmatched == 0 {
			return s[start : end+1]
		}
	}
	return s
}
|
||||
|
||||
func downscale(img image.Image, max int) image.Image {
|
||||
b := img.Bounds()
|
||||
w, h := b.Dx(), b.Dy()
|
||||
if w <= max && h <= max {
|
||||
return img
|
||||
}
|
||||
scale := float64(max) / float64(w)
|
||||
if h > w {
|
||||
scale = float64(max) / float64(h)
|
||||
}
|
||||
dst := image.NewRGBA(image.Rect(0, 0, int(float64(w)*scale), int(float64(h)*scale)))
|
||||
xdraw.CatmullRom.Scale(dst, dst.Bounds(), img, b, xdraw.Over, nil)
|
||||
return dst
|
||||
}
|
||||
|
||||
// Auth holds resolved Anthropic API credentials: which header carries the
// credential and the optional extras that go with that mechanism.
type Auth struct {
	name   string // description of the mechanism, for logging
	header string // request header that carries the credential
	value  string // value sent in that header
	beta   string // anthropic-beta header value; not sent when empty
	system string // system prompt added to requests; omitted when empty
}

// Name describes the credential mechanism in use (for logging).
func (a Auth) Name() string {
	return a.name
}

// apply writes the credential header onto req and, when a beta value is
// configured, the anthropic-beta header as well.
func (a Auth) apply(req *http.Request) {
	h := req.Header
	h.Set(a.header, a.value)
	if a.beta == "" {
		return
	}
	h.Set("anthropic-beta", a.beta)
}
|
||||
|
||||
// ResolveAuth finds Anthropic credentials: an API key, a bearer token, or the
|
||||
// local Claude Code OAuth token (macOS keychain).
|
||||
func ResolveAuth() (Auth, error) {
|
||||
if k := os.Getenv("ANTHROPIC_API_KEY"); k != "" {
|
||||
return Auth{name: "ANTHROPIC_API_KEY (x-api-key)", header: "x-api-key", value: k}, nil
|
||||
}
|
||||
bearer := func(name, tok string) Auth {
|
||||
return Auth{
|
||||
name: name,
|
||||
header: "authorization", value: "Bearer " + tok,
|
||||
beta: "oauth-2025-04-20",
|
||||
system: "You are Claude Code, Anthropic's official CLI for Claude.",
|
||||
}
|
||||
}
|
||||
if t := os.Getenv("ANTHROPIC_AUTH_TOKEN"); t != "" {
|
||||
return bearer("ANTHROPIC_AUTH_TOKEN (Bearer)", t), nil
|
||||
}
|
||||
if tok := claudeCodeOAuthToken(); tok != "" {
|
||||
return bearer("Claude Code OAuth, macOS keychain (Bearer)", tok), nil
|
||||
}
|
||||
return Auth{}, fmt.Errorf("no credentials: set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN")
|
||||
}
|
||||
|
||||
// claudeCodeOAuthToken returns the OAuth access token stored under the
// "Claude Code-credentials" generic-password item, read through the `security`
// command-line tool. It is best-effort: on any platform other than darwin, or
// when the command fails or its output is not the expected JSON, it returns
// "".
func claudeCodeOAuthToken() string {
	if runtime.GOOS != "darwin" {
		return ""
	}
	cmd := exec.Command("security", "find-generic-password", "-s", "Claude Code-credentials", "-w")
	raw, err := cmd.Output()
	if err != nil {
		return ""
	}
	type oauthEntry struct {
		AccessToken string `json:"accessToken"`
	}
	var stored struct {
		ClaudeAiOauth oauthEntry `json:"claudeAiOauth"`
	}
	if json.Unmarshal(raw, &stored) != nil {
		return ""
	}
	return stored.ClaudeAiOauth.AccessToken
}
|
||||
@@ -1,57 +1,59 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"image"
|
||||
_ "image/jpeg"
|
||||
"image/png"
|
||||
"io"
|
||||
"log"
|
||||
_ "image/png"
|
||||
"log/slog"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/makiuchi-d/gozxing"
|
||||
"github.com/makiuchi-d/gozxing/qrcode"
|
||||
xdraw "golang.org/x/image/draw"
|
||||
"spoolweight/claude"
|
||||
"spoolweight/spooldb"
|
||||
)
|
||||
|
||||
// weightBreakdown is the spool/filament/total split, in grams. Every part is
// optional: a nil pointer means "not known" and is omitted from the JSON.
type weightBreakdown struct {
	// Spool is the empty-spool weight.
	Spool *float64 `json:"spool,omitempty"`

	// Filament is the filament weight.
	Filament *float64 `json:"filament,omitempty"`

	// Total is the combined weight of filament and spool.
	Total *float64 `json:"total,omitempty"`
}
|
||||
|
||||
type result struct {
|
||||
Image string `json:"image"`
|
||||
SpoolID string `json:"spool_id,omitempty"`
|
||||
URL string `json:"url,omitempty"`
|
||||
Location string `json:"location,omitempty"`
|
||||
SiteRemainingGrams *float64 `json:"site_remaining_grams,omitempty"`
|
||||
Weight *float64 `json:"weight,omitempty"`
|
||||
PreviousWeight *weightBreakdown `json:"previous_weight,omitempty"` // as read from spooldb
|
||||
NewWeight *weightBreakdown `json:"new_weight,omitempty"` // from the measured photo
|
||||
Unit string `json:"unit,omitempty"`
|
||||
Confidence *float64 `json:"confidence,omitempty"`
|
||||
Weights []float64 `json:"weights,omitempty"`
|
||||
ModelConfidences []string `json:"model_confidences,omitempty"`
|
||||
Updated bool `json:"updated,omitempty"`
|
||||
VoteWeights []float64 `json:"vote_weights,omitempty"` // each LLM vote's total reading
|
||||
ModelConfidences []string `json:"model_confidences,omitempty"` // each LLM vote's self-rating
|
||||
Updated *bool `json:"updated,omitempty"` // set on success: did we write to spooldb
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// ptr returns a pointer to a fresh copy of v, for filling optional *float64
// fields from plain values.
func ptr(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}
|
||||
|
||||
const (
	// spoolURLPrefix is the prefix a QR payload must have to be accepted.
	// Payloads for any other domain, or a /f/ filament link instead of a /s/
	// spool link, are rejected.
	spoolURLPrefix = "https://spooldb.com/s/"
)
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
verbose := flag.Bool("v", false, "print the auth mechanism being used to stderr")
|
||||
verbose := flag.Bool("v", false, "verbose: log each event's start/end with timings to stderr")
|
||||
dryRun := flag.Bool("n", false, "dry run: read and report, but do not write weight changes back to the site")
|
||||
flag.Usage = func() {
|
||||
log.Printf("usage: %s [-v] [-n] <image>...", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, "usage: %s [-v] [-n] <image>...\n", os.Args[0])
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
flag.Parse()
|
||||
@@ -60,13 +62,17 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
auth, err := resolveAuth()
|
||||
level := slog.LevelInfo
|
||||
if *verbose {
|
||||
level = slog.LevelDebug
|
||||
}
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})))
|
||||
|
||||
auth, err := claude.ResolveAuth()
|
||||
if err != nil {
|
||||
fail("resolve credentials: %v", err)
|
||||
}
|
||||
if *verbose {
|
||||
log.Printf("auth: %s", auth.name)
|
||||
}
|
||||
slog.Debug("claude auth", "mechanism", auth.Name())
|
||||
|
||||
sp := &spoolSync{}
|
||||
defer sp.close()
|
||||
@@ -119,14 +125,14 @@ func (s *spoolSync) login() (*spooldb.Client, error) {
|
||||
func (s *spoolSync) info(spoolID string) *spooldb.SpoolInfo {
|
||||
client, err := s.login()
|
||||
if err != nil {
|
||||
log.Printf("spooldb: skipping spool lookup: %v", err)
|
||||
slog.Warn("spooldb: skipping spool lookup", "err", err)
|
||||
return nil
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
info, err := client.SpoolInfo(ctx, spoolID)
|
||||
if err != nil {
|
||||
log.Printf("spooldb: lookup for %s: %v", spoolID, err)
|
||||
slog.Warn("spooldb: lookup failed", "spool", spoolID, "err", err)
|
||||
return nil
|
||||
}
|
||||
return &info
|
||||
@@ -136,13 +142,13 @@ func (s *spoolSync) info(spoolID string) *spooldb.SpoolInfo {
|
||||
func (s *spoolSync) setTotal(spoolID string, grams float64) bool {
|
||||
client, err := s.login()
|
||||
if err != nil {
|
||||
log.Printf("spooldb: skipping weight update: %v", err)
|
||||
slog.Warn("spooldb: skipping weight update", "err", err)
|
||||
return false
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
|
||||
defer cancel()
|
||||
if err := client.SetTotalWeight(ctx, spoolID, grams); err != nil {
|
||||
log.Printf("spooldb: weight update for %s: %v", spoolID, err)
|
||||
slog.Warn("spooldb: weight update failed", "spool", spoolID, "err", err)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
@@ -156,7 +162,8 @@ func (s *spoolSync) close() {
|
||||
|
||||
// processImage reads one photo, capturing any failure in the result's Error
|
||||
// field so a single bad image doesn't abort the whole batch.
|
||||
func processImage(path string, auth authInfo, sp *spoolSync, dryRun bool) result {
|
||||
func processImage(path string, auth claude.Auth, sp *spoolSync, dryRun bool) result {
|
||||
defer trace("image " + path)()
|
||||
r := result{Image: path}
|
||||
|
||||
img, err := loadImage(path)
|
||||
@@ -187,47 +194,67 @@ func processImage(path string, auth authInfo, sp *spoolSync, dryRun bool) result
|
||||
info = sp.info(r.SpoolID)
|
||||
}()
|
||||
|
||||
w, err := readWeight(img, auth)
|
||||
reading, err := claude.ReadWeight(img, auth)
|
||||
wg.Wait()
|
||||
if info != nil {
|
||||
r.Location = info.Location
|
||||
rem := info.RemainingGrams
|
||||
r.SiteRemainingGrams = &rem
|
||||
r.PreviousWeight = &weightBreakdown{
|
||||
Spool: ptr(info.EmptySpoolGrams),
|
||||
Filament: ptr(info.RemainingGrams),
|
||||
Total: ptr(info.TotalGrams),
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
r.Error = fmt.Sprintf("read weight: %v", err)
|
||||
return r
|
||||
}
|
||||
if w.unit == "lb" || w.unit == "oz" {
|
||||
r.Error = fmt.Sprintf("scale is set to imperial units (%s); switch it to grams", w.unit)
|
||||
if reading.Unit == "lb" || reading.Unit == "oz" {
|
||||
r.Error = fmt.Sprintf("scale is set to imperial units (%s); switch it to grams", reading.Unit)
|
||||
return r
|
||||
}
|
||||
r.Weight = &w.weight
|
||||
r.Unit = w.unit
|
||||
r.Confidence = &w.confidence
|
||||
r.Weights = w.weights
|
||||
r.ModelConfidences = w.modelConfidences
|
||||
r.Unit = reading.Unit
|
||||
r.Confidence = ptr(reading.Confidence)
|
||||
r.VoteWeights = reading.Weights
|
||||
r.ModelConfidences = reading.ModelConfidences
|
||||
|
||||
// The photo gives total weight (filament + spool); the site stores remaining
|
||||
// filament. Write the measured total back when it differs from the site's
|
||||
// total — the site recomputes remaining using the empty-spool weight.
|
||||
if info != nil && math.Abs(w.weight-info.TotalGrams) >= 1 {
|
||||
newRemaining := w.weight - info.EmptySpoolGrams
|
||||
// The photo gives total weight (filament + spool). When the empty-spool
|
||||
// weight is known (from spooldb) we can also report the filament split.
|
||||
r.NewWeight = &weightBreakdown{Total: ptr(reading.Weight)}
|
||||
if info != nil {
|
||||
r.NewWeight.Spool = ptr(info.EmptySpoolGrams)
|
||||
r.NewWeight.Filament = ptr(reading.Weight - info.EmptySpoolGrams)
|
||||
}
|
||||
|
||||
// Write the measured total back when it differs from the site's total — the
|
||||
// site recomputes remaining filament using the empty-spool weight.
|
||||
updated := false
|
||||
if info != nil && math.Abs(reading.Weight-info.TotalGrams) >= 1 {
|
||||
newRemaining := reading.Weight - info.EmptySpoolGrams
|
||||
if dryRun {
|
||||
log.Printf("spooldb: %s would update remaining %.0fg -> %.0fg (measured total %.0fg)",
|
||||
r.SpoolID, info.RemainingGrams, newRemaining, w.weight)
|
||||
} else if sp.setTotal(r.SpoolID, w.weight) {
|
||||
r.Updated = true
|
||||
log.Printf("spooldb: %s updated remaining %.0fg -> %.0fg (measured total %.0fg)",
|
||||
r.SpoolID, info.RemainingGrams, newRemaining, w.weight)
|
||||
slog.Info("spooldb: would update", "spool", r.SpoolID,
|
||||
"remaining_from", info.RemainingGrams, "remaining_to", newRemaining, "measured_total", reading.Weight)
|
||||
} else if sp.setTotal(r.SpoolID, reading.Weight) {
|
||||
updated = true
|
||||
slog.Info("spooldb: updated", "spool", r.SpoolID,
|
||||
"remaining_from", info.RemainingGrams, "remaining_to", newRemaining, "measured_total", reading.Weight)
|
||||
}
|
||||
}
|
||||
r.Updated = &updated
|
||||
return r
|
||||
}
|
||||
|
||||
// fail reports a fatal error and terminates the process with exit status 1.
// The message is built from format and a, printf-style, and logged through the
// default slog logger at error level.
//
// There is a single exit path: an earlier log.Fatalf call ahead of these
// statements exited first, which made the slog report unreachable.
func fail(format string, a ...interface{}) {
	slog.Error(fmt.Sprintf(format, a...))
	os.Exit(1)
}
|
||||
|
||||
// trace emits a debug-level "begin" record for op and hands back a function
// that emits the matching "end" record together with the elapsed time. The
// intended use is `defer trace("name")()`. Both records are logged at debug
// level, so they are only visible when the default slog logger has debug
// output enabled.
func trace(op string) func() {
	started := time.Now()
	slog.Debug("begin", "op", op)
	finish := func() {
		elapsed := time.Since(started)
		slog.Debug("end", "op", op, "dur", elapsed)
	}
	return finish
}
|
||||
|
||||
func loadImage(path string) (image.Image, error) {
|
||||
@@ -262,276 +289,3 @@ func decodeQR(img image.Image) (string, error) {
|
||||
}
|
||||
return res.GetText(), nil
|
||||
}
|
||||
|
||||
// weightPrompt is the instruction text sent alongside the scale photo. It
// primes a general vision model to read the display; the hint about the housing
// clipping the tops of the digits is what stops a 7 (which has a top bar) from
// being misread as a 1 on this kind of LCD. The final sentence pins the answer
// format: a single JSON object on the last line of the reply.
const weightPrompt = "" +
	// What the photo shows and what to do with it.
	`This photo shows a 3D printer filament spool sitting on a digital kitchen scale. ` +
	`Read the weight on the scale's digital display, digit by digit, exactly as shown. ` +
	// The 7-versus-1 hint.
	`Note: the top edge of the display housing can come close to obscuring the TOPS of the digits, ` +
	`so a digit that really has a top horizontal bar (like a 7) can momentarily look like a 1 — look carefully for a faint top bar before deciding. ` +
	// Additional fields to report.
	`Report the unit shown on the display (g, kg, lb, or oz), and rate your own confidence in the reading as "low", "medium", or "high". ` +
	// Output contract.
	`Reason briefly, then on the LAST line output ONLY JSON: {"weight": <number>, "unit": "<g|kg|lb|oz>", "confidence": "<low|medium|high>"}.`
|
||||
|
||||
// weightResult is what readWeight reports for one scale photo: the winning
// answer plus the raw per-vote data it was derived from.
type weightResult struct {
	// weight is the numeric value of the winning answer.
	weight float64

	// unit is the winning answer's unit.
	unit string

	// confidence is the overall score: vote agreement weighted by the
	// model's self-confidence.
	confidence float64

	// weights holds the numeric reading of each successful vote.
	weights []float64

	// modelConfidences holds each successful vote's self-rating, as the
	// model worded it.
	modelConfidences []string
}
|
||||
|
||||
// readWeight asks a vision model to read the display, taking the majority answer
|
||||
// across a few independent reads for robustness. No image regions or digit-font
|
||||
// geometry are assumed: the model does general OCR on the whole photo. The
|
||||
// overall confidence is the fraction of reads that agreed with the winning
|
||||
// answer, scaled by the average self-confidence the model reported for those
|
||||
// agreeing reads (low/medium/high -> 1/3, 2/3, 3/3).
|
||||
func readWeight(img image.Image, auth authInfo) (weightResult, error) {
|
||||
const votes = 3
|
||||
type vote struct {
|
||||
w float64
|
||||
u string
|
||||
conf string
|
||||
}
|
||||
type outcome struct {
|
||||
v vote
|
||||
err error
|
||||
}
|
||||
run := func() outcome {
|
||||
w, u, c, e := readWeightLLM(img, auth)
|
||||
return outcome{vote{w, u, c}, e}
|
||||
}
|
||||
|
||||
// Run the first call alone so it populates the prompt cache, then fire the
|
||||
// remaining (independent) votes concurrently to reuse that cache.
|
||||
outcomes := make([]outcome, votes)
|
||||
outcomes[0] = run()
|
||||
var wg sync.WaitGroup
|
||||
for i := 1; i < votes; i++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
outcomes[i] = run()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
var got []vote
|
||||
var lastErr error
|
||||
for _, o := range outcomes {
|
||||
if o.err != nil {
|
||||
lastErr = o.err
|
||||
continue
|
||||
}
|
||||
got = append(got, o.v)
|
||||
}
|
||||
if len(got) == 0 {
|
||||
return weightResult{}, lastErr
|
||||
}
|
||||
|
||||
key := func(w float64, u string) string { return fmt.Sprintf("%g|%s", w, u) }
|
||||
tally := map[string]int{}
|
||||
for _, v := range got {
|
||||
tally[key(v.w, v.u)]++
|
||||
}
|
||||
bestKey, bestN := "", -1
|
||||
for k, n := range tally {
|
||||
if n > bestN {
|
||||
bestKey, bestN = k, n
|
||||
}
|
||||
}
|
||||
|
||||
res := weightResult{}
|
||||
var winnerMC []float64
|
||||
for _, v := range got {
|
||||
res.weights = append(res.weights, v.w)
|
||||
res.modelConfidences = append(res.modelConfidences, v.conf)
|
||||
if key(v.w, v.u) == bestKey {
|
||||
res.weight, res.unit = v.w, v.u
|
||||
winnerMC = append(winnerMC, modelConfidence(v.conf))
|
||||
}
|
||||
}
|
||||
agreement := float64(bestN) / float64(len(got))
|
||||
res.confidence = agreement * mean(winnerMC)
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// mean returns the arithmetic mean of xs, or 0 when xs is empty.
func mean(xs []float64) float64 {
	n := len(xs)
	if n == 0 {
		return 0
	}
	total := 0.0
	for i := 0; i < n; i++ {
		total += xs[i]
	}
	return total / float64(n)
}
|
||||
|
||||
// modelConfidence converts the model's textual self-rating into a fraction:
// "high" -> 3/3, "medium" -> 2/3, and everything else -> 1/3. Matching ignores
// case and surrounding whitespace. Unrecognised ratings are deliberately
// treated like "low" so an unexpected answer never inflates confidence.
func modelConfidence(s string) float64 {
	rating := strings.ToLower(strings.TrimSpace(s))
	if rating == "high" {
		return 3.0 / 3
	}
	if rating == "medium" {
		return 2.0 / 3
	}
	return 1.0 / 3
}
|
||||
|
||||
func readWeightLLM(img image.Image, auth authInfo) (float64, string, string, error) {
|
||||
// Downscale only to satisfy the vision API's size limit — not a crop.
|
||||
small := downscale(img, 1568)
|
||||
var buf bytes.Buffer
|
||||
if err := png.Encode(&buf, small); err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
b64 := base64.StdEncoding.EncodeToString(buf.Bytes())
|
||||
|
||||
reqBody := map[string]interface{}{
|
||||
"model": "claude-opus-4-8",
|
||||
"max_tokens": 600,
|
||||
"messages": []map[string]interface{}{{
|
||||
"role": "user",
|
||||
"content": []map[string]interface{}{
|
||||
{"type": "image", "source": map[string]interface{}{"type": "base64", "media_type": "image/png", "data": b64}},
|
||||
// Cache the image+prompt prefix so the repeated majority-vote calls
|
||||
// (same image, same prompt) hit the cache instead of re-sending it.
|
||||
{"type": "text", "text": weightPrompt, "cache_control": map[string]string{"type": "ephemeral"}},
|
||||
},
|
||||
}},
|
||||
}
|
||||
if auth.system != "" {
|
||||
reqBody["system"] = auth.system
|
||||
}
|
||||
body, _ := json.Marshal(reqBody)
|
||||
req, err := http.NewRequest("POST", "https://api.anthropic.com/v1/messages", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
req.Header.Set("content-type", "application/json")
|
||||
req.Header.Set("anthropic-version", "2023-06-01")
|
||||
auth.apply(req)
|
||||
|
||||
resp, err := (&http.Client{Timeout: 90 * time.Second}).Do(req)
|
||||
if err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
if resp.StatusCode != 200 {
|
||||
return 0, "", "", fmt.Errorf("API %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
||||
}
|
||||
var parsed struct {
|
||||
Content []struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"content"`
|
||||
}
|
||||
if err := json.Unmarshal(respBody, &parsed); err != nil {
|
||||
return 0, "", "", err
|
||||
}
|
||||
var text string
|
||||
for _, c := range parsed.Content {
|
||||
text += c.Text
|
||||
}
|
||||
var wr struct {
|
||||
Weight float64 `json:"weight"`
|
||||
Unit string `json:"unit"`
|
||||
Confidence string `json:"confidence"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(lastJSONObject(text)), &wr); err != nil {
|
||||
return 0, "", "", fmt.Errorf("parse model reply %q: %v", text, err)
|
||||
}
|
||||
return wr.Weight, strings.ToLower(strings.TrimSpace(wr.Unit)), strings.TrimSpace(wr.Confidence), nil
|
||||
}
|
||||
|
||||
// lastJSONObject extracts the final {...} object in s (the model's answer
// line). It starts at the last '}' in s and walks left, matching braces, until
// the opening '{' that balances it; the text between the two, inclusive, is
// returned. When s contains no '}' or the braces never balance, s is returned
// unchanged. Braces are counted literally, including any inside string values.
func lastJSONObject(s string) string {
	end := strings.LastIndex(s, "}")
	if end < 0 {
		return s
	}
	unmatched := 0 // closing braces seen so far without their opener
	for start := end; start >= 0; start-- {
		c := s[start]
		if c == '}' {
			unmatched++
			continue
		}
		if c != '{' {
			continue
		}
		unmatched--
		if unmatched == 0 {
			return s[start : end+1]
		}
	}
	return s
}
|
||||
|
||||
func downscale(img image.Image, max int) image.Image {
|
||||
b := img.Bounds()
|
||||
w, h := b.Dx(), b.Dy()
|
||||
if w <= max && h <= max {
|
||||
return img
|
||||
}
|
||||
scale := float64(max) / float64(w)
|
||||
if h > w {
|
||||
scale = float64(max) / float64(h)
|
||||
}
|
||||
dst := image.NewRGBA(image.Rect(0, 0, int(float64(w)*scale), int(float64(h)*scale)))
|
||||
xdraw.CatmullRom.Scale(dst, dst.Bounds(), img, b, xdraw.Over, nil)
|
||||
return dst
|
||||
}
|
||||
|
||||
// authInfo holds resolved Anthropic API credentials: which header carries the
// credential and the optional extras that go with that mechanism.
type authInfo struct {
	name   string // description of the mechanism, for logging
	header string // request header that carries the credential
	value  string // value sent in that header
	beta   string // anthropic-beta header value; not sent when empty
	system string // system prompt added to requests; omitted when empty
}

// apply writes the credential header onto req and, when a beta value is
// configured, the anthropic-beta header as well.
func (a authInfo) apply(req *http.Request) {
	h := req.Header
	h.Set(a.header, a.value)
	if a.beta == "" {
		return
	}
	h.Set("anthropic-beta", a.beta)
}
|
||||
|
||||
func resolveAuth() (authInfo, error) {
|
||||
if k := os.Getenv("ANTHROPIC_API_KEY"); k != "" {
|
||||
return authInfo{name: "ANTHROPIC_API_KEY (x-api-key)", header: "x-api-key", value: k}, nil
|
||||
}
|
||||
bearer := func(name, tok string) authInfo {
|
||||
return authInfo{
|
||||
name: name,
|
||||
header: "authorization", value: "Bearer " + tok,
|
||||
beta: "oauth-2025-04-20",
|
||||
system: "You are Claude Code, Anthropic's official CLI for Claude.",
|
||||
}
|
||||
}
|
||||
if t := os.Getenv("ANTHROPIC_AUTH_TOKEN"); t != "" {
|
||||
return bearer("ANTHROPIC_AUTH_TOKEN (Bearer)", t), nil
|
||||
}
|
||||
if tok := claudeCodeOAuthToken(); tok != "" {
|
||||
return bearer("Claude Code OAuth, macOS keychain (Bearer)", tok), nil
|
||||
}
|
||||
return authInfo{}, fmt.Errorf("no credentials: set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN")
|
||||
}
|
||||
|
||||
// claudeCodeOAuthToken returns the OAuth access token stored under the
// "Claude Code-credentials" generic-password item, read through the `security`
// command-line tool. It is best-effort: on any platform other than darwin, or
// when the command fails or its output is not the expected JSON, it returns
// "".
func claudeCodeOAuthToken() string {
	if runtime.GOOS != "darwin" {
		return ""
	}
	cmd := exec.Command("security", "find-generic-password", "-s", "Claude Code-credentials", "-w")
	raw, err := cmd.Output()
	if err != nil {
		return ""
	}
	type oauthEntry struct {
		AccessToken string `json:"accessToken"`
	}
	var stored struct {
		ClaudeAiOauth oauthEntry `json:"claudeAiOauth"`
	}
	if json.Unmarshal(raw, &stored) != nil {
		return ""
	}
	return stored.ClaudeAiOauth.AccessToken
}
|
||||
|
||||
+18
-3
@@ -11,7 +11,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -21,6 +21,14 @@ import (
|
||||
"github.com/chromedp/chromedp"
|
||||
)
|
||||
|
||||
// trace emits a debug-level "begin" record for op and hands back a function
// that emits the matching "end" record together with the elapsed time. The
// intended use is `defer trace("name")()`. Both records are logged at debug
// level, so they are only visible when the default slog logger has debug
// output enabled.
func trace(op string) func() {
	started := time.Now()
	slog.Debug("begin", "op", op)
	finish := func() {
		elapsed := time.Since(started)
		slog.Debug("end", "op", op, "dur", elapsed)
	}
	return finish
}
|
||||
|
||||
const (
|
||||
baseURL = "https://3dfilamentprofiles.com"
|
||||
challengeText = "Security Checkpoint"
|
||||
@@ -79,6 +87,7 @@ func New(opts ...Option) (*Client, error) {
|
||||
// Allocate the browser bound to the long-lived browserCtx. chromedp ties the
|
||||
// Chrome process lifetime to the context of the first Run, so this must not
|
||||
// be a short-lived per-call context (otherwise the browser dies after it).
|
||||
defer trace("spooldb.launchBrowser")()
|
||||
if err := chromedp.Run(browserCtx, chromedp.Navigate("about:blank")); err != nil {
|
||||
browserCancel()
|
||||
allocCancel()
|
||||
@@ -98,7 +107,7 @@ func chromeErrorf(format string, args ...any) {
|
||||
if strings.HasPrefix(format, "unhandled ") {
|
||||
return
|
||||
}
|
||||
log.Printf("chromedp: "+format, args...)
|
||||
slog.Warn("chromedp: " + fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
// Close shuts down the browser.
|
||||
@@ -121,13 +130,15 @@ func (c *Client) run(ctx context.Context, timeout time.Duration, actions ...chro
|
||||
// awaitChallenge waits until the Vercel checkpoint clears (the headless browser
|
||||
// solves it automatically by running the page's JavaScript).
|
||||
func (c *Client) awaitChallenge(ctx context.Context) error {
|
||||
defer trace("spooldb.awaitChallenge")()
|
||||
deadline := time.Now().Add(45 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
for polls := 0; time.Now().Before(deadline); polls++ {
|
||||
var title string
|
||||
if err := c.run(ctx, 10*time.Second, chromedp.Title(&title)); err != nil {
|
||||
return err
|
||||
}
|
||||
if !strings.Contains(title, challengeText) {
|
||||
slog.Debug("challenge cleared", "polls", polls)
|
||||
return nil
|
||||
}
|
||||
select {
|
||||
@@ -141,6 +152,7 @@ func (c *Client) awaitChallenge(ctx context.Context) error {
|
||||
|
||||
// Login authenticates with email/password. Safe to call once per Client.
|
||||
func (c *Client) Login(ctx context.Context, email, password string) error {
|
||||
defer trace("spooldb.login")()
|
||||
if err := c.run(ctx, 45*time.Second, chromedp.Navigate(baseURL+"/login")); err != nil {
|
||||
return fmt.Errorf("navigate to login: %w", err)
|
||||
}
|
||||
@@ -188,6 +200,7 @@ type SpoolInfo struct {
|
||||
// direct URL for the dialog; it is opened from the spool page by the pencil
|
||||
// button, which sits between the QR-code and delete buttons in the card header.
|
||||
func (c *Client) openEdit(ctx context.Context, spoolID string) error {
|
||||
defer trace("spooldb.openEdit")()
|
||||
if !c.loggedIn {
|
||||
return errors.New("not logged in")
|
||||
}
|
||||
@@ -232,6 +245,7 @@ func (c *Client) openEdit(ctx context.Context, spoolID string) error {
|
||||
|
||||
// SpoolInfo opens a spool's edit dialog and reads its location and weights.
|
||||
func (c *Client) SpoolInfo(ctx context.Context, spoolID string) (SpoolInfo, error) {
|
||||
defer trace("spooldb.spoolInfo")()
|
||||
if err := c.openEdit(ctx, spoolID); err != nil {
|
||||
return SpoolInfo{}, err
|
||||
}
|
||||
@@ -265,6 +279,7 @@ func (c *Client) SpoolInfo(ctx context.Context, spoolID string) (SpoolInfo, erro
|
||||
// — the site recomputes remaining filament from the empty-spool weight — and
|
||||
// saves. grams is the weight read off the scale (filament plus spool).
|
||||
func (c *Client) SetTotalWeight(ctx context.Context, spoolID string, grams float64) error {
|
||||
defer trace("spooldb.setTotalWeight")()
|
||||
if err := c.openEdit(ctx, spoolID); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user