From d3e9cc8e545a39beb6a78d31e0fc380089759139 Mon Sep 17 00:00:00 2001 From: Ian Gulliver Date: Sun, 31 May 2026 10:44:29 -0700 Subject: [PATCH] spoolweight: read spool QR code and scale weight from a photo --- .gitignore | 3 + go.mod | 13 ++ go.sum | 8 + main.go | 422 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 446 insertions(+) create mode 100644 .gitignore create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b01c357 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.claude/ +/spoolweight +*.png diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..84d41c8 --- /dev/null +++ b/go.mod @@ -0,0 +1,13 @@ +module spoolweight + +go 1.26.2 + +require ( + github.com/makiuchi-d/gozxing v0.1.1 + golang.org/x/image v0.41.0 +) + +require ( + golang.org/x/text v0.37.0 // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..905ea1a --- /dev/null +++ b/go.sum @@ -0,0 +1,8 @@ +github.com/makiuchi-d/gozxing v0.1.1 h1:xxqijhoedi+/lZlhINteGbywIrewVdVv2wl9r5O9S1I= +github.com/makiuchi-d/gozxing v0.1.1/go.mod h1:eRIHbOjX7QWxLIDJoQuMLhuXg9LAuw6znsUtRkNw9DU= +golang.org/x/image v0.41.0 h1:8wS72eGJMJaBxK6okTzd4WaXumUlTVlb753MlsSvTCo= +golang.org/x/image v0.41.0/go.mod h1:uIc348UZMSvS5Z65CVZ7iDPaNobNFEPeJ4kbqTOszmA= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/main.go b/main.go new file mode 100644 index 0000000..8ffac3d --- /dev/null +++ b/main.go @@ -0,0 +1,422 @@ +package main + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "flag" + "fmt" + "image" + _ "image/jpeg" + "image/png" + "io" + "log" + "net/http" + "os" + "os/exec" + "runtime" + "strings" + "sync" + "time" + + "github.com/makiuchi-d/gozxing" + "github.com/makiuchi-d/gozxing/qrcode" + xdraw "golang.org/x/image/draw" +) + +type result struct { + Image string `json:"image"` + SpoolID string `json:"spool_id,omitempty"` + URL string `json:"url,omitempty"` + Weight *float64 `json:"weight,omitempty"` + Unit string `json:"unit,omitempty"` + Confidence *float64 `json:"confidence,omitempty"` + Weights []float64 `json:"weights,omitempty"` + ModelConfidences []string `json:"model_confidences,omitempty"` + Error string `json:"error,omitempty"` +} + +// spoolURLPrefix is the only QR payload we accept. Anything else (a different +// domain, or a /f/ filament link rather than a /s/ spool link) is rejected. +const spoolURLPrefix = "https://spooldb.com/s/" + +func main() { + log.SetFlags(0) + verbose := flag.Bool("v", false, "print the auth mechanism being used to stderr") + flag.Usage = func() { + log.Printf("usage: %s [-v] ...", os.Args[0]) + flag.PrintDefaults() + } + flag.Parse() + if flag.NArg() < 1 { + flag.Usage() + os.Exit(2) + } + + auth, err := resolveAuth() + if err != nil { + fail("resolve credentials: %v", err) + } + if *verbose { + log.Printf("auth: %s", auth.name) + } + + results := make([]result, 0, flag.NArg()) + for _, path := range flag.Args() { + results = append(results, processImage(path, auth)) + } + + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + enc.Encode(results) +} + +// processImage reads one photo, capturing any failure in the result's Error +// field so a single bad image doesn't abort the whole batch. +func processImage(path string, auth authInfo) result { + r := result{Image: path} + + img, err := loadImage(path) + if err != nil { + r.Error = fmt.Sprintf("load image: %v", err) + return r + } + + url, err := decodeQR(img) + if err != nil { + r.Error = fmt.Sprintf("read QR code: %v", err) + return r + } + if !strings.HasPrefix(url, spoolURLPrefix) { + r.Error = fmt.Sprintf("not a spool QR code (expected %s...): %s", spoolURLPrefix, url) + return r + } + r.SpoolID = spoolID(url) + r.URL = url + + w, err := readWeight(img, auth) + if err != nil { + r.Error = fmt.Sprintf("read weight: %v", err) + return r + } + if w.unit == "lb" || w.unit == "oz" { + r.Error = fmt.Sprintf("scale is set to imperial units (%s); switch it to grams", w.unit) + return r + } + r.Weight = &w.weight + r.Unit = w.unit + r.Confidence = &w.confidence + r.Weights = w.weights + r.ModelConfidences = w.modelConfidences + return r +} + +func fail(format string, a ...interface{}) { + log.Fatalf("error: "+format, a...) +} + +func loadImage(path string) (image.Image, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + img, _, err := image.Decode(f) + return img, err +} + +// spoolID returns the last non-empty path segment of the QR URL. +func spoolID(url string) string { + s := strings.TrimRight(url, "/") + if i := strings.LastIndex(s, "/"); i >= 0 { + return s[i+1:] + } + return s +} + +func decodeQR(img image.Image) (string, error) { + bmp, err := gozxing.NewBinaryBitmapFromImage(img) + if err != nil { + return "", err + } + res, err := qrcode.NewQRCodeReader().Decode(bmp, map[gozxing.DecodeHintType]interface{}{ + gozxing.DecodeHintType_TRY_HARDER: true, + }) + if err != nil { + return "", err + } + return res.GetText(), nil +} + +// weightPrompt primes a general vision model to read the scale. The hint about +// the housing clipping the tops of the digits is what stops a 7 (which has a +// top bar) from being misread as a 1 on this kind of LCD. +const weightPrompt = `This photo shows a 3D printer filament spool sitting on a digital kitchen scale. ` + + `Read the weight on the scale's digital display, digit by digit, exactly as shown. ` + + `Note: the top edge of the display housing can come close to obscuring the TOPS of the digits, ` + + `so a digit that really has a top horizontal bar (like a 7) can momentarily look like a 1 — ` + + `look carefully for a faint top bar before deciding. ` + + `Report the unit shown on the display (g, kg, lb, or oz), and rate your own confidence ` + + `in the reading as "low", "medium", or "high". ` + + `Reason briefly, then on the LAST line output ONLY JSON: ` + + `{"weight": , "unit": "", "confidence": ""}.` + +type weightResult struct { + weight float64 + unit string + confidence float64 // overall: vote agreement weighted by model self-confidence + weights []float64 // each successful vote's numeric reading + modelConfidences []string // each successful vote's self-rating, as the model worded it +} + +// readWeight asks a vision model to read the display, taking the majority answer +// across a few independent reads for robustness. No image regions or digit-font +// geometry are assumed: the model does general OCR on the whole photo. The +// overall confidence is the fraction of reads that agreed with the winning +// answer, scaled by the average self-confidence the model reported for those +// agreeing reads (low/medium/high -> 1/3, 2/3, 3/3). +func readWeight(img image.Image, auth authInfo) (weightResult, error) { + const votes = 3 + type vote struct { + w float64 + u string + conf string + } + type outcome struct { + v vote + err error + } + run := func() outcome { + w, u, c, e := readWeightLLM(img, auth) + return outcome{vote{w, u, c}, e} + } + + // Run the first call alone so it populates the prompt cache, then fire the + // remaining (independent) votes concurrently to reuse that cache. + outcomes := make([]outcome, votes) + outcomes[0] = run() + var wg sync.WaitGroup + for i := 1; i < votes; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + outcomes[i] = run() + }(i) + } + wg.Wait() + + var got []vote + var lastErr error + for _, o := range outcomes { + if o.err != nil { + lastErr = o.err + continue + } + got = append(got, o.v) + } + if len(got) == 0 { + return weightResult{}, lastErr + } + + key := func(w float64, u string) string { return fmt.Sprintf("%g|%s", w, u) } + tally := map[string]int{} + for _, v := range got { + tally[key(v.w, v.u)]++ + } + bestKey, bestN := "", -1 + for k, n := range tally { + if n > bestN { + bestKey, bestN = k, n + } + } + + res := weightResult{} + var winnerMC []float64 + for _, v := range got { + res.weights = append(res.weights, v.w) + res.modelConfidences = append(res.modelConfidences, v.conf) + if key(v.w, v.u) == bestKey { + res.weight, res.unit = v.w, v.u + winnerMC = append(winnerMC, modelConfidence(v.conf)) + } + } + agreement := float64(bestN) / float64(len(got)) + res.confidence = agreement * mean(winnerMC) + return res, nil +} + +func mean(xs []float64) float64 { + if len(xs) == 0 { + return 0 + } + var s float64 + for _, x := range xs { + s += x + } + return s / float64(len(xs)) +} + +// modelConfidence maps the model's self-rating to a fraction. +func modelConfidence(s string) float64 { + switch strings.ToLower(strings.TrimSpace(s)) { + case "high": + return 3.0 / 3 + case "medium": + return 2.0 / 3 + default: // "low" or anything unexpected -> treat conservatively + return 1.0 / 3 + } +} + +func readWeightLLM(img image.Image, auth authInfo) (float64, string, string, error) { + // Downscale only to satisfy the vision API's size limit — not a crop. + small := downscale(img, 1568) + var buf bytes.Buffer + if err := png.Encode(&buf, small); err != nil { + return 0, "", "", err + } + b64 := base64.StdEncoding.EncodeToString(buf.Bytes()) + + reqBody := map[string]interface{}{ + "model": "claude-opus-4-8", + "max_tokens": 600, + "messages": []map[string]interface{}{{ + "role": "user", + "content": []map[string]interface{}{ + {"type": "image", "source": map[string]interface{}{"type": "base64", "media_type": "image/png", "data": b64}}, + // Cache the image+prompt prefix so the repeated majority-vote calls + // (same image, same prompt) hit the cache instead of re-sending it. + {"type": "text", "text": weightPrompt, "cache_control": map[string]string{"type": "ephemeral"}}, + }, + }}, + } + if auth.system != "" { + reqBody["system"] = auth.system + } + body, _ := json.Marshal(reqBody) + req, err := http.NewRequest("POST", "https://api.anthropic.com/v1/messages", bytes.NewReader(body)) + if err != nil { + return 0, "", "", err + } + req.Header.Set("content-type", "application/json") + req.Header.Set("anthropic-version", "2023-06-01") + auth.apply(req) + + resp, err := (&http.Client{Timeout: 90 * time.Second}).Do(req) + if err != nil { + return 0, "", "", err + } + defer resp.Body.Close() + respBody, _ := io.ReadAll(resp.Body) + if resp.StatusCode != 200 { + return 0, "", "", fmt.Errorf("API %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody))) + } + var parsed struct { + Content []struct { + Text string `json:"text"` + } `json:"content"` + } + if err := json.Unmarshal(respBody, &parsed); err != nil { + return 0, "", "", err + } + var text string + for _, c := range parsed.Content { + text += c.Text + } + var wr struct { + Weight float64 `json:"weight"` + Unit string `json:"unit"` + Confidence string `json:"confidence"` + } + if err := json.Unmarshal([]byte(lastJSONObject(text)), &wr); err != nil { + return 0, "", "", fmt.Errorf("parse model reply %q: %v", text, err) + } + return wr.Weight, strings.ToLower(strings.TrimSpace(wr.Unit)), strings.TrimSpace(wr.Confidence), nil +} + +// lastJSONObject returns the final {...} object in s (the model's answer line). +func lastJSONObject(s string) string { + j := strings.LastIndex(s, "}") + if j < 0 { + return s + } + depth := 0 + for i := j; i >= 0; i-- { + switch s[i] { + case '}': + depth++ + case '{': + depth-- + if depth == 0 { + return s[i : j+1] + } + } + } + return s +} + +func downscale(img image.Image, max int) image.Image { + b := img.Bounds() + w, h := b.Dx(), b.Dy() + if w <= max && h <= max { + return img + } + scale := float64(max) / float64(w) + if h > w { + scale = float64(max) / float64(h) + } + dst := image.NewRGBA(image.Rect(0, 0, int(float64(w)*scale), int(float64(h)*scale))) + xdraw.CatmullRom.Scale(dst, dst.Bounds(), img, b, xdraw.Over, nil) + return dst +} + +type authInfo struct { + name, header, value, beta, system string +} + +func (a authInfo) apply(req *http.Request) { + req.Header.Set(a.header, a.value) + if a.beta != "" { + req.Header.Set("anthropic-beta", a.beta) + } +} + +func resolveAuth() (authInfo, error) { + if k := os.Getenv("ANTHROPIC_API_KEY"); k != "" { + return authInfo{name: "ANTHROPIC_API_KEY (x-api-key)", header: "x-api-key", value: k}, nil + } + bearer := func(name, tok string) authInfo { + return authInfo{ + name: name, + header: "authorization", value: "Bearer " + tok, + beta: "oauth-2025-04-20", + system: "You are Claude Code, Anthropic's official CLI for Claude.", + } + } + if t := os.Getenv("ANTHROPIC_AUTH_TOKEN"); t != "" { + return bearer("ANTHROPIC_AUTH_TOKEN (Bearer)", t), nil + } + if tok := claudeCodeOAuthToken(); tok != "" { + return bearer("Claude Code OAuth, macOS keychain (Bearer)", tok), nil + } + return authInfo{}, fmt.Errorf("no credentials: set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN") +} + +func claudeCodeOAuthToken() string { + if runtime.GOOS != "darwin" { + return "" + } + out, err := exec.Command("security", "find-generic-password", "-s", "Claude Code-credentials", "-w").Output() + if err != nil { + return "" + } + var creds struct { + ClaudeAiOauth struct { + AccessToken string `json:"accessToken"` + } `json:"claudeAiOauth"` + } + if err := json.Unmarshal(out, &creds); err != nil { + return "" + } + return creds.ClaudeAiOauth.AccessToken +}