498 lines
13 KiB
Go
498 lines
13 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"image"
|
|
_ "image/jpeg"
|
|
"image/png"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/makiuchi-d/gozxing"
|
|
"github.com/makiuchi-d/gozxing/qrcode"
|
|
xdraw "golang.org/x/image/draw"
|
|
"spoolweight/spooldb"
|
|
)
|
|
|
|
// result is the JSON record emitted for a single input image. Every field
// except Image is dropped from the output when it holds its zero value.
type result struct {
	// Image is the path the record was produced from.
	Image string `json:"image"`

	// SpoolID and URL are taken from the decoded QR code.
	SpoolID string `json:"spool_id,omitempty"`
	URL     string `json:"url,omitempty"`

	// Location is the spool's storage location; empty when it could not be
	// looked up.
	Location string `json:"location,omitempty"`

	// Weight, Unit and Confidence describe the winning scale reading. The
	// numeric fields are pointers so a genuine 0 is still emitted once set.
	Weight     *float64 `json:"weight,omitempty"`
	Unit       string   `json:"unit,omitempty"`
	Confidence *float64 `json:"confidence,omitempty"`

	// Weights and ModelConfidences hold the individual reads behind Weight.
	Weights          []float64 `json:"weights,omitempty"`
	ModelConfidences []string  `json:"model_confidences,omitempty"`

	// Error explains why processing of this image stopped early.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// spoolURLPrefix is what every accepted QR payload must begin with. Payloads
// pointing at another domain, or /f/ filament links instead of /s/ spool
// links, are rejected.
const spoolURLPrefix = "https://spooldb.com/s/"
|
|
|
|
func main() {
|
|
log.SetFlags(0)
|
|
verbose := flag.Bool("v", false, "print the auth mechanism being used to stderr")
|
|
flag.Usage = func() {
|
|
log.Printf("usage: %s [-v] <image>...", os.Args[0])
|
|
flag.PrintDefaults()
|
|
}
|
|
flag.Parse()
|
|
if flag.NArg() < 1 {
|
|
flag.Usage()
|
|
os.Exit(2)
|
|
}
|
|
|
|
auth, err := resolveAuth()
|
|
if err != nil {
|
|
fail("resolve credentials: %v", err)
|
|
}
|
|
if *verbose {
|
|
log.Printf("auth: %s", auth.name)
|
|
}
|
|
|
|
locator := &spoolLocator{}
|
|
defer locator.close()
|
|
|
|
results := make([]result, 0, flag.NArg())
|
|
for _, path := range flag.Args() {
|
|
results = append(results, processImage(path, auth, locator))
|
|
}
|
|
|
|
enc := json.NewEncoder(os.Stdout)
|
|
enc.SetIndent("", " ")
|
|
enc.Encode(results)
|
|
}
|
|
|
|
// spoolLocator lazily logs into spooldb on first use, so its (slow) browser
|
|
// login overlaps with the first image's weight read rather than blocking it.
|
|
// Location lookup is best-effort: any failure is logged and the location is
|
|
// simply omitted from the output.
|
|
type spoolLocator struct {
|
|
once sync.Once
|
|
client *spooldb.Client
|
|
err error
|
|
}
|
|
|
|
func (s *spoolLocator) login() (*spooldb.Client, error) {
|
|
s.once.Do(func() {
|
|
user, pass := os.Getenv("SPOOLDB_USER"), os.Getenv("SPOOLDB_PASS")
|
|
if user == "" || pass == "" {
|
|
s.err = fmt.Errorf("SPOOLDB_USER/SPOOLDB_PASS not set")
|
|
return
|
|
}
|
|
c, err := spooldb.New()
|
|
if err != nil {
|
|
s.err = err
|
|
return
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
|
|
defer cancel()
|
|
if err := c.Login(ctx, user, pass); err != nil {
|
|
c.Close()
|
|
s.err = fmt.Errorf("login: %w", err)
|
|
return
|
|
}
|
|
s.client = c
|
|
})
|
|
return s.client, s.err
|
|
}
|
|
|
|
// location returns the spool's storage location, or "" if it can't be looked up.
|
|
func (s *spoolLocator) location(spoolID string) string {
|
|
client, err := s.login()
|
|
if err != nil {
|
|
log.Printf("spooldb: skipping location lookup: %v", err)
|
|
return ""
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
|
defer cancel()
|
|
loc, err := client.SpoolLocation(ctx, spoolID)
|
|
if err != nil {
|
|
log.Printf("spooldb: location lookup for %s: %v", spoolID, err)
|
|
return ""
|
|
}
|
|
return loc
|
|
}
|
|
|
|
func (s *spoolLocator) close() {
|
|
if s.client != nil {
|
|
s.client.Close()
|
|
}
|
|
}
|
|
|
|
// processImage reads one photo, capturing any failure in the result's Error
|
|
// field so a single bad image doesn't abort the whole batch.
|
|
func processImage(path string, auth authInfo, locator *spoolLocator) result {
|
|
r := result{Image: path}
|
|
|
|
img, err := loadImage(path)
|
|
if err != nil {
|
|
r.Error = fmt.Sprintf("load image: %v", err)
|
|
return r
|
|
}
|
|
|
|
url, err := decodeQR(img)
|
|
if err != nil {
|
|
r.Error = fmt.Sprintf("read QR code: %v", err)
|
|
return r
|
|
}
|
|
if !strings.HasPrefix(url, spoolURLPrefix) {
|
|
r.Error = fmt.Sprintf("not a spool QR code (expected %s...): %s", spoolURLPrefix, url)
|
|
return r
|
|
}
|
|
r.SpoolID = spoolID(url)
|
|
r.URL = url
|
|
|
|
// Look up the spool's location concurrently with the (slower) weight read.
|
|
var loc string
|
|
var wg sync.WaitGroup
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
loc = locator.location(r.SpoolID)
|
|
}()
|
|
|
|
w, err := readWeight(img, auth)
|
|
wg.Wait()
|
|
r.Location = loc
|
|
|
|
if err != nil {
|
|
r.Error = fmt.Sprintf("read weight: %v", err)
|
|
return r
|
|
}
|
|
if w.unit == "lb" || w.unit == "oz" {
|
|
r.Error = fmt.Sprintf("scale is set to imperial units (%s); switch it to grams", w.unit)
|
|
return r
|
|
}
|
|
r.Weight = &w.weight
|
|
r.Unit = w.unit
|
|
r.Confidence = &w.confidence
|
|
r.Weights = w.weights
|
|
r.ModelConfidences = w.modelConfidences
|
|
return r
|
|
}
|
|
|
|
// fail logs the formatted message with an "error: " prefix and terminates the
// process with exit status 1. Deferred calls in the caller do not run.
func fail(format string, a ...interface{}) {
	log.Printf("error: "+format, a...)
	os.Exit(1)
}
|
|
|
|
// loadImage opens the file at path and decodes it with whichever image
// formats are registered in the program. It returns the open or decode error
// unchanged.
func loadImage(path string) (image.Image, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	decoded, _, decodeErr := image.Decode(file)
	return decoded, decodeErr
}
|
|
|
|
// spoolID extracts the final path segment of the QR URL, ignoring any
// trailing slashes. A string without a slash is returned as is (minus
// trailing slashes).
func spoolID(url string) string {
	trimmed := strings.TrimRight(url, "/")
	// LastIndex yields -1 when no slash remains, so the slice starts at 0 and
	// covers the whole string.
	return trimmed[strings.LastIndex(trimmed, "/")+1:]
}
|
|
|
|
func decodeQR(img image.Image) (string, error) {
|
|
bmp, err := gozxing.NewBinaryBitmapFromImage(img)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
res, err := qrcode.NewQRCodeReader().Decode(bmp, map[gozxing.DecodeHintType]interface{}{
|
|
gozxing.DecodeHintType_TRY_HARDER: true,
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return res.GetText(), nil
|
|
}
|
|
|
|
// weightPrompt is the instruction sent to the vision model with each photo.
// It warns that the display housing can clip the tops of the digits, which is
// what keeps a 7 (which has a top bar) from being misread as a 1 on this kind
// of LCD. It also asks for the answer as a JSON object on the final line of
// the reply.
const weightPrompt = `This photo shows a 3D printer filament spool sitting on a digital kitchen scale. ` +
	`Read the weight on the scale's digital display, digit by digit, exactly as shown. ` +
	`Note: the top edge of the display housing can come close to obscuring the TOPS of the digits, ` +
	`so a digit that really has a top horizontal bar (like a 7) can momentarily look like a 1 — ` +
	`look carefully for a faint top bar before deciding. ` +
	`Report the unit shown on the display (g, kg, lb, or oz), and rate your own confidence ` +
	`in the reading as "low", "medium", or "high". ` +
	`Reason briefly, then on the LAST line output ONLY JSON: ` +
	`{"weight": <number>, "unit": "<g|kg|lb|oz>", "confidence": "<low|medium|high>"}.`
|
|
|
|
// weightResult is the combined outcome of the independent scale reads made
// for one photo.
type weightResult struct {
	// weight and unit are the winning reading.
	weight float64
	unit   string

	// confidence is the overall score: vote agreement weighted by model
	// self-confidence.
	confidence float64

	// weights holds each successful vote's numeric reading.
	weights []float64

	// modelConfidences holds each successful vote's self-rating, as the model
	// worded it.
	modelConfidences []string
}
|
|
|
|
// readWeight asks a vision model to read the display, taking the majority answer
|
|
// across a few independent reads for robustness. No image regions or digit-font
|
|
// geometry are assumed: the model does general OCR on the whole photo. The
|
|
// overall confidence is the fraction of reads that agreed with the winning
|
|
// answer, scaled by the average self-confidence the model reported for those
|
|
// agreeing reads (low/medium/high -> 1/3, 2/3, 3/3).
|
|
func readWeight(img image.Image, auth authInfo) (weightResult, error) {
|
|
const votes = 3
|
|
type vote struct {
|
|
w float64
|
|
u string
|
|
conf string
|
|
}
|
|
type outcome struct {
|
|
v vote
|
|
err error
|
|
}
|
|
run := func() outcome {
|
|
w, u, c, e := readWeightLLM(img, auth)
|
|
return outcome{vote{w, u, c}, e}
|
|
}
|
|
|
|
// Run the first call alone so it populates the prompt cache, then fire the
|
|
// remaining (independent) votes concurrently to reuse that cache.
|
|
outcomes := make([]outcome, votes)
|
|
outcomes[0] = run()
|
|
var wg sync.WaitGroup
|
|
for i := 1; i < votes; i++ {
|
|
wg.Add(1)
|
|
go func(i int) {
|
|
defer wg.Done()
|
|
outcomes[i] = run()
|
|
}(i)
|
|
}
|
|
wg.Wait()
|
|
|
|
var got []vote
|
|
var lastErr error
|
|
for _, o := range outcomes {
|
|
if o.err != nil {
|
|
lastErr = o.err
|
|
continue
|
|
}
|
|
got = append(got, o.v)
|
|
}
|
|
if len(got) == 0 {
|
|
return weightResult{}, lastErr
|
|
}
|
|
|
|
key := func(w float64, u string) string { return fmt.Sprintf("%g|%s", w, u) }
|
|
tally := map[string]int{}
|
|
for _, v := range got {
|
|
tally[key(v.w, v.u)]++
|
|
}
|
|
bestKey, bestN := "", -1
|
|
for k, n := range tally {
|
|
if n > bestN {
|
|
bestKey, bestN = k, n
|
|
}
|
|
}
|
|
|
|
res := weightResult{}
|
|
var winnerMC []float64
|
|
for _, v := range got {
|
|
res.weights = append(res.weights, v.w)
|
|
res.modelConfidences = append(res.modelConfidences, v.conf)
|
|
if key(v.w, v.u) == bestKey {
|
|
res.weight, res.unit = v.w, v.u
|
|
winnerMC = append(winnerMC, modelConfidence(v.conf))
|
|
}
|
|
}
|
|
agreement := float64(bestN) / float64(len(got))
|
|
res.confidence = agreement * mean(winnerMC)
|
|
return res, nil
|
|
}
|
|
|
|
// mean returns the arithmetic mean of xs, or 0 for an empty or nil slice.
func mean(xs []float64) float64 {
	n := len(xs)
	if n == 0 {
		return 0
	}
	total := 0.0
	for i := 0; i < n; i++ {
		total += xs[i]
	}
	return total / float64(n)
}
|
|
|
|
// modelConfidence converts the model's self-rating into a fraction: "high" is
// 1, "medium" is 2/3, and "low" or any unrecognised rating is treated
// conservatively as 1/3. Case and surrounding whitespace are ignored.
func modelConfidence(s string) float64 {
	rating := strings.ToLower(strings.TrimSpace(s))
	if rating == "high" {
		return 3.0 / 3
	}
	if rating == "medium" {
		return 2.0 / 3
	}
	return 1.0 / 3
}
|
|
|
|
func readWeightLLM(img image.Image, auth authInfo) (float64, string, string, error) {
|
|
// Downscale only to satisfy the vision API's size limit — not a crop.
|
|
small := downscale(img, 1568)
|
|
var buf bytes.Buffer
|
|
if err := png.Encode(&buf, small); err != nil {
|
|
return 0, "", "", err
|
|
}
|
|
b64 := base64.StdEncoding.EncodeToString(buf.Bytes())
|
|
|
|
reqBody := map[string]interface{}{
|
|
"model": "claude-opus-4-8",
|
|
"max_tokens": 600,
|
|
"messages": []map[string]interface{}{{
|
|
"role": "user",
|
|
"content": []map[string]interface{}{
|
|
{"type": "image", "source": map[string]interface{}{"type": "base64", "media_type": "image/png", "data": b64}},
|
|
// Cache the image+prompt prefix so the repeated majority-vote calls
|
|
// (same image, same prompt) hit the cache instead of re-sending it.
|
|
{"type": "text", "text": weightPrompt, "cache_control": map[string]string{"type": "ephemeral"}},
|
|
},
|
|
}},
|
|
}
|
|
if auth.system != "" {
|
|
reqBody["system"] = auth.system
|
|
}
|
|
body, _ := json.Marshal(reqBody)
|
|
req, err := http.NewRequest("POST", "https://api.anthropic.com/v1/messages", bytes.NewReader(body))
|
|
if err != nil {
|
|
return 0, "", "", err
|
|
}
|
|
req.Header.Set("content-type", "application/json")
|
|
req.Header.Set("anthropic-version", "2023-06-01")
|
|
auth.apply(req)
|
|
|
|
resp, err := (&http.Client{Timeout: 90 * time.Second}).Do(req)
|
|
if err != nil {
|
|
return 0, "", "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
respBody, _ := io.ReadAll(resp.Body)
|
|
if resp.StatusCode != 200 {
|
|
return 0, "", "", fmt.Errorf("API %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
|
}
|
|
var parsed struct {
|
|
Content []struct {
|
|
Text string `json:"text"`
|
|
} `json:"content"`
|
|
}
|
|
if err := json.Unmarshal(respBody, &parsed); err != nil {
|
|
return 0, "", "", err
|
|
}
|
|
var text string
|
|
for _, c := range parsed.Content {
|
|
text += c.Text
|
|
}
|
|
var wr struct {
|
|
Weight float64 `json:"weight"`
|
|
Unit string `json:"unit"`
|
|
Confidence string `json:"confidence"`
|
|
}
|
|
if err := json.Unmarshal([]byte(lastJSONObject(text)), &wr); err != nil {
|
|
return 0, "", "", fmt.Errorf("parse model reply %q: %v", text, err)
|
|
}
|
|
return wr.Weight, strings.ToLower(strings.TrimSpace(wr.Unit)), strings.TrimSpace(wr.Confidence), nil
|
|
}
|
|
|
|
// lastJSONObject extracts the final brace-balanced {...} span of s, which is
// where the model is asked to put its answer. It scans backwards from the last
// '}' and stops at the '{' that balances it. Braces are counted without regard
// to JSON string quoting. s is returned unchanged when it contains no '}' or
// the braces never balance.
func lastJSONObject(s string) string {
	end := strings.LastIndex(s, "}")
	if end < 0 {
		return s
	}

	unmatched := 0 // closing braces seen so far that still lack an opener
	for start := end; start >= 0; start-- {
		if s[start] == '}' {
			unmatched++
			continue
		}
		if s[start] != '{' {
			continue
		}
		unmatched--
		if unmatched == 0 {
			return s[start : end+1]
		}
	}
	return s
}
|
|
|
|
func downscale(img image.Image, max int) image.Image {
|
|
b := img.Bounds()
|
|
w, h := b.Dx(), b.Dy()
|
|
if w <= max && h <= max {
|
|
return img
|
|
}
|
|
scale := float64(max) / float64(w)
|
|
if h > w {
|
|
scale = float64(max) / float64(h)
|
|
}
|
|
dst := image.NewRGBA(image.Rect(0, 0, int(float64(w)*scale), int(float64(h)*scale)))
|
|
xdraw.CatmullRom.Scale(dst, dst.Bounds(), img, b, xdraw.Over, nil)
|
|
return dst
|
|
}
|
|
|
|
// authInfo describes one way of authenticating against the Anthropic API.
type authInfo struct {
	name   string // human-readable label for the mechanism
	header string // name of the request header carrying the credential
	value  string // value of that header
	beta   string // anthropic-beta header value; empty to send none
	system string // system prompt to add to the request; empty to send none
}
|
|
|
|
func (a authInfo) apply(req *http.Request) {
|
|
req.Header.Set(a.header, a.value)
|
|
if a.beta != "" {
|
|
req.Header.Set("anthropic-beta", a.beta)
|
|
}
|
|
}
|
|
|
|
func resolveAuth() (authInfo, error) {
|
|
if k := os.Getenv("ANTHROPIC_API_KEY"); k != "" {
|
|
return authInfo{name: "ANTHROPIC_API_KEY (x-api-key)", header: "x-api-key", value: k}, nil
|
|
}
|
|
bearer := func(name, tok string) authInfo {
|
|
return authInfo{
|
|
name: name,
|
|
header: "authorization", value: "Bearer " + tok,
|
|
beta: "oauth-2025-04-20",
|
|
system: "You are Claude Code, Anthropic's official CLI for Claude.",
|
|
}
|
|
}
|
|
if t := os.Getenv("ANTHROPIC_AUTH_TOKEN"); t != "" {
|
|
return bearer("ANTHROPIC_AUTH_TOKEN (Bearer)", t), nil
|
|
}
|
|
if tok := claudeCodeOAuthToken(); tok != "" {
|
|
return bearer("Claude Code OAuth, macOS keychain (Bearer)", tok), nil
|
|
}
|
|
return authInfo{}, fmt.Errorf("no credentials: set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN")
|
|
}
|
|
|
|
// claudeCodeOAuthToken returns the access token stored under the
// "Claude Code-credentials" generic-password keychain entry, read with the
// macOS `security` command. It returns "" on any other OS, when the command
// fails, or when its output is not the expected JSON.
func claudeCodeOAuthToken() string {
	if runtime.GOOS != "darwin" {
		return ""
	}

	cmd := exec.Command("security", "find-generic-password", "-s", "Claude Code-credentials", "-w")
	raw, err := cmd.Output()
	if err != nil {
		return ""
	}

	var stored struct {
		ClaudeAiOauth struct {
			AccessToken string `json:"accessToken"`
		} `json:"claudeAiOauth"`
	}
	if json.Unmarshal(raw, &stored) != nil {
		return ""
	}
	return stored.ClaudeAiOauth.AccessToken
}
|