add ocr code

This commit is contained in:
2026-02-03 00:52:49 +01:00
parent 33cd5346bf
commit 7367226210
6 changed files with 3151 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
import streamlit as st
import os
import csv
from PIL import Image
"""Einfaches Streamlit-Tool zum manuellen Labeln von Captcha-Bildern."""
# Directory scanned for captcha images; the relative path is resolved
# against the process's current working directory.
CAPTCHA_DIR = "./captchas"
# CSV file that labels are read from and appended to; each row written by
# this script is (filename, label).
CSV_FILE = "captcha_labels.csv"
def load_labeled(csv_file=None):
    """Return the set of filenames that already have a label.

    The first column of every non-empty row of the label CSV is collected.

    Args:
        csv_file: Path of the label CSV. When omitted, the module-level
            ``CSV_FILE`` is used.

    Returns:
        A set with the first-column value of each row, or an empty set when
        the file does not exist yet.
    """
    path = CSV_FILE if csv_file is None else csv_file
    try:
        with open(path, newline='') as f:
            # csv.reader yields an empty list for a blank line; indexing it
            # with row[0] would raise IndexError, so such rows are skipped.
            return {row[0] for row in csv.reader(f) if row}
    except FileNotFoundError:
        # Nothing has been labeled yet.
        return set()
def save_label(filename, label, csv_file=None):
    """Append one ``filename, label`` row to the label CSV.

    The file is opened in append mode, so it is created on first use and
    existing rows are never rewritten.

    Args:
        filename: Name of the captcha image being labeled.
        label: The solution text for that image.
        csv_file: Path of the label CSV. When omitted, the module-level
            ``CSV_FILE`` is used.
    """
    path = CSV_FILE if csv_file is None else csv_file
    # newline='' lets the csv module control line endings itself.
    with open(path, "a", newline='') as f:
        csv.writer(f).writerow([filename, label])
def get_unlabeled_files(captcha_dir=None, labeled=None):
    """Return the sorted names of ``.png`` files that still need a label.

    Args:
        captcha_dir: Directory to scan. When omitted, the module-level
            ``CAPTCHA_DIR`` is used.
        labeled: Collection of filenames that are already labeled. When
            omitted, it is obtained from ``load_labeled()``.

    Returns:
        A sorted list of file names (not full paths) ending in ``.png`` that
        are not contained in ``labeled``.

    Raises:
        FileNotFoundError: If the directory does not exist.
    """
    directory = CAPTCHA_DIR if captcha_dir is None else captcha_dir
    done = load_labeled() if labeled is None else labeled
    # os.listdir returns entries in arbitrary order. Callers index into the
    # result across separate invocations, so sort to keep positions stable.
    return sorted(
        name
        for name in os.listdir(directory)
        if name.endswith(".png") and name not in done
    )
# Page body. Streamlit re-executes this script on each user interaction, so
# `files` is recomputed on every run and only `st.session_state` survives
# between runs.
st.title("Captcha Labeler")
files = get_unlabeled_files()
if not files:
    st.success("Alle Captchas sind gelabelt!")
else:
    # One-time initialisation of the per-session state.
    if "idx" not in st.session_state:
        st.session_state.idx = 0
    if "flash" not in st.session_state:
        st.session_state.flash = ""

    # Show the confirmation message left by the previous run exactly once.
    if st.session_state.flash:
        st.success(st.session_state.flash)
        st.session_state.flash = ""  # reset after it has been displayed

    # "Next captcha" skips the current image without labeling it.
    if st.button("Nächstes Captcha"):
        st.session_state.idx += 1
        st.rerun()

    # Unlabeled files remain here, so running past the end must not be
    # reported as "everything labeled"; wrap around to the first one instead.
    st.session_state.idx %= len(files)

    fname = files[st.session_state.idx]
    # Context manager closes the image file once it has been handed over.
    with Image.open(os.path.join(CAPTCHA_DIR, fname)) as img:
        st.image(img, caption=fname)

    with st.form(key=f"form_{fname}"):
        label = st.text_input("Lösung eingeben", key=f"label_{fname}")
        submitted = st.form_submit_button("Speichern")

    cleaned = label.strip()
    if submitted and cleaned:
        save_label(fname, cleaned)
        st.session_state.flash = f"Gespeichert: {fname} -> {cleaned}"
        # Do NOT advance idx: the saved file drops out of `files` on the
        # next run, so the same index already points at the next captcha.
        # Incrementing here as well would skip one image per save.
        st.rerun()