add ocr code
This commit is contained in:
56
ocr/label_captchas_streamlit.py
Normal file
56
ocr/label_captchas_streamlit.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import streamlit as st
|
||||
import os
|
||||
import csv
|
||||
from PIL import Image
|
||||
|
||||
# NOTE(review): this string comes after the imports, so Python does not treat
# it as the module docstring. Streamlit's "magic" may also render bare string
# expressions in the page -- confirm, and move it to the top of the file if
# that is unintended.
"""Einfaches Streamlit-Tool zum manuellen Labeln von Captcha-Bildern."""
|
||||
|
||||
# Directory that is listed for captcha images and from which each image is
# opened; the path is relative, so it is resolved against the working directory.
CAPTCHA_DIR = "./captchas"
|
||||
# CSV file that labels are appended to and read back from; each row is
# written as `filename,label`.
CSV_FILE = "captcha_labels.csv"
|
||||
|
||||
def load_labeled(csv_file=None):
    """Return the set of image file names that already have a label.

    Args:
        csv_file: Path of the label CSV to read. When ``None`` (the default)
            the module-level ``CSV_FILE`` is used.

    Returns:
        A set containing the first column of every non-empty CSV row, or an
        empty set when the file does not exist yet.
    """
    path = CSV_FILE if csv_file is None else csv_file
    try:
        with open(path, newline='') as f:
            # csv.reader yields an empty list for blank lines (e.g. a stray
            # trailing newline); indexing those with row[0] would raise
            # IndexError, so they are skipped.
            return {row[0] for row in csv.reader(f) if row}
    except FileNotFoundError:
        # Nothing has been labelled yet.
        return set()
|
||||
|
||||
def save_label(filename, label, csv_file=None):
    """Append one ``filename,label`` row to the label CSV.

    Args:
        filename: Name of the captcha image the label belongs to.
        label: The solution text entered for that image.
        csv_file: Path of the CSV to append to. When ``None`` (the default)
            the module-level ``CSV_FILE`` is used.
    """
    path = CSV_FILE if csv_file is None else csv_file
    # Append mode creates the file on first use and keeps earlier rows;
    # newline='' lets the csv module control line endings itself.
    with open(path, "a", newline='') as f:
        csv.writer(f).writerow([filename, label])
|
||||
|
||||
def get_unlabeled_files(captcha_dir=None, labeled=None):
    """Return the PNG files in the captcha directory that have no label yet.

    Args:
        captcha_dir: Directory to scan. When ``None`` (the default) the
            module-level ``CAPTCHA_DIR`` is used.
        labeled: Collection of already-labelled file names. When ``None``
            (the default) it is obtained from ``load_labeled()``.

    Returns:
        A sorted list of file names. Sorting matters because the caller
        addresses entries by position across reruns, and ``os.listdir``
        returns names in arbitrary order.

    Raises:
        FileNotFoundError: If the captcha directory does not exist.
    """
    directory = CAPTCHA_DIR if captcha_dir is None else captcha_dir
    done = load_labeled() if labeled is None else labeled
    return sorted(
        name
        for name in os.listdir(directory)
        # Compare the suffix case-insensitively so "X.PNG" is not ignored.
        if name.lower().endswith(".png") and name not in done
    )
|
||||
|
||||
# --- Streamlit page body: executed top to bottom on every interaction. ---
st.title("Captcha Labeler")

# Recomputed on every rerun, so a file drops out of this list as soon as its
# label has been written to the CSV.
files = get_unlabeled_files()

if not files:
    st.success("Alle Captchas sind gelabelt!")
else:
    # idx: position of the captcha currently shown within `files`.
    if "idx" not in st.session_state:
        st.session_state.idx = 0
    # flash: one-shot confirmation message carried across a rerun.
    if "flash" not in st.session_state:
        st.session_state.flash = ""

    if st.session_state.flash:
        st.success(st.session_state.flash)
        st.session_state.flash = ""  # reset after it has been displayed

    # "Next" skips the current captcha without labelling it.
    if st.button("Nächstes Captcha"):
        st.session_state.idx += 1
        st.rerun()

    # Wrap around instead of declaring everything labelled: reaching the end
    # of the list only means the remaining files were skipped, and the
    # `if not files` branch above already handles the truly finished case.
    if st.session_state.idx >= len(files):
        st.session_state.idx = 0

    fname = files[st.session_state.idx]
    img = Image.open(os.path.join(CAPTCHA_DIR, fname))
    st.image(img, caption=fname)

    # Keys include the file name so each captcha gets a fresh, empty input.
    with st.form(key=f"form_{fname}"):
        label = st.text_input("Lösung eingeben", key=f"label_{fname}")
        submitted = st.form_submit_button("Speichern")

    if submitted and label.strip():
        save_label(fname, label.strip())
        st.session_state.flash = f"Gespeichert: {fname} -> {label.strip()}"
        # Do NOT advance idx here: the saved file disappears from `files` on
        # the rerun, so the same index already points at the next captcha.
        # Incrementing as well would skip every second image.
        st.rerun()
|
||||
Reference in New Issue
Block a user