add ocr code
This commit is contained in:
5
ocr/README.md
Normal file
5
ocr/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
My first attempt was to use OCR,
|
||||||
|
but I was unable to get satisfactory results quickly.
|
||||||
|
|
||||||
|
However, I am keeping the code here for future reference,
|
||||||
|
in case someone (even myself) might find it useful.
|
||||||
56
ocr/label_captchas_streamlit.py
Normal file
56
ocr/label_captchas_streamlit.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
import streamlit as st
|
||||||
|
import os
|
||||||
|
import csv
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
"""Einfaches Streamlit-Tool zum manuellen Labeln von Captcha-Bildern."""
|
||||||
|
|
||||||
|
CAPTCHA_DIR = "./captchas"
|
||||||
|
CSV_FILE = "captcha_labels.csv"
|
||||||
|
|
||||||
|
def load_labeled(csv_file=None):
    """Return the set of captcha filenames that already have a label.

    Args:
        csv_file: Path of the label CSV to read. When omitted, the
            module-level ``CSV_FILE`` is used.

    Returns:
        A set holding the first column of every non-empty CSV row, or an
        empty set when the file does not exist yet.
    """
    path = CSV_FILE if csv_file is None else csv_file
    if not os.path.exists(path):
        return set()
    with open(path, newline='') as f:
        # csv.reader yields [] for blank lines; indexing such a row would
        # raise IndexError, so those rows are skipped.
        return {row[0] for row in csv.reader(f) if row}
|
||||||
|
|
||||||
|
def save_label(filename, label):
    """Append one ``filename,label`` row to the label CSV (``CSV_FILE``).

    Args:
        filename: Name of the captcha image that was labelled.
        label: Text entered for that image.
    """
    row = [filename, label]
    with open(CSV_FILE, "a", newline='') as out:
        csv.writer(out).writerow(row)
|
||||||
|
|
||||||
|
def get_unlabeled_files():
    """Return the ``.png`` files in ``CAPTCHA_DIR`` that have no label yet.

    The names are returned sorted. ``os.listdir`` gives no ordering
    guarantee, and the UI addresses this list by a position that is kept
    across Streamlit reruns, so the order has to be stable between calls.

    Returns:
        A sorted list of file names (not full paths).
    """
    labeled = load_labeled()
    return sorted(
        name
        for name in os.listdir(CAPTCHA_DIR)
        if name.endswith(".png") and name not in labeled
    )
|
||||||
|
|
||||||
|
st.title("Captcha Labeler")

# The script is re-executed on every interaction, so the list of unlabeled
# files is recomputed each time and shrinks as labels are saved.
files = get_unlabeled_files()
if not files:
    st.success("Alle Captchas sind gelabelt!")
else:
    # Session state: current position in `files` and a one-shot status message.
    if "idx" not in st.session_state:
        st.session_state.idx = 0
    if "flash" not in st.session_state:
        st.session_state.flash = ""
    if st.session_state.flash:
        st.success(st.session_state.flash)
        st.session_state.flash = ""  # reset once it has been displayed

    # "Next captcha": skip the current image without labelling it.
    if st.button("Nächstes Captcha"):
        st.session_state.idx += 1
        st.rerun()

    # Running past the end means only skipped captchas are left (the list is
    # non-empty here), so wrap around instead of claiming everything is done.
    if st.session_state.idx >= len(files):
        st.session_state.idx = 0

    fname = files[st.session_state.idx]
    img = Image.open(os.path.join(CAPTCHA_DIR, fname))
    st.image(img, caption=fname)

    # Keys are derived from the file name so each captcha gets a fresh form.
    with st.form(key=f"form_{fname}"):
        label = st.text_input("Lösung eingeben", key=f"label_{fname}")
        submitted = st.form_submit_button("Speichern")

    if submitted and label.strip():
        save_label(fname, label.strip())
        st.session_state.flash = f"Gespeichert: {fname} -> {label.strip()}"
        # Do not advance idx here: the saved file disappears from `files` on
        # the next run, so the same index already points at the next captcha.
        # Incrementing as well would skip one image per save.
        st.rerun()
|
||||||
34
ocr/recognize.py
Normal file
34
ocr/recognize.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
from PIL import Image
|
||||||
|
import pytesseract
|
||||||
|
import sys
|
||||||
|
|
||||||
|
"""OCR für Captcha-Bilder mit Tesseract only"""
|
||||||
|
|
||||||
|
# TEST config: page segmentation mode 7 plus a character whitelist of
# lowercase letters and all ten digits (the digit run previously lacked "7").
custom_config = r'--psm 7 -c tessedit_char_whitelist="abcdefghijklmnopqrstuvwxyz0123456789"'
|
||||||
|
|
||||||
|
def recognize_captcha(img_path, threshold=150):
    """Binarize a captcha image and run Tesseract on the result.

    Args:
        img_path: Path of the image file to read.
        threshold: Gray level cut-off (0-255). Pixels darker than this
            become black, all others white. Defaults to 150.

    Returns:
        The raw string returned by ``pytesseract.image_to_string`` when
        called with the module-level ``custom_config``.
    """
    # 1. Threshold the image: a lookup table maps each of the 256 gray
    #    levels to black (0) or white (255).
    table = [0 if level < threshold else 255 for level in range(256)]
    with Image.open(img_path) as im:
        binary = im.convert("L").point(table, '1')

    # 2. Recognize with Tesseract.
    return pytesseract.image_to_string(binary, config=custom_config)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python recognize.py <image_filename>")
        # Stop here: without exactly one argument there is no image to read,
        # and falling through would fail on sys.argv[1] or use a wrong file.
        sys.exit(1)

    res = recognize_captcha(sys.argv[1])
    # Print only the first line of the OCR output; str.split always returns
    # at least one element, so indexing [0] is safe.
    print(res.split("\n")[0])
|
||||||
50
ocr/recognize2.py
Normal file
50
ocr/recognize2.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import cv2
|
||||||
|
import pytesseract
|
||||||
|
|
||||||
|
"""OCR für Captcha-Bilder mit OpenCV und Tesseract"""
|
||||||
|
|
||||||
|
# Sample inputs tried so far; exactly one img_path assignment is active.
#img_path = "captcha.png"
#img_path = "samples/sample_1-Dateien/1769810847305.jpg"
#img_path = "samples/sample_2-Dateien/1769811067589.jpg"
#img_path = "samples/sample_3_files/1769812197128.jpg"
img_path = "samples/sample_4-Dateien/1769818949905.jpg"

# Load the image
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising; fail here with a clear message instead of a
    # cryptic error inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Light blur to reduce grid noise
gray = cv2.GaussianBlur(gray, (3, 3), 0)

# Threshold (Otsu)
_, thresh = cv2.threshold(
    gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU
)

###
# Invert (if needed)
#thresh = cv2.bitwise_not(thresh)

# Morphology, to close up the characters
#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
#thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

###

# save the processed image for debugging
cv2.imwrite("processed_captcha.png", thresh)

# Tesseract config
custom_config = r"""
--oem 3
--psm 7
-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz0123456789
"""

# Recognize the text
text = pytesseract.image_to_string(thresh, config=custom_config)

print(text.strip())
|
||||||
179
ocr/recognize3.py
Normal file
179
ocr/recognize3.py
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
from PIL import Image, ImageOps, ImageFilter
|
||||||
|
import pytesseract
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import itertools
|
||||||
|
from tqdm import tqdm
|
||||||
|
import Levenshtein
|
||||||
|
|
||||||
|
"""
|
||||||
|
OCR für Captcha-Bilder mit verschiedenen Vorverarbeitungen und Tesseract-Konfigurationen
|
||||||
|
|
||||||
|
Es werden verschiedene Kombinationen von Bildvorverarbeitungen (teils mit OpenCV)
|
||||||
|
und Tesseract-Konfigurationen getestet, um die beste Erkennungsgenauigkeit zu ermitteln.
|
||||||
|
"""
|
||||||
|
|
||||||
|
CAPTCHA_DIR = "./captchas"
|
||||||
|
CSV_FILE = "captcha_labels.csv"
|
||||||
|
|
||||||
|
# --- Base preprocessing steps ---
|
||||||
|
def to_grayscale(img):
    """Return *img* converted to PIL mode ``"L"``."""
    gray = img.convert("L")
    return gray
|
||||||
|
|
||||||
|
def to_bw(img, threshold=140):
    """Return a mode ``"1"`` copy of *img*, split at *threshold*.

    Gray levels below *threshold* map to 0, all others to 255.
    """
    def _binarize(level):
        if level < threshold:
            return 0
        return 255

    gray = img.convert("L")
    return gray.point(_binarize, '1')
|
||||||
|
|
||||||
|
def invert(img):
    """Return the inverted mode ``"L"`` version of *img*."""
    gray = img.convert("L")
    return ImageOps.invert(gray)
|
||||||
|
|
||||||
|
def sharpen(img):
    """Apply PIL's ``SHARPEN`` filter to *img*.

    Images whose mode is neither ``"L"`` nor ``"RGB"`` are converted to
    ``"L"`` before filtering.
    """
    filterable = img.mode == "L" or img.mode == "RGB"
    source = img if filterable else img.convert("L")
    return source.filter(ImageFilter.SHARPEN)
|
||||||
|
|
||||||
|
def blur(img):
    """Apply a Gaussian blur of radius 1 to *img*.

    Images whose mode is neither ``"L"`` nor ``"RGB"`` are converted to
    ``"L"`` before filtering.
    """
    filterable = img.mode == "L" or img.mode == "RGB"
    source = img if filterable else img.convert("L")
    return source.filter(ImageFilter.GaussianBlur(1))
|
||||||
|
|
||||||
|
def resize2x(img):
    """Return *img* scaled to twice its width and height (LANCZOS resampling)."""
    doubled_size = (2 * img.width, 2 * img.height)
    return img.resize(doubled_size, Image.LANCZOS)
|
||||||
|
|
||||||
|
def crop_left_2_3(img):
    """Return the left two thirds of *img* at full height."""
    width, height = img.size
    right_edge = int(width * 2 / 3)
    box = (0, 0, right_edge, height)
    return img.crop(box)
|
||||||
|
|
||||||
|
def opencv_adaptive_thresh(img):
    """Binarize *img* with OpenCV's Gaussian adaptive threshold.

    The image is converted to mode ``"L"`` first; the result is returned
    as a PIL image again.
    """
    gray = np.array(img.convert("L"))
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return Image.fromarray(binary)
|
||||||
|
|
||||||
|
def opencv_otsu(img):
    """Binarize *img* with OpenCV's Otsu threshold and return a PIL image."""
    gray = np.array(img.convert("L"))
    flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns a pair; only the thresholded image is used.
    binary = cv2.threshold(gray, 0, 255, flags)[1]
    return Image.fromarray(binary)
|
||||||
|
|
||||||
|
def opencv_erode(img):
    """Erode the grayscale version of *img* once with a 2x2 kernel."""
    gray = np.array(img.convert("L"))
    structuring = np.ones((2, 2), np.uint8)
    return Image.fromarray(cv2.erode(gray, structuring, iterations=1))
|
||||||
|
|
||||||
|
def opencv_dilate(img):
    """Dilate the grayscale version of *img* once with a 2x2 kernel."""
    gray = np.array(img.convert("L"))
    structuring = np.ones((2, 2), np.uint8)
    return Image.fromarray(cv2.dilate(gray, structuring, iterations=1))
|
||||||
|
|
||||||
|
def opencv_contrast(img):
    """Apply OpenCV CLAHE (clip limit 2.0, 8x8 tiles) to the grayscale *img*."""
    gray = np.array(img.convert("L"))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return Image.fromarray(equalizer.apply(gray))
|
||||||
|
|
||||||
|
# --- Compose preprocessing steps ---
|
||||||
|
def compose(*funcs):
    """Chain *funcs* into a single callable.

    The returned function feeds its argument through every function in
    *funcs* from left to right and returns the final value. With no
    functions it returns its argument unchanged.
    """
    def composed(img):
        result = img
        for step in funcs:
            result = step(result)
        return result

    return composed
|
||||||
|
|
||||||
|
# --- Define base steps and generate combinations ---
|
||||||
|
# Name -> single preprocessing step. Insertion order determines the order in
# which the combinations below are generated.
base_steps = dict(
    none=lambda img: img,  # identity: leave the image untouched
    grayscale=to_grayscale,
    bw=to_bw,
    invert=invert,
    sharpen=sharpen,
    blur=blur,
    resize2x=resize2x,
    crop_left_2_3=crop_left_2_3,
    opencv_adaptive_thresh=opencv_adaptive_thresh,
    opencv_otsu=opencv_otsu,
    opencv_erode=opencv_erode,
    opencv_dilate=opencv_dilate,
    opencv_contrast=opencv_contrast,
)
|
||||||
|
|
||||||
|
# Generate all 1-step variants plus every ordered 2-step combination of two
# different steps. "none" takes no part in 2-step combinations: as the second
# step it is a no-op, and as the first step "none+X" would just repeat the
# single step "X" and spend a full evaluation round on a duplicate.
preprocessings = dict(base_steps)

for (name1, func1), (name2, func2) in itertools.permutations(base_steps.items(), 2):
    if "none" in (name1, name2):
        continue
    preprocessings[f"{name1}+{name2}"] = compose(func1, func2)
|
||||||
|
|
||||||
|
# Characters present in your labels:
whitelist = "23456789abcdefghijklmnopqrstuvwxyz"

# --psm values to try, in evaluation order.
_psm_modes = ("7", "8", "6", "13")
# Option suffix restricting Tesseract's output to the label alphabet.
_whitelist_opt = f" -c tessedit_char_whitelist={whitelist}"

# Every Tesseract command-line configuration to evaluate: plain --psm
# variants first, then the same with the whitelist, then whitelist plus
# each --oem value.
tess_configs = (
    [f"--psm {psm}" for psm in _psm_modes]
    + ["--psm 7 --oem 1", "--psm 7 --oem 3"]
    + [f"--psm {psm}{_whitelist_opt}" for psm in _psm_modes]
    + [
        f"--psm {psm} --oem {oem}{_whitelist_opt}"
        for psm in _psm_modes
        for oem in (1, 3)
    ]
)
|
||||||
|
|
||||||
|
with open(CSV_FILE, newline='') as f:
    # Blank or incomplete rows are dropped so that every entry unpacks
    # cleanly into (filename, label) in the evaluation loop.
    labelled_rows = (row[:2] for row in csv.reader(f) if len(row) >= 2)
    # Only use the first 10 images; islice stops reading once it has them.
    data = list(itertools.islice(labelled_rows, 10))
|
||||||
|
|
||||||
|
# One (preprocessing name, tesseract config, average fuzzy score) tuple per
# evaluated combination.
results = []

for pname, pfunc in tqdm(preprocessings.items(), desc="Preprocessing variants"):
    # Preprocessing does not depend on the Tesseract config, so each image is
    # loaded and preprocessed once per variant and reused for every config.
    samples = []
    for fname, label in data:
        img_path = os.path.join(CAPTCHA_DIR, fname)
        if not os.path.exists(img_path):
            continue  # labelled file is no longer on disk
        samples.append((label, pfunc(Image.open(img_path))))

    total = len(samples)
    for config in tess_configs:
        fuzzy_score_sum = 0
        for label, img in samples:
            pred = pytesseract.image_to_string(img, config=config).strip().lower().replace(" ", "")
            # Fuzzy score: 1.0 = perfect, 0.0 = completely wrong
            longest = max(len(label), len(pred))
            if longest > 0:
                fuzzy_score = 1 - Levenshtein.distance(label, pred) / longest
            else:
                # Both strings empty: scored 0, as before.
                fuzzy_score = 0
            fuzzy_score_sum += fuzzy_score
        avg_fuzzy_score = fuzzy_score_sum / total if total else 0
        results.append((pname, config, avg_fuzzy_score))
        print(f"Preprocessing: {pname}, Config: {config}, Avg fuzzy score: {avg_fuzzy_score:.3f}")
|
||||||
|
|
||||||
|
# Rank all combinations by their score, best first.
results_sorted = list(results)
results_sorted.sort(key=lambda entry: entry[2], reverse=True)

print("\nTop 5 combinations:")
rank = 0
for pname, config, acc in results_sorted[:5]:
    rank += 1
    print(f"{rank}. Preprocessing: {pname}, Config: {config}, Accuracy: {acc:.3f}")

# Write the complete ranking (header row first) to a CSV file.
output_csv = "recognize3_results.csv"
with open(output_csv, "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["preprocessing", "tesseract_config", "accuracy"])
    writer.writerows(results_sorted)

print(f"\nFull results written to {output_csv}")
|
||||||
2827
ocr/recognize3_results.csv
Normal file
2827
ocr/recognize3_results.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user