"""OCR benchmark for captcha images.

Tries many combinations of image preprocessing steps (Pillow and OpenCV
based) and Tesseract configurations on a small labelled sample, scores every
combination with a normalised Levenshtein similarity, prints a ranking and
writes all results to a CSV file.
"""

import csv
import itertools
import os

import cv2
import Levenshtein
import numpy as np
import pytesseract
from PIL import Image, ImageFilter, ImageOps
from tqdm import tqdm

CAPTCHA_DIR = "./captchas"
CSV_FILE = "captcha_labels.csv"
OUTPUT_CSV = "recognize3_results.csv"
# Only the first MAX_SAMPLES usable CSV rows are evaluated (None = all rows).
MAX_SAMPLES = 10


# --- Base preprocessing steps ---

def to_grayscale(img: Image.Image) -> Image.Image:
    """Return *img* converted to 8-bit grayscale (mode "L")."""
    return img.convert("L")


def to_bw(img: Image.Image, threshold: int = 140) -> Image.Image:
    """Return a 1-bit image: pixels below *threshold* become 0, others 255."""
    return img.convert("L").point(lambda x: 0 if x < threshold else 255, '1')


def invert(img: Image.Image) -> Image.Image:
    """Return the inverted grayscale version of *img*."""
    return ImageOps.invert(img.convert("L"))


def _filterable(img: Image.Image) -> Image.Image:
    """Return *img* unchanged if it is "L" or "RGB", else converted to "L"."""
    return img if img.mode in ("L", "RGB") else img.convert("L")


def sharpen(img: Image.Image) -> Image.Image:
    """Apply Pillow's SHARPEN filter (after converting unusual modes to "L")."""
    return _filterable(img).filter(ImageFilter.SHARPEN)


def blur(img: Image.Image) -> Image.Image:
    """Apply a radius-1 Gaussian blur (after converting unusual modes to "L")."""
    return _filterable(img).filter(ImageFilter.GaussianBlur(1))


def resize2x(img: Image.Image) -> Image.Image:
    """Return *img* scaled to twice its width and height (LANCZOS filter)."""
    return img.resize((img.width * 2, img.height * 2), Image.LANCZOS)


def crop_left_2_3(img: Image.Image) -> Image.Image:
    """Return the left two thirds of *img* at full height."""
    w, h = img.size
    return img.crop((0, 0, int(w * 2 / 3), h))


def _gray_array(img: Image.Image) -> np.ndarray:
    """Return *img* as a grayscale NumPy array for use with OpenCV."""
    return np.array(img.convert("L"))


def opencv_adaptive_thresh(img: Image.Image) -> Image.Image:
    """Binarise with Gaussian adaptive thresholding (block size 11, C = 2)."""
    th = cv2.adaptiveThreshold(
        _gray_array(img), 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2,
    )
    return Image.fromarray(th)


def opencv_otsu(img: Image.Image) -> Image.Image:
    """Binarise with a global threshold chosen by Otsu's method."""
    _, th = cv2.threshold(
        _gray_array(img), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return Image.fromarray(th)


def opencv_erode(img: Image.Image) -> Image.Image:
    """Erode the grayscale image once with a 2x2 kernel."""
    kernel = np.ones((2, 2), np.uint8)
    return Image.fromarray(cv2.erode(_gray_array(img), kernel, iterations=1))


def opencv_dilate(img: Image.Image) -> Image.Image:
    """Dilate the grayscale image once with a 2x2 kernel."""
    kernel = np.ones((2, 2), np.uint8)
    return Image.fromarray(cv2.dilate(_gray_array(img), kernel, iterations=1))


def opencv_contrast(img: Image.Image) -> Image.Image:
    """Apply CLAHE (clip limit 2.0, 8x8 tiles) to the grayscale image."""
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return Image.fromarray(clahe.apply(_gray_array(img)))


# --- Compose preprocessing steps ---

def compose(*funcs):
    """Return a function that applies *funcs* to an image from left to right."""
    def composed(img):
        for f in funcs:
            img = f(img)
        return img
    return composed


# --- Define base steps and generate combinations ---

base_steps = {
    "none": lambda img: img,
    "grayscale": to_grayscale,
    "bw": to_bw,
    "invert": invert,
    "sharpen": sharpen,
    "blur": blur,
    "resize2x": resize2x,
    "crop_left_2_3": crop_left_2_3,
    "opencv_adaptive_thresh": opencv_adaptive_thresh,
    "opencv_otsu": opencv_otsu,
    "opencv_erode": opencv_erode,
    "opencv_dilate": opencv_dilate,
    "opencv_contrast": opencv_contrast,
}


def build_preprocessings(steps):
    """Return all 1-step variants plus all ordered 2-step combinations.

    The identity step "none" is kept as a 1-step variant only: combining it
    with another step in either position is exactly that other step again,
    so such pairs would only add duplicate OCR runs and duplicate rows to
    the ranking.
    """
    variants = dict(steps)
    # permutations(..., 2) yields every ordered pair of two different steps.
    for (name1, func1), (name2, func2) in itertools.permutations(steps.items(), 2):
        if "none" in (name1, name2):
            continue
        variants[f"{name1}+{name2}"] = compose(func1, func2)
    return variants


preprocessings = build_preprocessings(base_steps)

# Characters present in your labels:
whitelist = "23456789abcdefghijklmnopqrstuvwxyz"

# NOTE(review): "--oem 3" is documented as Tesseract's default engine mode,
# so entries that differ only by "--oem 3" are expected to score the same;
# they are kept so result files stay comparable with earlier runs - confirm.
tess_configs = [
    '--psm 7',
    '--psm 8',
    '--psm 6',
    '--psm 13',
    '--psm 7 --oem 1',
    '--psm 7 --oem 3',
    f'--psm 7 -c tessedit_char_whitelist={whitelist}',
    f'--psm 8 -c tessedit_char_whitelist={whitelist}',
    f'--psm 6 -c tessedit_char_whitelist={whitelist}',
    f'--psm 13 -c tessedit_char_whitelist={whitelist}',
    f'--psm 7 --oem 1 -c tessedit_char_whitelist={whitelist}',
    f'--psm 7 --oem 3 -c tessedit_char_whitelist={whitelist}',
    f'--psm 8 --oem 1 -c tessedit_char_whitelist={whitelist}',
    f'--psm 8 --oem 3 -c tessedit_char_whitelist={whitelist}',
    f'--psm 6 --oem 1 -c tessedit_char_whitelist={whitelist}',
    f'--psm 6 --oem 3 -c tessedit_char_whitelist={whitelist}',
    f'--psm 13 --oem 1 -c tessedit_char_whitelist={whitelist}',
    f'--psm 13 --oem 3 -c tessedit_char_whitelist={whitelist}',
]


# --- Scoring helpers ---

def normalize_text(text: str) -> str:
    """Return *text* lowercased with all whitespace removed.

    Used for both labels and OCR output so the comparison is symmetric.
    Removing every whitespace character (not just spaces) also drops line
    breaks that can appear inside multi-line OCR output.
    """
    return "".join(text.split()).lower()


def fuzzy_score(label: str, pred: str) -> float:
    """Return the normalised Levenshtein similarity of *label* and *pred*.

    1.0 means identical, 0.0 means completely different. If both strings are
    empty the score is 0.
    """
    longest = max(len(label), len(pred))
    if longest == 0:
        return 0
    return 1 - Levenshtein.distance(label, pred) / longest


# --- Experiment ---

def load_samples(csv_path, image_dir, limit=None):
    """Load labelled captcha images listed in *csv_path*.

    Each CSV row is expected to hold a file name and its label in the first
    two columns; rows with fewer than two fields (e.g. blank lines) are
    skipped and extra columns are ignored. Only the first *limit* usable rows
    are considered (all rows if *limit* is None). Rows whose image file does
    not exist in *image_dir* are silently skipped.

    Returns:
        A list of ``(normalised_label, image)`` tuples. The images are fully
        loaded copies, so no file handles stay open.
    """
    with open(csv_path, newline='') as f:
        rows = [row for row in csv.reader(f) if len(row) >= 2]

    samples = []
    for fname, label, *_ in rows[:limit]:
        img_path = os.path.join(image_dir, fname)
        if not os.path.exists(img_path):
            continue
        with Image.open(img_path) as opened:
            # copy() decodes the pixels and detaches them from the file.
            samples.append((normalize_text(label), opened.copy()))
    return samples


def evaluate(samples, variants, configs):
    """Score every (preprocessing, Tesseract config) combination.

    Args:
        samples: ``(label, image)`` tuples as returned by ``load_samples``.
        variants: Mapping of variant name to preprocessing function.
        configs: Iterable of Tesseract config strings.

    Returns:
        A list of ``(variant_name, config, average_fuzzy_score)`` tuples in
        evaluation order. The average is 0 when there are no samples.
    """
    results = []
    for pname, pfunc in tqdm(variants.items(), desc="Preprocessing variants"):
        # Preprocess once per variant; the result is reused for all configs.
        prepared = [(label, pfunc(img)) for label, img in samples]
        for config in configs:
            scores = [
                fuzzy_score(
                    label,
                    normalize_text(pytesseract.image_to_string(img, config=config)),
                )
                for label, img in prepared
            ]
            avg_fuzzy_score = sum(scores) / len(scores) if scores else 0
            results.append((pname, config, avg_fuzzy_score))
            print(f"Preprocessing: {pname}, Config: {config}, Avg fuzzy score: {avg_fuzzy_score:.3f}")
    return results


def main():
    """Run the benchmark, print the top 5 combinations and write the CSV."""
    samples = load_samples(CSV_FILE, CAPTCHA_DIR, MAX_SAMPLES)
    results = evaluate(samples, preprocessings, tess_configs)

    # Sort results by accuracy (descending)
    results_sorted = sorted(results, key=lambda x: x[2], reverse=True)

    print("\nTop 5 combinations:")
    for i, (pname, config, acc) in enumerate(results_sorted[:5], 1):
        print(f"{i}. Preprocessing: {pname}, Config: {config}, Accuracy: {acc:.3f}")

    # Output all results to a CSV file, ordered by accuracy DESC
    with open(OUTPUT_CSV, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["preprocessing", "tesseract_config", "accuracy"])
        writer.writerows(results_sorted)

    print(f"\nFull results written to {OUTPUT_CSV}")


if __name__ == "__main__":
    main()