"""OCR for captcha images using OpenCV and Tesseract.

Pipeline: load image -> grayscale -> light Gaussian blur -> Otsu threshold
-> write the binarized image to disk for debugging -> run Tesseract on it
and print the recognized text.
"""

import cv2
import pytesseract

# Alternative sample inputs; point img_path at one of these to switch images.
#img_path = "captcha.png"
#img_path = "samples/sample_1-Dateien/1769810847305.jpg"
#img_path = "samples/sample_2-Dateien/1769811067589.jpg"
#img_path = "samples/sample_3_files/1769812197128.jpg"
img_path = "samples/sample_4-Dateien/1769818949905.jpg"

# Load the image. cv2.imread does not raise on a missing or undecodable file;
# it returns None, which would otherwise only surface as an opaque assertion
# failure inside cvtColor. Fail early with the offending path instead.
img = cv2.imread(img_path)
if img is None:
    raise OSError(f"Could not read image: {img_path!r}")

# Convert to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Light blur to reduce grid noise before thresholding.
gray = cv2.GaussianBlur(gray, (3, 3), 0)

# Binarize with Otsu's method; the threshold value 0 passed here is ignored
# because THRESH_OTSU picks the threshold automatically.
_, thresh = cv2.threshold(
    gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU
)

###
# Optional post-processing steps, currently disabled.
# Invert (if needed)
#thresh = cv2.bitwise_not(thresh)

# Morphological closing to join broken character strokes
#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
#thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
###

# Save the processed image for debugging.
cv2.imwrite("processed_captcha.png", thresh)

# Tesseract configuration: default engine mode (--oem 3), treat the image as
# a single text line (--psm 7), and restrict output to lowercase letters and
# digits via the character whitelist.
custom_config = r""" --oem 3 --psm 7 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz0123456789 """

# Recognize the text and print it without surrounding whitespace.
text = pytesseract.image_to_string(thresh, config=custom_config)
print(text.strip())