import cv2
import pytesseract

"""OCR für Captcha-Bilder mit OpenCV und Tesseract"""

# Input image selection: exactly one img_path assignment should be active.
# The commented-out alternatives are other sample captchas kept for quick
# switching during experimentation.
#img_path = "captcha.png"
#img_path = "samples/sample_1-Dateien/1769810847305.jpg"
#img_path = "samples/sample_2-Dateien/1769811067589.jpg"
#img_path = "samples/sample_3_files/1769812197128.jpg"
img_path = "samples/sample_4-Dateien/1769818949905.jpg"

# Load the image. cv2.imread does not raise when the file is missing or
# cannot be decoded; it returns None. Fail early with a clear message
# instead of letting cvtColor crash with an obscure assertion error.
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image file: {img_path!r}")

# Convert to grayscale (imread yields BGR channel order).
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Light blur to reduce the grid noise before thresholding.
gray = cv2.GaussianBlur(gray, (3, 3), 0)

# Binarize with Otsu's method. With THRESH_OTSU the threshold is chosen
# automatically, so the 0 passed here is only a placeholder; the returned
# threshold value is not needed and is discarded.
_, thresh = cv2.threshold(
    gray, 0, 255,
    cv2.THRESH_BINARY | cv2.THRESH_OTSU,
)

### Optional preprocessing steps (disabled) ###
# Invert the binary image (if needed).
#thresh = cv2.bitwise_not(thresh)

# Morphological closing to close gaps in the characters.
#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
#thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
### End of optional steps ###

# Dump the preprocessed image to disk so the OCR input can be inspected.
cv2.imwrite("processed_captcha.png", thresh)

# Options handed to Tesseract: engine mode 3, page segmentation mode 7,
# and a whitelist limiting output to lowercase letters and digits.
tesseract_options = r"""
--oem 3
--psm 7
-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz0123456789
"""

# Run OCR on the binarized image.
raw_result = pytesseract.image_to_string(thresh, config=tesseract_options)

# Print the recognized text without surrounding whitespace.
recognized = raw_result.strip()
print(recognized)