add ocr code

This commit is contained in:
2026-02-03 00:52:49 +01:00
parent 33cd5346bf
commit 7367226210
6 changed files with 3151 additions and 0 deletions

50
ocr/recognize2.py Normal file
View File

@@ -0,0 +1,50 @@
import cv2
import pytesseract
# OCR for captcha images using OpenCV (preprocessing) and Tesseract
# (recognition). Flat script: reads one hard-coded sample image, binarizes it,
# writes the binarized image to disk for inspection, and prints the text.

# Alternative inputs; uncomment exactly one assignment to switch samples.
#img_path = "captcha.png"
#img_path = "samples/sample_1-Dateien/1769810847305.jpg"
#img_path = "samples/sample_2-Dateien/1769811067589.jpg"
#img_path = "samples/sample_3_files/1769812197128.jpg"
img_path = "samples/sample_4-Dateien/1769818949905.jpg"

# Load the image. cv2.imread does not raise on a missing or undecodable file;
# it returns None, which would otherwise only surface as an opaque OpenCV
# error inside cvtColor below. Fail early with the offending path instead.
img = cv2.imread(img_path)
if img is None:
    raise OSError(f"Could not read image: {img_path!r}")

# Convert to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Light blur to reduce the grid noise in the captcha background.
gray = cv2.GaussianBlur(gray, (3, 3), 0)

# Binarize with Otsu's method; the threshold argument (0) is ignored because
# THRESH_OTSU selects the threshold automatically.
_, thresh = cv2.threshold(
    gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU
)

###
# Optional preprocessing steps, currently disabled.
# Invert (if needed):
#thresh = cv2.bitwise_not(thresh)
# Morphological closing to close gaps in the characters:
#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
#thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
###

# Save the processed image for debugging.
cv2.imwrite("processed_captcha.png", thresh)

# Tesseract configuration:
#   --oem 3  default OCR engine mode
#   --psm 7  treat the image as a single text line
#   whitelist restricts output to lowercase letters and digits
# NOTE(review): relies on pytesseract splitting the config on whitespace so
# that the newlines act as argument separators - confirm against the
# installed pytesseract version.
custom_config = r"""
--oem 3
--psm 7
-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz0123456789
"""

# Recognize the text and print it without surrounding whitespace.
text = pytesseract.image_to_string(thresh, config=custom_config)
print(text.strip())