add ocr code

2026-02-03 00:52:49 +01:00
parent 33cd5346bf
commit 7367226210
6 changed files with 3151 additions and 0 deletions
--- a/ocr/recognize2.py
+++ b/ocr/recognize2.py
@@ -0,0 +1,50 @@
+"""OCR for captcha images using OpenCV and Tesseract."""
+
+import cv2
+import pytesseract
+
+#img_path = "captcha.png"
+#img_path = "samples/sample_1-Dateien/1769810847305.jpg"
+#img_path = "samples/sample_2-Dateien/1769811067589.jpg"
+#img_path = "samples/sample_3_files/1769812197128.jpg"
+img_path = "samples/sample_4-Dateien/1769818949905.jpg"
+
+# Load the image
+img = cv2.imread(img_path)
+# cv2.imread returns None instead of raising when the file cannot be read
+if img is None:
+    raise OSError(f"could not read image: {img_path!r}")
+
+# Convert to grayscale
+gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+# Light blur to reduce grid noise
+gray = cv2.GaussianBlur(gray, (3, 3), 0)
+
+# Binarize; Otsu picks the threshold automatically, so the 0 is a placeholder
+_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+
+# --- optional preprocessing steps, currently disabled ---
+# Invert (if needed)
+#thresh = cv2.bitwise_not(thresh)
+
+# Morphological closing to close gaps in the characters
+#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+#thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
+
+# --- end of optional preprocessing steps ---
+
+# Save the processed image for debugging
+cv2.imwrite("processed_captcha.png", thresh)
+
+# Tesseract config: default engine, single-line mode, lowercase letters and digits only
+custom_config = r"""
+--oem 3
+--psm 7
+-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz0123456789
+"""
+
+# Run text recognition on the binarized image
+text = pytesseract.image_to_string(thresh, config=custom_config)
+
+print(text.strip())