add ocr code
This commit is contained in:
50
ocr/recognize2.py
Normal file
50
ocr/recognize2.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""OCR for captcha images using OpenCV and Tesseract.

The script loads one captcha image, converts it to a binarized grayscale
image, writes that intermediate image to ``processed_captcha.png`` for
debugging, and prints the text Tesseract recognizes in it.
"""

import cv2
import pytesseract

# Alternative sample inputs, kept for quick manual switching:
# img_path = "captcha.png"
# img_path = "samples/sample_1-Dateien/1769810847305.jpg"
# img_path = "samples/sample_2-Dateien/1769811067589.jpg"
# img_path = "samples/sample_3_files/1769812197128.jpg"
img_path = "samples/sample_4-Dateien/1769818949905.jpg"

# Load the image. cv2.imread does not raise on a missing or undecodable
# file; it returns None, which would otherwise only fail later inside
# cvtColor with an opaque OpenCV assertion error.
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Convert to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Light blur to reduce the grid noise in the background.
gray = cv2.GaussianBlur(gray, (3, 3), 0)

# Binarize; with THRESH_OTSU the threshold is chosen automatically, so the
# explicit threshold argument (0) is ignored.
_, thresh = cv2.threshold(
    gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU
)

# --- Optional preprocessing steps (currently disabled) ---
# Invert (if needed):
# thresh = cv2.bitwise_not(thresh)
#
# Morphological closing to close gaps in the characters:
# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
# thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# ---------------------------------------------------------

# Save the processed image for debugging.
cv2.imwrite("processed_captcha.png", thresh)

# Tesseract configuration: default OCR engine mode (--oem 3), treat the image
# as a single text line (--psm 7), and restrict output to lowercase letters
# and digits.
custom_config = r"""
--oem 3
--psm 7
-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz0123456789
"""

# Recognize the text and print it without surrounding whitespace.
text = pytesseract.image_to_string(thresh, config=custom_config)

print(text.strip())
|
||||
Reference in New Issue
Block a user