Files
check_pa/ocr/recognize.py
2026-02-03 00:59:15 +01:00

34 lines
862 B
Python

from PIL import Image
import pytesseract
import sys
"""OCR für Captcha-Bilder mit Tesseract only"""
# Tesseract options used for captcha recognition (test configuration).
# "--psm 7" selects Tesseract's "single text line" page segmentation mode;
# the whitelist restricts the recognized characters to lowercase ASCII
# letters and the digits 0-9.
# NOTE: the whitelist previously read "123456890", i.e. the digit 7 was
# missing, so captchas containing a 7 could never be recognized correctly.
custom_config = r'--psm 7 -c tessedit_char_whitelist="abcdefghijklmnopqrstuvwxyz1234567890"'
def recognize_captcha(img_path, threshold=150, config=None):
    """Recognize the text of a captcha image using Tesseract.

    The image is converted to greyscale, binarized against ``threshold``
    and then handed to ``pytesseract.image_to_string``.

    Args:
        img_path: Image location passed straight to ``PIL.Image.open``.
        threshold: Grey level cut-off for binarization. Grey values below
            it map to 0, all other values map to 1. Defaults to 150.
        config: Tesseract configuration string. When ``None`` the
            module-level ``custom_config`` is used.

    Returns:
        The raw string returned by ``pytesseract.image_to_string``. It is
        not stripped; the script entry point in this file only uses its
        first line.
    """
    if config is None:
        config = custom_config

    # 1. Load and convert to greyscale. The context manager closes the
    #    underlying file deterministically; convert() returns an
    #    independent image that stays valid afterwards.
    with Image.open(img_path) as source:
        grey = source.convert("L")

    # 2. Threshold the image: one lookup-table entry per possible grey level.
    table = [0 if level < threshold else 1 for level in range(256)]
    binary = grey.point(table, '1')

    # 3. Recognize with Tesseract.
    return pytesseract.image_to_string(binary, config=config)
def main(argv=None):
    """Command-line entry point: print the first recognized line of a captcha.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        Process exit status: 0 on success, 1 when the argument count is
        wrong (in which case only the usage message is printed).
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        # Stop here: previously execution continued after the usage message
        # and either crashed on sys.argv[1] or ignored the extra arguments.
        # The message goes to stderr so a caller reading stdout does not
        # mistake it for recognized text.
        print("Usage: python recognize.py <image_filename>", file=sys.stderr)
        return 1

    res = recognize_captcha(args[1])
    # str.split always yields at least one element, so indexing is safe
    # even for an empty result.
    print(res.split("\n")[0])
    return 0


if __name__ == '__main__':
    sys.exit(main())