add ocr code
This commit is contained in:
34
ocr/recognize.py
Normal file
34
ocr/recognize.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from PIL import Image
|
||||
import pytesseract
|
||||
import sys
|
||||
|
||||
"""OCR for captcha images using Tesseract only."""
|
||||
|
||||
# TEST config: "--psm 7" asks Tesseract to treat the image as a single text
# line, and the whitelist restricts output to lowercase letters and digits.
# The digit list must contain all ten digits (the previous value omitted "7",
# so that digit could never be recognized).
custom_config = r'--psm 7 -c tessedit_char_whitelist="abcdefghijklmnopqrstuvwxyz1234567890"'
|
||||
|
||||
def recognize_captcha(img_path, threshold=150):
    """Run Tesseract OCR on a captcha image and return the raw text.

    The image is converted to grayscale, binarized with a fixed threshold,
    and handed to ``pytesseract.image_to_string`` using the module-level
    ``custom_config``.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        threshold: Grayscale cut-off in the range 0-255. Pixels darker than
            this value map to 0, all others map to 1. Defaults to 150.

    Returns:
        The string produced by Tesseract, unmodified (it may contain
        trailing newlines or more than one line).
    """
    # Open inside a context manager so the underlying file handle is
    # released as soon as the grayscale copy has been made.
    with Image.open(img_path) as source:
        gray = source.convert("L")

    # 1. Threshold the image: a 256-entry lookup table, one entry per
    #    possible grayscale value.
    table = [0 if level < threshold else 1 for level in range(256)]
    binary = gray.point(table, '1')

    # 2. Recognize with Tesseract.
    return pytesseract.image_to_string(binary, config=custom_config)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Exactly one argument (the image path) is expected. Stop after printing
    # the usage message; otherwise a missing argument would fall through to
    # sys.argv[1] and raise IndexError.
    if len(sys.argv) != 2:
        print("Usage: python recognize.py <image_filename>")
        sys.exit(1)

    res = recognize_captcha(sys.argv[1])
    # The OCR result may span several lines; only the first one is printed.
    # str.split always returns at least one element, so indexing is safe.
    print(res.split("\n")[0])
|
||||
Reference in New Issue
Block a user