Zihao Mu
committed on
Commit
·
b81d9fd
1
Parent(s):
d30c3db
add scale factor to DB demo (#96)
Browse files
models/text_detection_db/demo.py
CHANGED
|
@@ -73,29 +73,40 @@ if __name__ == '__main__':
|
|
| 73 |
|
| 74 |
# If input is an image
|
| 75 |
if args.input is not None:
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
# Inference
|
| 80 |
results = model.infer(image)
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Print results
|
| 83 |
print('{} texts detected.'.format(len(results[0])))
|
| 84 |
for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
|
| 85 |
print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
|
| 86 |
|
| 87 |
# Draw results on the input image
|
| 88 |
-
|
| 89 |
|
| 90 |
# Save results if save is true
|
| 91 |
if args.save:
|
| 92 |
print('Resutls saved to result.jpg\n')
|
| 93 |
-
cv.imwrite('result.jpg',
|
| 94 |
|
| 95 |
# Visualize results in a new window
|
| 96 |
if args.vis:
|
| 97 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
| 98 |
-
cv.imshow(args.input,
|
| 99 |
cv.waitKey(0)
|
| 100 |
else: # Omit input to call default camera
|
| 101 |
deviceId = 0
|
|
@@ -103,22 +114,33 @@ if __name__ == '__main__':
|
|
| 103 |
|
| 104 |
tm = cv.TickMeter()
|
| 105 |
while cv.waitKey(1) < 0:
|
| 106 |
-
hasFrame,
|
| 107 |
if not hasFrame:
|
| 108 |
print('No frames grabbed!')
|
| 109 |
break
|
| 110 |
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# Inference
|
| 113 |
tm.start()
|
| 114 |
results = model.infer(frame) # results is a tuple
|
| 115 |
tm.stop()
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
# Draw results on the input image
|
| 118 |
-
|
| 119 |
|
| 120 |
# Visualize results in a new Window
|
| 121 |
-
cv.imshow('{} Demo'.format(model.name),
|
| 122 |
|
| 123 |
tm.reset()
|
| 124 |
|
|
|
|
| 73 |
|
| 74 |
# If input is an image
|
| 75 |
if args.input is not None:
|
| 76 |
+
original_image = cv.imread(args.input)
|
| 77 |
+
original_w = original_image.shape[1]
|
| 78 |
+
original_h = original_image.shape[0]
|
| 79 |
+
scaleHeight = original_h / args.height
|
| 80 |
+
scaleWidth = original_w / args.width
|
| 81 |
+
image = cv.resize(original_image, [args.width, args.height])
|
| 82 |
|
| 83 |
# Inference
|
| 84 |
results = model.infer(image)
|
| 85 |
|
| 86 |
+
# Scale the results bounding box
|
| 87 |
+
for i in range(len(results[0])):
|
| 88 |
+
for j in range(4):
|
| 89 |
+
box = results[0][i][j]
|
| 90 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
| 91 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
| 92 |
+
|
| 93 |
# Print results
|
| 94 |
print('{} texts detected.'.format(len(results[0])))
|
| 95 |
for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
|
| 96 |
print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
|
| 97 |
|
| 98 |
# Draw results on the input image
|
| 99 |
+
original_image = visualize(original_image, results)
|
| 100 |
|
| 101 |
# Save results if save is true
|
| 102 |
if args.save:
|
| 103 |
print('Resutls saved to result.jpg\n')
|
| 104 |
+
cv.imwrite('result.jpg', original_image)
|
| 105 |
|
| 106 |
# Visualize results in a new window
|
| 107 |
if args.vis:
|
| 108 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
| 109 |
+
cv.imshow(args.input, original_image)
|
| 110 |
cv.waitKey(0)
|
| 111 |
else: # Omit input to call default camera
|
| 112 |
deviceId = 0
|
|
|
|
| 114 |
|
| 115 |
tm = cv.TickMeter()
|
| 116 |
while cv.waitKey(1) < 0:
|
| 117 |
+
hasFrame, original_image = cap.read()
|
| 118 |
if not hasFrame:
|
| 119 |
print('No frames grabbed!')
|
| 120 |
break
|
| 121 |
|
| 122 |
+
original_w = original_image.shape[1]
|
| 123 |
+
original_h = original_image.shape[0]
|
| 124 |
+
scaleHeight = original_h / args.height
|
| 125 |
+
scaleWidth = original_w / args.width
|
| 126 |
+
frame = cv.resize(original_image, [args.width, args.height])
|
| 127 |
# Inference
|
| 128 |
tm.start()
|
| 129 |
results = model.infer(frame) # results is a tuple
|
| 130 |
tm.stop()
|
| 131 |
|
| 132 |
+
# Scale the results bounding box
|
| 133 |
+
for i in range(len(results[0])):
|
| 134 |
+
for j in range(4):
|
| 135 |
+
box = results[0][i][j]
|
| 136 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
| 137 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
| 138 |
+
|
| 139 |
# Draw results on the input image
|
| 140 |
+
original_image = visualize(original_image, results, fps=tm.getFPS())
|
| 141 |
|
| 142 |
# Visualize results in a new Window
|
| 143 |
+
cv.imshow('{} Demo'.format(model.name), original_image)
|
| 144 |
|
| 145 |
tm.reset()
|
| 146 |
|
models/text_recognition_crnn/demo.py
CHANGED
|
@@ -75,8 +75,12 @@ if __name__ == '__main__':
|
|
| 75 |
|
| 76 |
# If input is an image
|
| 77 |
if args.input is not None:
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# Inference
|
| 82 |
results = detector.infer(image)
|
|
@@ -86,18 +90,25 @@ if __name__ == '__main__':
|
|
| 86 |
recognizer.infer(image, box.reshape(8))
|
| 87 |
)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
# Draw results on the input image
|
| 90 |
-
|
| 91 |
|
| 92 |
# Save results if save is true
|
| 93 |
if args.save:
|
| 94 |
print('Resutls saved to result.jpg\n')
|
| 95 |
-
cv.imwrite('result.jpg',
|
| 96 |
|
| 97 |
# Visualize results in a new window
|
| 98 |
if args.vis:
|
| 99 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
| 100 |
-
cv.imshow(args.input,
|
| 101 |
cv.waitKey(0)
|
| 102 |
else: # Omit input to call default camera
|
| 103 |
deviceId = 0
|
|
@@ -105,12 +116,17 @@ if __name__ == '__main__':
|
|
| 105 |
|
| 106 |
tm = cv.TickMeter()
|
| 107 |
while cv.waitKey(1) < 0:
|
| 108 |
-
hasFrame,
|
| 109 |
if not hasFrame:
|
| 110 |
print('No frames grabbed!')
|
| 111 |
break
|
| 112 |
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
# Inference of text detector
|
| 115 |
tm.start()
|
| 116 |
results = detector.infer(frame)
|
|
@@ -133,10 +149,17 @@ if __name__ == '__main__':
|
|
| 133 |
cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
|
| 134 |
tm.reset()
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
# Draw results on the input image
|
| 137 |
-
|
| 138 |
print(texts)
|
| 139 |
|
| 140 |
# Visualize results in a new Window
|
| 141 |
-
cv.imshow('{} Demo'.format(recognizer.name),
|
| 142 |
|
|
|
|
| 75 |
|
| 76 |
# If input is an image
|
| 77 |
if args.input is not None:
|
| 78 |
+
original_image = cv.imread(args.input)
|
| 79 |
+
original_w = original_image.shape[1]
|
| 80 |
+
original_h = original_image.shape[0]
|
| 81 |
+
scaleHeight = original_h / args.height
|
| 82 |
+
scaleWidth = original_w / args.width
|
| 83 |
+
image = cv.resize(original_image, [args.width, args.height])
|
| 84 |
|
| 85 |
# Inference
|
| 86 |
results = detector.infer(image)
|
|
|
|
| 90 |
recognizer.infer(image, box.reshape(8))
|
| 91 |
)
|
| 92 |
|
| 93 |
+
# Scale the results bounding box
|
| 94 |
+
for i in range(len(results[0])):
|
| 95 |
+
for j in range(4):
|
| 96 |
+
box = results[0][i][j]
|
| 97 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
| 98 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
| 99 |
+
|
| 100 |
# Draw results on the input image
|
| 101 |
+
original_image = visualize(original_image, results, texts)
|
| 102 |
|
| 103 |
# Save results if save is true
|
| 104 |
if args.save:
|
| 105 |
print('Resutls saved to result.jpg\n')
|
| 106 |
+
cv.imwrite('result.jpg', original_image)
|
| 107 |
|
| 108 |
# Visualize results in a new window
|
| 109 |
if args.vis:
|
| 110 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
| 111 |
+
cv.imshow(args.input, original_image)
|
| 112 |
cv.waitKey(0)
|
| 113 |
else: # Omit input to call default camera
|
| 114 |
deviceId = 0
|
|
|
|
| 116 |
|
| 117 |
tm = cv.TickMeter()
|
| 118 |
while cv.waitKey(1) < 0:
|
| 119 |
+
hasFrame, original_image = cap.read()
|
| 120 |
if not hasFrame:
|
| 121 |
print('No frames grabbed!')
|
| 122 |
break
|
| 123 |
|
| 124 |
+
original_w = original_image.shape[1]
|
| 125 |
+
original_h = original_image.shape[0]
|
| 126 |
+
scaleHeight = original_h / args.height
|
| 127 |
+
scaleWidth = original_w / args.width
|
| 128 |
+
|
| 129 |
+
frame = cv.resize(original_image, [args.width, args.height])
|
| 130 |
# Inference of text detector
|
| 131 |
tm.start()
|
| 132 |
results = detector.infer(frame)
|
|
|
|
| 149 |
cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
|
| 150 |
tm.reset()
|
| 151 |
|
| 152 |
+
# Scale the results bounding box
|
| 153 |
+
for i in range(len(results[0])):
|
| 154 |
+
for j in range(4):
|
| 155 |
+
box = results[0][i][j]
|
| 156 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
| 157 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
| 158 |
+
|
| 159 |
# Draw results on the input image
|
| 160 |
+
original_image = visualize(original_image, results, texts)
|
| 161 |
print(texts)
|
| 162 |
|
| 163 |
# Visualize results in a new Window
|
| 164 |
+
cv.imshow('{} Demo'.format(recognizer.name), original_image)
|
| 165 |
|