Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
yolocows
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Jappe Franke
yolocows
Commits
319f9a47
Commit
319f9a47
authored
Apr 15, 2019
by
Jappe Franke
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added a few clear comments
parent
bd63e39b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
213 additions
and
209 deletions
+213
-209
.gitignore
.gitignore
+4
-0
video.py
video.py
+209
-209
No files found.
.gitignore
0 → 100644
View file @
319f9a47
/.idea
/.project
/.pydevproject
/__pycache__
video.py
View file @
319f9a47
from
__future__
import
division
import
time
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
numpy
as
np
import
cv2
from
util
import
*
from
darknet
import
Darknet
from
preprocess
import
prep_image
,
inp_to_image
,
letterbox_image
import
pandas
as
pd
import
pickle
as
pkl
import
argparse
def get_test_input(input_dim, CUDA):
    """Load the bundled test image and return it as a network-ready input.

    Reads dog-cycle-car.png, resizes it to (input_dim, input_dim), converts
    BGR HWC uint8 to RGB CHW float in [0, 1] with a leading batch axis, and
    wraps it in a Variable (moved to the GPU when CUDA is truthy).
    """
    frame = cv2.imread("dog-cycle-car.png")
    frame = cv2.resize(frame, (input_dim, input_dim))
    # BGR -> RGB via the reversed channel slice, then HWC -> CHW
    chw = frame[:, :, ::-1].transpose((2, 0, 1))
    # add batch dimension and scale pixel values to [0, 1]
    batch = chw[np.newaxis, :, :, :] / 255.0
    tensor = Variable(torch.from_numpy(batch).float())
    return tensor.cuda() if CUDA else tensor
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    Letterboxes the frame to (inp_dim, inp_dim), converts it to an RGB CHW
    float tensor in [0, 1] with a batch axis, and returns that tensor
    together with the untouched original frame and its (width, height) pair.
    """
    orig_im = img
    # (width, height) of the source frame, for rescaling boxes later
    dim = orig_im.shape[1], orig_im.shape[0]
    boxed = letterbox_image(orig_im, (inp_dim, inp_dim))
    # BGR -> RGB, HWC -> CHW; copy() materializes the strided view
    chw = boxed[:, :, ::-1].transpose((2, 0, 1)).copy()
    tensor = torch.from_numpy(chw).float().div(255.0).unsqueeze(0)
    return tensor, orig_im, dim
def write(x, img, olen):
    """Draw one detection on img: its box, a class:confidence label, and the
    running detection counter in the top-left corner.

    x    -- one detection row; x[1:5] are the box corners, x[-2] the
            confidence, x[-1] the class index (indexes the global `classes`)
    img  -- frame to draw on (mutated in place and also returned)
    olen -- number of detections in this frame, rendered as the counter
    """
    # box corners as plain int tuples (cv2 drawing wants integer pixels)
    c1 = tuple(int(v) for v in x[1:3])
    c2 = tuple(int(v) for v in x[3:5])
    #classname readout x[-1]
    cls = int(x[-1])
    #confidence readout x[-2]
    label = "{0}:{1:.2f}".format(classes[cls], x[-2])
    strolen = "{0}".format(olen)
    #just one color for every frame; no rainbowboxing
    color = (255, 0, 0)
    #color = random.choice(colors)
    cv2.rectangle(img, c1, c2, color, 1)
    #counter background
    #cv2.rectangle(img, (0,10), (60,60),color, -1)
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    # filled background strip for the label, sized to the rendered text
    c2 = c1[0] + t_size[0] + 3, c1[1] - t_size[1] - 4
    cv2.rectangle(img, c1, c2, color, -1)
    #counter
    cv2.putText(img, strolen, (2, 50), cv2.FONT_HERSHEY_DUPLEX, 1.4,
                [0, 0, 255], 2, cv2.LINE_AA)
    # BUG FIX: the org argument of putText must be integer pixel coordinates;
    # the original passed c1[1] - 2.5 (a float), which current OpenCV rejects.
    cv2.putText(img, label, (c1[0], c1[1] - 3), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                [225, 255, 255], 1, cv2.LINE_AA)
    return img
def arg_parse():
    """
    Parse arguments to the detect module.

    Returns an argparse.Namespace with video, dataset, confidence,
    nms_thresh, cfgfile, weightsfile and reso attributes.
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')
    parser.add_argument("--video", dest='video',
                        help="Video to run detection upon",
                        default="video.avi", type=str)
    parser.add_argument("--dataset", dest="dataset",
                        help="Dataset on which the network has been trained",
                        default="pascal")
    # type=float so a command-line override arrives as a number, matching
    # the numeric default (main() still calls float() on it, so this is
    # backward-compatible)
    parser.add_argument("--confidence", dest="confidence",
                        help="Object Confidence to filter predictions",
                        default=0.5, type=float)
    # fixed help-text typo: "Threshhold" -> "Threshold"
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS Threshold",
                        default=0.4, type=float)
    parser.add_argument("--cfg", dest='cfgfile',
                        help="Config file",
                        default="cfg/yolov3.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile',
                        help="weightsfile",
                        default="yolov3.weights", type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)
    return parser.parse_args()
if __name__ == '__main__':
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    #CUDA = torch.cuda.is_available()
    num_classes = 80
    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

    print("Loading network......")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    # YOLO downsamples by a factor of 32, so the input side must be a
    # multiple of 32 and larger than a single cell
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    # one warm-up pass so later per-frame timings are representative
    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()

    # PERF FIX: class names and the color palette are constant, but the
    # original reloaded both from disk inside the frame loop — once per
    # detected frame. Load them once, up front. `write` reads the global
    # `classes`; `colors` is kept for the commented-out random-color option.
    classes = load_classes('data/coco.names')
    colors = pkl.load(open("pallete", "rb"))

    videofile = args.video
    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            # (w, h) repeated to (w, h, w, h) so both box corners rescale
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            # write_results signals "no detections" with a plain int:
            # show the raw frame and move on
            if type(output) == int:
                frames += 1
                print("FPS:{:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            # undo the letterbox: scale factor used when fitting the frame
            # into the square network input
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            # clip boxes that spill outside the original frame
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            # draw every detection (write mutates orig_im in place)
            list(map(lambda x: write(x, orig_im, len(output)), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS:{:5.2f}".format(frames / (time.time() - start)))
        else:
            break
from
__future__
import
division
import
time
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
numpy
as
np
import
cv2
from
util
import
*
from
darknet
import
Darknet
from
preprocess
import
prep_image
,
inp_to_image
,
letterbox_image
import
pandas
as
pd
import
pickle
as
pkl
import
argparse
def get_test_input(input_dim, CUDA):
    """Return the bundled test picture as a network-ready input tensor.

    Loads dog-cycle-car.png, resizes to the square network size, reorders
    BGR HWC to RGB CHW, normalizes to [0, 1], adds a batch axis, and wraps
    the result in a Variable (on the GPU when CUDA is truthy).
    """
    picture = cv2.imread("dog-cycle-car.png")
    picture = cv2.resize(picture, (input_dim, input_dim))
    # reversed channel slice flips BGR -> RGB; transpose gives CHW layout
    channels_first = picture[:, :, ::-1].transpose((2, 0, 1))
    normalized = channels_first[np.newaxis, :, :, :] / 255.0
    result = Variable(torch.from_numpy(normalized).float())
    if CUDA:
        result = result.cuda()
    return result
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    Letterboxes the frame into an (inp_dim, inp_dim) square, converts it to
    an RGB CHW float tensor scaled to [0, 1] with a batch axis, and returns
    (tensor, original frame, (width, height) of the original frame).
    """
    orig_im = img
    # remember the source frame size so boxes can be mapped back later
    dim = orig_im.shape[1], orig_im.shape[0]
    padded = letterbox_image(orig_im, (inp_dim, inp_dim))
    # channel flip (BGR -> RGB) plus HWC -> CHW; copy() makes it contiguous
    reordered = padded[:, :, ::-1].transpose((2, 0, 1)).copy()
    net_input = torch.from_numpy(reordered).float().div(255.0).unsqueeze(0)
    return net_input, orig_im, dim
def write(x, img, olen):
    """Draw one detection on img: its bounding box, a class:confidence label,
    and the per-frame detection counter in the top-left corner.

    x    -- one detection row; x[1:5] are the box corners, x[-2] the
            confidence, x[-1] the class index (indexes the global `classes`)
    img  -- frame to draw on (mutated in place and also returned)
    olen -- number of detections in this frame, rendered as the counter
    """
    # box corners as plain int tuples (cv2 drawing wants integer pixels)
    c1 = tuple(int(v) for v in x[1:3])
    c2 = tuple(int(v) for v in x[3:5])
    #classname readout x[-1]
    cls = int(x[-1])
    #confidence readout x[-2] and add it to the label
    label = "{0}:{1:.2f}".format(classes[cls], x[-2])
    strolen = "{0}".format(olen)
    #just one color for every frame; no rainbowboxing :)
    color = (255, 0, 0)
    #color = random.choice(colors)
    cv2.rectangle(img, c1, c2, color, 1)
    #counter background
    #cv2.rectangle(img, (0,10), (60,60),color, -1)
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    # filled background strip for the label, sized to the rendered text
    c2 = c1[0] + t_size[0] + 3, c1[1] - t_size[1] - 4
    cv2.rectangle(img, c1, c2, color, -1)
    #counter
    cv2.putText(img, strolen, (2, 50), cv2.FONT_HERSHEY_DUPLEX, 1.4,
                [0, 0, 255], 2, cv2.LINE_AA)
    # BUG FIX: the org argument of putText must be integer pixel coordinates;
    # the original passed c1[1] - 2.5 (a float), which current OpenCV rejects.
    cv2.putText(img, label, (c1[0], c1[1] - 3), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                [225, 255, 255], 1, cv2.LINE_AA)
    return img
def arg_parse():
    """
    Parse arguments to the detect module.

    Returns an argparse.Namespace with video, dataset, confidence,
    nms_thresh, cfgfile, weightsfile and reso attributes.
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')
    parser.add_argument("--video", dest='video',
                        help="Video to run detection upon",
                        default="video.avi", type=str)
    parser.add_argument("--dataset", dest="dataset",
                        help="Dataset on which the network has been trained",
                        default="pascal")
    # type=float so a command-line override arrives as a number, matching
    # the numeric default (main() still calls float() on it, so this is
    # backward-compatible)
    parser.add_argument("--confidence", dest="confidence",
                        help="Object Confidence to filter predictions",
                        default=0.5, type=float)
    # fixed help-text typo: "Threshhold" -> "Threshold"
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS Threshold",
                        default=0.4, type=float)
    parser.add_argument("--cfg", dest='cfgfile',
                        help="Config file",
                        default="cfg/yolov3.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile',
                        help="weightsfile",
                        default="yolov3.weights", type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)
    return parser.parse_args()
if __name__ == '__main__':
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    #CUDA = torch.cuda.is_available()
    num_classes = 80
    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

    print("Loading network......")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    # YOLO downsamples by a factor of 32, so the input side must be a
    # multiple of 32 and larger than a single cell
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    # one warm-up pass so later per-frame timings are representative
    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()

    # PERF FIX: class names and the color palette are constant, but the
    # original reloaded both from disk inside the frame loop — once per
    # detected frame. Load them once, up front. `write` reads the global
    # `classes`; `colors` is kept for the commented-out random-color option.
    classes = load_classes('data/coco.names')
    colors = pkl.load(open("pallete", "rb"))

    videofile = args.video
    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            # (w, h) repeated to (w, h, w, h) so both box corners rescale
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            # write_results signals "no detections" with a plain int:
            # show the raw frame and move on
            if type(output) == int:
                frames += 1
                print("FPS:{:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            # undo the letterbox: scale factor used when fitting the frame
            # into the square network input
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            # clip boxes that spill outside the original frame
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            # draw every detection (write mutates orig_im in place)
            list(map(lambda x: write(x, orig_im, len(output)), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS:{:5.2f}".format(frames / (time.time() - start)))
        else:
            break
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment