-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaction_classifier.py
More file actions
144 lines (110 loc) · 4.96 KB
/
action_classifier.py
File metadata and controls
144 lines (110 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
'''
##### DO Not Touch your face ver.0.2
### Medical Imaging & Intelligent Reality Lab (MI2RL) @ Asan Medical Center(AMC)
# MI2RL website : https://www.mi2rl.co/
# AMC : http://www.amc.seoul.kr/asan/main.do
### Developer
# Sungman Cho : dev.sungman@gmail.com
# Minjee Kim : minjeekim00@gmail.com
# Taehyeong Kim : kimtaehyeong62@gmail.com
# Junmyung Choi : jm5901@gmail.com
# Namkug Kim : namkugkim@gmail.com
### Data contributor
# MI2RL researchers
# Dongwoo Seo, Emergency Medicine@AMC
# Namkug Kim, Convergence Medicine@AMC
### references
# I3D Network (https://github.com/hassony2/kinetics_i3d_pytorch)
#####
'''
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from model.i3dpt import I3D, Unit3Dpy
from torchvision import transforms as T
import time
class ActionClassifier:
    """Clip-level action classifier built on an I3D backbone.

    Buffers incoming RGB frames into a (1, 3, T, H, W) temporal batch of
    ``temporal_batch_size`` frames; once the buffer is full, runs the I3D
    network on the clip.  When a face-touching action is predicted with
    score > 0.9, ``run`` returns a Korean warning string, otherwise ''.
    """

    def __init__(self, model_path, temporal_batch_size=24, img_size=224):
        """
        Args:
            model_path: checkpoint path for ``torch.load`` (may come from a
                ``DataParallel`` model — the 'module.' key prefix is stripped).
            temporal_batch_size: number of frames per clip fed to the network.
            img_size: square side length each frame is resized to.
        """
        # Multi-class action labels; order must match the checkpoint's head.
        self.classes = ['drinking', 'picking_up_phone', 'removing_mask',
                        'resting_chin_on_hand', 'rubbing_eyes', 'touching_glasses',
                        'touching_hairs', 'touching_keyboard', 'touching_nose',
                        'touching_phone', 'wearing_mask']
        # Subset of classes that triggers the "don't touch your face" warning.
        self.touching_actions = ['picking_up_phone', 'resting_chin_on_hand',
                                 'rubbing_eyes', 'touching_hairs', 'touching_nose']

        # Kinetics-pretrained backbone (400 classes); append a 1x1x1 conv head
        # so the output matches our class count.
        self.model = I3D(num_classes=400, modality='rgb')
        self.model.conv3d_0c_1x1 = self._modify_lastlayer(
            self.model.conv3d_0c_1x1, out_ch=len(self.classes))
        # FIX: give Softmax an explicit dim (implicit dim is deprecated; for
        # 2-D logits the legacy heuristic also chose dim 1, so behavior is
        # unchanged).
        self.model.softmax = torch.nn.Softmax(dim=1)

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        self.model.eval()

        # FIX: original compared with `is 'cuda'` — identity comparison
        # against a string literal is implementation-dependent; use ==.
        if self.device == 'cuda':
            state_dict = self._change_key(
                torch.load(model_path, map_location='cuda:0'))
        else:
            state_dict = self._change_key(
                torch.load(model_path, map_location=torch.device('cpu')))
        self.model.load_state_dict(state_dict)

        self.temporal_batch_size = temporal_batch_size
        # (batch, channels, time, height, width) buffer of transformed frames.
        self.temporal_batch = torch.zeros(
            (1, 3, self.temporal_batch_size, img_size, img_size))
        self.transforms = T.Compose([
            T.Resize((img_size, img_size)),
            T.ToTensor(),
            T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])
        self.pred = None         # last string returned by run()
        self.need_sleep = False  # when True, inference is skipped
        self.cnt = 0             # next free time slot in the frame buffer

    def _set_param_requires_grad(self, feature_extracting, training_num):
        # Freeze the first `training_num` + 1 parameter tensors when feature
        # extracting, so only later layers are fine-tuned.
        if feature_extracting:
            for i, param in enumerate(self.model.parameters()):
                if training_num >= i:
                    param.requires_grad = False

    def _modify_lastlayer(self, last_layer, out_ch):
        """Return `last_layer` followed by a 1x1x1 conv mapping 400 -> n classes.

        NOTE(review): `out_ch` is accepted but the head always uses
        len(self.classes); kept as-is for checkpoint compatibility.
        """
        conv2 = Unit3Dpy(in_channels=400, out_channels=len(self.classes),
                         kernel_size=(1, 1, 1), activation=None,
                         use_bias=True, use_bn=False)
        return torch.nn.Sequential(last_layer, conv2)

    def _change_key(self, ord_dict):
        """Strip the DataParallel 'module.' prefix from state-dict keys.

        Order-preserving; equivalent to the original popitem loop (whose
        `key == old` test was always true, so every key was rewritten).
        """
        return {key.replace('module.', ''): value
                for key, value in ord_dict.items()}

    def run(self, img):
        """Buffer one frame; classify the clip when the buffer fills.

        Args:
            img: HxWx3 uint8 numpy array (one RGB frame).

        Returns:
            The Korean warning string when a touching action scores > 0.9,
            '' for any other full-clip prediction, or the previous result
            while the buffer is still filling (None before first inference).
        """
        # Convert the frame to a normalized tensor and store it in the
        # current time slot.
        pil_img = Image.fromarray(img)
        img_tensor = self.transforms(pil_img)
        self.temporal_batch[:, :, self.cnt, :, :] = img_tensor

        # Once the buffer holds a full clip, run the network.
        # FIX: original used bitwise `&` and `is False` on Python bools.
        if self.cnt == self.temporal_batch_size - 1 and not self.need_sleep:
            with torch.no_grad():  # inference only — skip autograd bookkeeping
                self.temporal_batch = self.temporal_batch.to(self.device)
                out_var, out_logit = self.model(self.temporal_batch)
            out = torch.nn.functional.softmax(out_logit, 1).data.cpu()
            top_val, top_idx = torch.sort(out, 1, descending=True)
            self.pred = self.classes[int(top_idx[0, 0].data.numpy())]
            self.score = top_val[0, 0].data.numpy()
            print(self.pred, self.score)
            if self.pred in self.touching_actions and self.score > 0.9:
                self.pred = '얼굴을 만지지 마세요 !'  # "Do not touch your face!"
            else:
                self.pred = ''

        # FIX: the original reset `cnt` only inside the branch above, so with
        # need_sleep=True the counter ran past the buffer and the next call
        # indexed out of bounds.  Wrap the counter unconditionally instead.
        self.cnt = (self.cnt + 1) % self.temporal_batch_size
        return self.pred