Integration of image recognition into the kiosk interface

Quentin Roussel
2023-03-22 14:39:56 +01:00
parent 15bc1c7714
commit d896767543
11 changed files with 131 additions and 156 deletions

View File

@@ -1,75 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

trainSet = datasets.ImageFolder(r'C:\Users\kesha\Desktop\TelecomParis\PACT\DownloadedDataset\train',
                                transform=transforms.ToTensor())
valSet = datasets.ImageFolder(r'C:\Users\kesha\Desktop\TelecomParis\PACT\DownloadedDataset\val',
                              transform=transforms.ToTensor())
trainloader = torch.utils.data.DataLoader(trainSet, batch_size=50, shuffle=True)
valloader = torch.utils.data.DataLoader(valSet, batch_size=50, shuffle=True)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Conv2d(in_channels, out_channels (number of filters), kernel_size)
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.fc1 = nn.Linear(64*14*14, 16)
        self.fc2 = nn.Linear(16, 6)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        # size = 16*126*126, then 16*63*63 after pooling
        x = self.pool(F.relu(self.conv2(x)))
        # size = 32*61*61, then 32*30*30 after pooling
        x = self.pool(F.relu(self.conv3(x)))
        # size = 64*28*28, then 64*14*14 after pooling
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

net = Net()
print(net)

criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(net.parameters(), lr=0.001)
device = torch.device('cuda')  # note: defined but the model and batches are never moved to it

Epoch = []
Loss = []
for epoch in range(1, 7):
    print('Starting epoch ' + str(epoch))
    current_loss = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        # very important: clear the gradients accumulated by the previous step
        optimizer.zero_grad()
        output = net(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    print('epoch: ', epoch, " loss: ", current_loss)
    Loss.append(current_loss)
    Epoch.append(epoch)

plt.plot(Epoch, Loss)
plt.title('Cost function value per epoch')
plt.show()
# to save a model: torch.save(net.state_dict(), file_location)
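The last comment shows how the trained weights would be saved. As a minimal companion sketch (the 'model.pth' checkpoint path, the 'example.jpg' test image and the 128x128 resize are illustrative assumptions, not part of the original script), loading them back for a single prediction could look like this:

import torch
from PIL import Image
from torchvision import transforms

# Assumption: Net is the class defined above and the weights were saved with
# torch.save(net.state_dict(), 'model.pth').
net = Net()
net.load_state_dict(torch.load('model.pth', map_location='cpu'))
net.eval()

# fc1 expects 64*14*14 features, which corresponds to a 128x128 RGB input,
# so the image is resized accordingly (assumption: the dataset images are that size).
preprocess = transforms.Compose([transforms.Resize((128, 128)), transforms.ToTensor()])
img = preprocess(Image.open('example.jpg').convert('RGB')).unsqueeze(0)

with torch.no_grad():
    predicted_class = net(img).argmax(dim=1).item()
print(predicted_class)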

View File

@@ -1,44 +1,78 @@
import cv2
import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
# For webcam input:
cap = cv2.VideoCapture(0)
hands = mp_hands.Hands(
model_complexity=0,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
BUFFER_LENGTH = 30
TH_FRACTION = 3/4
resultBuffer = []
def frame():
def reconnaissancePouce(handLandmarks):
    etatDuPouce = ["neutre", "thumbs_down", "thumbs_up"]
    i = 0
    j = 0
    # If any non-thumb finger is extended (its two bone segments point the same
    # way, positive dot product), the hand is not making a thumb gesture.
    for cpt in range(0, 4):
        V1 = [handLandmarks[(4*cpt)+6][0] - handLandmarks[(4*cpt)+5][0],
              handLandmarks[(4*cpt)+6][1] - handLandmarks[(4*cpt)+5][1]]
        V2 = [handLandmarks[(4*cpt)+8][0] - handLandmarks[(4*cpt)+6][0],
              handLandmarks[(4*cpt)+8][1] - handLandmarks[(4*cpt)+6][1]]
        j = np.dot(V1, V2)
        if j > 0:
            return etatDuPouce[0]
    # Thumb direction: compare the tip (landmark 4) and the MCP joint (landmark 2)
    # relative to the base of the thumb (landmark 1); image y grows downward.
    V1 = [handLandmarks[4][0] - handLandmarks[1][0], handLandmarks[4][1] - handLandmarks[1][1]]
    V2 = [handLandmarks[2][0] - handLandmarks[1][0], handLandmarks[2][1] - handLandmarks[1][1]]
    if np.dot(V1, V2) > 0 and handLandmarks[4][1] > handLandmarks[2][1]:
        i = 1  # tip below the joint: thumbs_down
    elif np.dot(V1, V2) > 0 and handLandmarks[4][1] < handLandmarks[2][1]:
        i = 2  # tip above the joint: thumbs_up
    return etatDuPouce[i]
def getThumbState():
if cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
return
return False
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = hands.process(image)
# Draw the hand annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
# print(results)
handLandmarks = []
if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
image,
hand_landmarks,
mp_hands.HAND_CONNECTIONS,
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())
# Flip the image horizontally for a selfie-view display.
# cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
return
# cap.release()
# Fill list with x and y positions of each landmark
for landmarks in hand_landmarks.landmark:
handLandmarks.append([landmarks.x, landmarks.y])
thumbState = reconnaissancePouce(handLandmarks)
resultBuffer.append(thumbState)
if(len(resultBuffer) > BUFFER_LENGTH):
resultBuffer.pop(0)
thumbsUpCount = sum(map(lambda x : x == "thumbs_up", resultBuffer))
thumbsDownCount = sum(map(lambda x : x == "thumbs_down", resultBuffer))
print(thumbsUpCount,thumbsDownCount)
if(thumbsUpCount > TH_FRACTION * BUFFER_LENGTH):
result = "thumbs_up"
elif(thumbsDownCount > TH_FRACTION * BUFFER_LENGTH):
result = "thumbs_down"
else:
result = False
if(thumbState != "neutre"):
return thumbState, handLandmarks[9], np.linalg.norm(np.array(handLandmarks[9]) - np.array(handLandmarks[0])), result
return False
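For reference, a minimal way to exercise getThumbState() outside the websocket server might be the polling loop below; the 30-polls-per-second pacing and the print format are assumptions, and the function is taken to return either False or a (state, coords, size, result) tuple as in the code above:

import time

from hands import getThumbState

while True:
    res = getThumbState()
    if res:
        # state is the per-frame gesture; result is the smoothed verdict over
        # the last BUFFER_LENGTH frames (False until the threshold is reached).
        state, coords, size, result = res
        print(state, coords, round(size, 3), result)
    time.sleep(1 / 30)  # roughly 30 polls per second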

View File

@@ -4,12 +4,14 @@ import math
import websockets
import random
import os
import hands
import time
from hands import getThumbState
values = []
class WebsocketServer:
def __init__(self,getEffects,port=os.getenv("PORT"),host=os.getenv("HOST")) -> None:
self.thumbResult = None
self.state = 0
self.host = host
self.port = port
self.getEffects = getEffects
@@ -21,19 +23,23 @@ class WebsocketServer:
async def handler(self,websocket):
while True:
start = time.time()
messages = self.getEffects()
hands.frame()
if(self.state == 0):
messages, result = self.getEffects()
if(messages != False):
if(result == False):
await websocket.send(json.dumps(messages))
# await asyncio.sleep(1/30)
delay = time.time() - start
values.append(1/delay)
avg = sum(values) / len(values)
dev = [(v - avg) ** 2 for v in values]
print(avg, math.sqrt(sum(dev)/len(dev)))
# Replace this with the function that fetches the effects (in the gesture-recognition module)
else:
self.thumbResult = result
self.state = 1
await websocket.send('{"type":"state","state":2}')
def getEffects():
return {"type": "effects", "effects": [{"type": "thumbs_up", "x":random.randint(0,100), "y": random.randint(0,100), "width": 50, "height": 50}]}
res = getThumbState()
if(res != False):
state, coords, size, result = res
return {"type": "effects", "effects": [{"type": state, "x":coords[0], "y": coords[1], "width": size, "height": size}]}, result
else:
return False,False
server = WebsocketServer(getEffects)
asyncio.run(server.run())
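The handler above pushes two kinds of JSON messages to the browser. As a hedged summary (the field values here are illustrative; x, y, width and height come from normalised MediaPipe landmarks, so the interface has to scale them itself), the shapes are:

# Effect message, sent each frame while self.state == 0 and a non-neutral thumb is visible.
effect_message = {
    "type": "effects",
    "effects": [{"type": "thumbs_up", "x": 0.42, "y": 0.37, "width": 0.21, "height": 0.21}],
}

# State-change message, sent once a thumbs_up/thumbs_down verdict has accumulated in the buffer.
state_message = {"type": "state", "state": 2}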

View File

@@ -2,3 +2,4 @@ websockets
requests
opencv-python
mediapipe
numpy

View File

@@ -55,13 +55,13 @@ services:
restart: always
# Web server for the kiosk interface
# interface_borne:
# image: httpd:latest
# volumes:
# - ./interface_borne:/usr/local/apache2/htdocs/
# container_name: interface_borne
# ports:
# - 8888:80
  interface_borne:
    image: httpd:latest
    volumes:
      - ./interface_borne:/usr/local/apache2/htdocs/
    container_name: interface_borne
    ports:
      - 8888:80
# Web server for the admin interface
interface_admin:
@@ -75,24 +75,24 @@ services:
# # Kiosk backend: Python scripts for video and audio recognition
# # They send the info to the kiosk interface over a websocket so the interface updates quickly
# # Reviews are updated by making requests to the API
# backend_reconnaissance:
# build: ./backend_reconnaissance
# container_name: backend_reconnaissance
# restart: always
# devices:
# - /dev/video3:/dev/video0
# environment:
# - PORT=5000
# - HOST=backend_reconnaissance
# ports:
# # This container is the websocket server whose client is the kiosk interface running in the browser
# - 5000:5000
  backend_reconnaissance:
    build: ./backend_reconnaissance
    container_name: backend_reconnaissance
    restart: always
    devices:
      - /dev/video3:/dev/video0
    environment:
      - PORT=5000
      - HOST=backend_reconnaissance
    ports:
      # This container is the websocket server whose client is the kiosk interface running in the browser
      - 5000:5000
# video_loopback:
# build: ./video_loopback
# container_name: video_loopback
# restart: always
# devices:
# - /dev/video0:/dev/video0
# - /dev/video2:/dev/video1
# - /dev/video3:/dev/video2
  video_loopback:
    build: ./video_loopback
    container_name: video_loopback
    restart: always
    devices:
      - /dev/video0:/dev/video0
      - /dev/video2:/dev/video1
      - /dev/video3:/dev/video2

View File

@@ -84,7 +84,6 @@ class CameraPage {
_frame() {
if (this.streaming && this.enabled && this.width && this.height) {
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
// this.ctx.drawImage(this.video, 0, 0, this.width, this.height);
this._drawEffects();
}
if (this.enabled) {
@@ -106,6 +105,11 @@ class CameraPage {
_drawEffects() {
for (let effect of this.activeEffects) {
let { x, y, width, height } = this._scaleEffect(effect.x, effect.y, effect.width, effect.height);
width = width * this.videoWidth * 2;
height = height * this.videoHeight * 2;
x = x * this.videoWidth - width / 2;
y = y * this.videoHeight - height / 2;
console.log(width, height);
if (effect.type == "thumbs_down") {
this._drawThumbsDown(x, y, width, height);
}

View File

@@ -3,14 +3,16 @@ class WebsocketClient {
this.socket = new WebSocket("ws://localhost:5000");
this.socket.addEventListener("open", (event) => {
this.socket.send("connected");
console.log("connected")
});
this.socket.addEventListener("message", (event) => {
this.socket.onmessage = (event) => {
let msg = JSON.parse(event.data);
if (msg.type == "effects") {
onNewEffects(msg.effects);
}else if(msg.type == "state") {
onNewState(msg.state);
}
});
};
}
}

View File

@@ -14,8 +14,11 @@ class StateManager {
this._thankYouPage = new ThankYouPage();
this.wsClient = new WebsocketClient(
(effects) => this._cameraPage.setEffects(effects),
(state) => this.changeState(state)
(effects) => {
this.setState(STATE.video);
this._cameraPage.setEffects(effects)
},
(state) => this.setState(state)
);
this._sleepingPage.enabled = true;