mirror of
https://git.roussel.pro/telecom-paris/pact.git
synced 2026-02-09 02:20:17 +01:00
tentative fix son
This commit is contained in:
@@ -1,167 +0,0 @@
|
|||||||
import librosa
|
|
||||||
import os
|
|
||||||
from matplotlib.patches import ConnectionPatch
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import numpy as np
|
|
||||||
import scipy.spatial.distance as dist
|
|
||||||
import pyaudio
|
|
||||||
import wave
|
|
||||||
def dp(distmat):
    """Accumulate the cheapest DTW warping-path cost over a distance matrix.

    Parameters
    ----------
    distmat : (N, M) ndarray of pairwise frame distances.

    Returns
    -------
    tuple
        (accumulated-cost matrix with the infinite border removed,
         final path cost normalized by N + M).
    """
    n_rows, n_cols = distmat.shape

    # Accumulated-cost matrix with a one-cell border; every border cell
    # except the origin is infinite so paths can only start at (0, 0).
    acc = np.zeros((n_rows + 1, n_cols + 1))
    acc[1:, 0] = np.inf
    acc[0, 1:] = np.inf

    for r in range(n_rows):
        for c in range(n_cols):
            # Three admissible predecessors: diagonal, vertical, horizontal.
            best_prev = min(acc[r, c], acc[r, c + 1], acc[r + 1, c])
            acc[r + 1, c + 1] = distmat[r, c] + best_prev

    # Drop the infinite border before returning.
    acc = acc[1:, 1:]
    return (acc, acc[-1, -1] / (n_rows + n_cols))
|
|
||||||
def calculate_mfcc(audio, sr):
    """Compute MFCC features for an audio signal.

    Parameters
    ----------
    audio : 1-D waveform samples.
    sr : sampling rate in Hz.

    Returns
    -------
    ndarray of shape (n_frames, 13): one MFCC vector per analysis frame.
    """
    # Fixed analysis parameters: 13 coefficients, 2048-sample FFT window,
    # 512-sample hop, mel filter bank spanning the full band up to Nyquist.
    mfccs = librosa.feature.mfcc(
        y=audio,
        sr=sr,
        n_mfcc=13,
        n_fft=2048,
        hop_length=512,
        fmin=0,
        fmax=sr / 2,
    )
    # librosa returns (n_mfcc, n_frames); transpose so frames index rows.
    return mfccs.T
|
|
||||||
def calculate_dtw_cost(mfccs_query, mfccs_train):
    """Return the normalized DTW alignment cost between two MFCC sequences.

    Frames are compared with the cosine distance; the dynamic-programming
    recursion in ``dp`` then finds the cheapest warping path through the
    resulting distance matrix.
    """
    frame_distances = dist.cdist(mfccs_query, mfccs_train, "cosine")
    _, min_cost = dp(frame_distances)
    return min_cost
|
|
||||||
def recognize_speech(audio_query, audio_train_list, sr):
    """Identify which training recording best matches the query utterance.

    Parameters
    ----------
    audio_query : waveform to recognize.
    audio_train_list : list of reference waveforms.
    sr : sampling rate (Hz) shared by all recordings.

    Returns
    -------
    Index into ``audio_train_list`` of the reference whose normalized DTW
    cost against the query is lowest.
    """
    mfccs_query = calculate_mfcc(audio_query, sr)

    # DTW cost of the query against every reference recording.
    dtw_costs = [
        calculate_dtw_cost(mfccs_query, calculate_mfcc(audio_train, sr))
        for audio_train in audio_train_list
    ]

    # The best match is the reference with the smallest alignment cost.
    return np.argmin(dtw_costs)
|
|
||||||
|
|
||||||
# Example usage
|
|
||||||
def get_recognized_word(recognized_word_index):
    """Map a recognized recording index back to its word/phrase label.

    Parameters
    ----------
    recognized_word_index : index returned by ``recognize_speech``.

    Returns
    -------
    The matching label, or ``"Word not recognized"`` when the index is not
    covered by the mapping.
    """
    # Each label owns the indices of its recordings in the training set.
    word_map = {
        "un" : [0,1,2,3,4,5,6],
        "deux" : [7, 8, 9, 10, 11, 12, 13],
        "trois" : [14, 15, 16, 17, 18, 19],
        "quatre" : [20,21, 22, 23, 24, 25, 26],
        "cinq" : [27 ,28, 29, 30, 31, 32],
        "six" : [33 ,34, 35, 36, 37, 38],
        "sept" : [39 , 40, 41, 42, 43, 44],
        "huit" : [45,46, 47, 48, 49, 50, 51],
        "neuf" : [52,53, 54, 55, 56, 57, 58],
        "dix" : [59,60, 61, 62, 63, 64, 65],
        "bien" : [66 ,67, 68, 69, 70, 71, 72],
        "super" : [127,128,129,130, 131, 132, 133],
        "génial" : [87,88, 89, 90, 91, 92, 93],
        "sympa" : [134,135,136,137, 138, 139, 140],
        "propre" : [122, 123, 124, 125, 126],
        "nul" : [115 ,116, 117, 118, 119, 120, 121],
        "ennuyant" : [80 ,81, 82, 83, 84, 85, 86],
        "j'ai beaucoup aimé" : [94 ,95, 96, 97, 98, 99, 100],
        "j'ai trouvé ça génial" : [101 ,102, 103, 104, 105, 106, 107],
        "je n'ai pas aimé" : [108 ,109, 110, 111, 112, 113, 114],
        "c'était drole" : [73,74, 75, 76, 77, 78, 79],
    }

    # First label whose index list contains the recognized index wins.
    return next(
        (word for word, indices in word_map.items()
         if recognized_word_index in indices),
        "Word not recognized",
    )
|
|
||||||
def record_audio(filename, duration, sr):
    """Record ``duration`` seconds of mono audio from the default input
    device and save it as ``<filename>.wav``.

    Parameters
    ----------
    filename : base name of the output file (".wav" is appended).
    duration : recording length in seconds.
    sr : sampling rate in Hz.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    record_seconds = duration
    # Bug fix: the parameter was previously clobbered by a constant name, so
    # the caller-supplied filename was ignored (the caller records
    # "audio_query" and then loads audio_query.wav). Append the extension to
    # the parameter instead.
    filename = f"{filename}.wav"

    p = pyaudio.PyAudio()
    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=sr,
                    frames_per_buffer=chunk,
                    input=True)

    frames = []
    print("Enregistrement en cours...")

    # Read enough fixed-size chunks to cover the requested duration.
    for _ in range(int(sr / chunk * record_seconds)):
        frames.append(stream.read(chunk))

    # Release the audio device before writing the file.
    stream.stop_stream()
    stream.close()
    p.terminate()

    print("Enregistrement terminé")

    # Wave_write supports the context-manager protocol, guaranteeing the
    # file is closed even if a write fails.
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(p.get_sample_size(sample_format))
        wf.setframerate(sr)
        wf.writeframes(b"".join(frames))

    print(f"Fichier enregistré sous {filename}")
|
|
||||||
def coupe_silence(signal, min_run=88):
    """Remove long runs of silence (consecutive zero samples) from a signal.

    Fixes the original implementation, which (a) used ``+`` on numpy
    arrays — elementwise addition, not concatenation, (b) discarded the
    result of its recursive call, (c) returned nothing, leaving the
    caller's signal untouched, and (d) could loop forever on a long
    zero-run. The 88-sample threshold hard-coded in the original is kept
    as the default and exposed as a parameter.

    Parameters
    ----------
    signal : sequence of samples (list or 1-D numpy array).
    min_run : zero-runs of at least this many consecutive samples are
        removed (default 88, the original's threshold).

    Returns
    -------
    numpy array containing ``signal`` with every zero-run of length
    ``>= min_run`` deleted. (The original returned ``None``; callers that
    ignored the return value still work.)
    """
    kept = []
    run = []  # current run of consecutive zero samples, decision deferred
    for sample in signal:
        if sample == 0:
            run.append(sample)
        else:
            # Short pauses are kept; long ones are silence and dropped.
            if len(run) < min_run:
                kept.extend(run)
            run = []
            kept.append(sample)
    # Flush a trailing zero-run with the same rule.
    if len(run) < min_run:
        kept.extend(run)
    return np.asarray(kept)
|
|
||||||
"""
|
|
||||||
sr = 44100 # fréquence d'échantillonnage
|
|
||||||
duration = 2.5 # durée d'enregistrement en secondes
|
|
||||||
filename = "audio_query" # nom du fichier à enregistrer
|
|
||||||
|
|
||||||
record_audio(filename, duration, sr)
|
|
||||||
audio_query, sr = librosa.load('C:\\Users\\HP\\audio_query.wav', sr=sr)
|
|
||||||
|
|
||||||
audio_train_list = [librosa.load('C:\\Users\\HP\\Documents\\cool.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\formidable.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\cest mauvais.wav', sr=sr)[0] , librosa.load('C:\\Users\\HP\\Documents\\un.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\parfait.wav', sr=sr)[0]]
|
|
||||||
recognized_word_index = recognize_speech(audio_query, audio_train_list, sr)
|
|
||||||
print(f'Recognized word: {recognized_word_index}')
|
|
||||||
"""
|
|
||||||
# --- Main script: record a query, then recognize it against the database ---
sr = 44100                 # sampling rate (Hz)
duration = 6               # recording length in seconds
filename = "audio_query"   # base name of the file to record

# Capture the user's utterance from the microphone, then load it back.
record_audio(filename, duration, sr)
audio_query, sr = librosa.load('C:\\Users\\HP\\audio_query.wav', sr=sr)

# NOTE(review): coupe_silence's return value is discarded here — confirm
# whether the trimmed signal was meant to replace audio_query.
coupe_silence(audio_query)

# Load every reference recording from the database directory.
audio_train_list = [
    librosa.load('C:\\Users\\HP\\Documents\\Base de données\\' + file, sr=sr)[0]
    for file in os.listdir('C:\\Users\\HP\\Documents\\Base de données')
]

recognized_word_index = recognize_speech(audio_query, audio_train_list, sr)
recognized_word = get_recognized_word(recognized_word_index)
print(f'Recognized word: {recognized_word}')
|
|
||||||
@@ -12,7 +12,6 @@ RUN apt-get install ffmpeg libsm6 libxext6 portaudio19-dev python3-pyaudio pulse
|
|||||||
# Installation des dépendances python
|
# Installation des dépendances python
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN python -m pip install -r requirements.txt
|
RUN python -m pip install -r requirements.txt
|
||||||
|
|
||||||
# Création du répertoire de travail
|
# Création du répertoire de travail
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY . /app
|
COPY . /app
|
||||||
|
|||||||
BIN
code/backend_reconnaissance/audio_data.zip
Normal file
BIN
code/backend_reconnaissance/audio_data.zip
Normal file
Binary file not shown.
@@ -131,7 +131,6 @@ def get_word_metadata(word):
|
|||||||
|
|
||||||
#Todo : detecte si pas de note donnée
|
#Todo : detecte si pas de note donnée
|
||||||
def get_grade():
|
def get_grade():
|
||||||
return 4
|
|
||||||
sr = 44100 # fréquence d'échantillonnage
|
sr = 44100 # fréquence d'échantillonnage
|
||||||
duration = 6 # durée d'enregistrement en secondes
|
duration = 6 # durée d'enregistrement en secondes
|
||||||
filename = "recording" # nom du fichier à enregistrer
|
filename = "recording" # nom du fichier à enregistrer
|
||||||
|
|||||||
@@ -91,18 +91,16 @@ services:
|
|||||||
restart: always
|
restart: always
|
||||||
devices:
|
devices:
|
||||||
- /dev/video3:/dev/video0
|
- /dev/video3:/dev/video0
|
||||||
# volumes:
|
- /dev/snd:/dev/snd
|
||||||
# - /run/user/1000/pulse/native:/run/user/1000/pulse/native
|
|
||||||
environment:
|
environment:
|
||||||
- PORT=5000
|
- PORT=5000
|
||||||
- HOST=backend_reconnaissance
|
- HOST=backend_reconnaissance
|
||||||
- API_HOST=reviews_api
|
- API_HOST=reviews_api
|
||||||
- API_PORT=8080
|
- API_PORT=8080
|
||||||
# - PULSE_SERVER=unix:/run/user/1000/pulse/native
|
|
||||||
user: 1000:1000
|
|
||||||
ports:
|
ports:
|
||||||
#Ce container est le serveur websocker dont le client est l'interface de la borne qui tourne dans le navigateur
|
#Ce container est le serveur websocker dont le client est l'interface de la borne qui tourne dans le navigateur
|
||||||
- 5000:5000
|
- 5000:5000
|
||||||
|
user: root
|
||||||
|
|
||||||
video_loopback:
|
video_loopback:
|
||||||
build: ./video_loopback
|
build: ./video_loopback
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
FROM alpine:latest
|
FROM alpine:latest
|
||||||
RUN apk add --no-cache ffmpeg
|
RUN apk add --no-cache ffmpeg
|
||||||
CMD ["ffmpeg","-video_size","640x480","-f","video4linux2","-i","/dev/video0","-codec","copy","-f","v4l2","/dev/video1","-codec","copy","-f","v4l2","/dev/video2", "-loglevel","warning"]
|
CMD ["ffmpeg","-video_size","640x480","-f","video4linux2","-i","/dev/video0","-codec","copy","-f","v4l2","/dev/video1","-codec","copy","-f","v4l2","/dev/video2", "-loglevel","debug"]
|
||||||
|
|||||||
BIN
recording.wav
BIN
recording.wav
Binary file not shown.
Reference in New Issue
Block a user