diff --git a/code-speech-To-Text.py b/code-speech-To-Text.py
deleted file mode 100644
index 81152e6..0000000
--- a/code-speech-To-Text.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import librosa
-import os
-from matplotlib.patches import ConnectionPatch
-import matplotlib.pyplot as plt
-import numpy as np
-import scipy.spatial.distance as dist
-import pyaudio
-import wave
-def dp(distmat):
-    N, M = distmat.shape
-    # Initialize the cost matrix
-    costmat = np.zeros((N+1, M+1))
-    for i in range(1, N+1):
-        costmat[i, 0] = np.inf
-    for i in range(1, M+1):
-        costmat[0, i] = np.inf
-
-    for i in range(N):
-        for j in range(M):
-            # Compute the minimal cost of each path: costmat[i][j] can be
-            # reached by three possible moves; we take the cheapest one.
-            penalty = [
-                costmat[i, j],    # case T == 0
-                costmat[i, j+1],  # case T == 1
-                costmat[i+1, j]]  # case T == 2
-            ipenalty = np.argmin(penalty)
-            costmat[i+1, j+1] = distmat[i, j] + penalty[ipenalty]
-
-    # Drop the infinity padding
-    costmat = costmat[1:, 1:]
-    return (costmat, costmat[-1, -1]/(N+M))
-def calculate_mfcc(audio, sr):
-    # Define parameters for MFCC calculation
-    n_mfcc = 13
-    n_fft = 2048
-    hop_length = 512
-    fmin = 0
-    fmax = sr/2
-
-    # Calculate MFCCs
-    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length, fmin=fmin, fmax=fmax)
-    return mfccs.T
-def calculate_dtw_cost(mfccs_query, mfccs_train):
-    distmat = dist.cdist(mfccs_query, mfccs_train, "cosine")
-    costmat, mincost = dp(distmat)
-    return mincost
-def recognize_speech(audio_query, audio_train_list, sr):  # sr: sampling rate
-    # Calculate MFCCs for query audio
-    mfccs_query = calculate_mfcc(audio_query, sr)
-
-    # Calculate DTW cost for each audio in training data
-    dtw_costs = []
-    for audio_train in audio_train_list:
-        mfccs_train = calculate_mfcc(audio_train, sr)
-        mincost = calculate_dtw_cost(mfccs_query, mfccs_train)
-        dtw_costs.append(mincost)
-
-    # Find index of word with lowest DTW cost
-    index = np.argmin(dtw_costs)
-
-    # Return recognized word
-    return index
-
-# Example usage
-def get_recognized_word(recognized_word_index):
-
-    # Define a dictionary to map recognized word indices to actual words
-    word_map = {
-        "un": [0, 1, 2, 3, 4, 5, 6],
-        "deux": [7, 8, 9, 10, 11, 12, 13],
-        "trois": [14, 15, 16, 17, 18, 19],
-        "quatre": [20, 21, 22, 23, 24, 25, 26],
-        "cinq": [27, 28, 29, 30, 31, 32],
-        "six": [33, 34, 35, 36, 37, 38],
-        "sept": [39, 40, 41, 42, 43, 44],
-        "huit": [45, 46, 47, 48, 49, 50, 51],
-        "neuf": [52, 53, 54, 55, 56, 57, 58],
-        "dix": [59, 60, 61, 62, 63, 64, 65],
-        "bien": [66, 67, 68, 69, 70, 71, 72],
-        "super": [127, 128, 129, 130, 131, 132, 133],
-        "génial": [87, 88, 89, 90, 91, 92, 93],
-        "sympa": [134, 135, 136, 137, 138, 139, 140],
-        "propre": [122, 123, 124, 125, 126],
-        "nul": [115, 116, 117, 118, 119, 120, 121],
-        "ennuyant": [80, 81, 82, 83, 84, 85, 86],
-        "j'ai beaucoup aimé": [94, 95, 96, 97, 98, 99, 100],
-        "j'ai trouvé ça génial": [101, 102, 103, 104, 105, 106, 107],
-        "je n'ai pas aimé": [108, 109, 110, 111, 112, 113, 114],
-        "c'était drole": [73, 74, 75, 76, 77, 78, 79],
-    }
-    for word, indices in word_map.items():
-        if recognized_word_index in indices:
-            return word
-    return "Word not recognized"
-def record_audio(filename, duration, sr):
-    chunk = 1024
-    sample_format = pyaudio.paInt16
-    channels = 1
-    record_seconds = duration
-    filename = f"{filename}.wav"
-
-    p = pyaudio.PyAudio()
-
-    stream = p.open(format=sample_format,
-                    channels=channels,
-                    rate=sr,
-                    frames_per_buffer=chunk,
-                    input=True)
-
-    frames = []
-
-    print(f"Enregistrement en cours...")
-
-    for i in range(0, int(sr / chunk * record_seconds)):
-        data = stream.read(chunk)
-        frames.append(data)
-
-    stream.stop_stream()
-    stream.close()
-
-    p.terminate()
-
-
-    print("Enregistrement terminé")
-
-    wf = wave.open(filename, "wb")
-    wf.setnchannels(channels)
-    wf.setsampwidth(p.get_sample_size(sample_format))
-    wf.setframerate(sr)
-    wf.writeframes(b"".join(frames))
-    wf.close()
-
-    print(f"Fichier enregistré sous {filename}")
-def coupe_silence(signal):
-    t = 0
-    if signal[t] == 0:
-        p = 0
-        while signal[t+p] == 0:
-            if p == 88:
-                signal = signal[:t] + signal[t+p:]
-                coupe_silence(signal)
-            else:
-                p = p+1
-"""
-sr = 44100  # sampling rate
-duration = 2.5  # recording duration in seconds
-filename = "audio_query"  # name of the file to save
-
-record_audio(filename, duration, sr)
-audio_query, sr = librosa.load('C:\\Users\\HP\\audio_query.wav', sr=sr)
-
-audio_train_list = [librosa.load('C:\\Users\\HP\\Documents\\cool.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\formidable.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\cest mauvais.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\un.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\parfait.wav', sr=sr)[0]]
-recognized_word_index = recognize_speech(audio_query, audio_train_list, sr)
-print(f'Recognized word: {recognized_word_index}')
-"""
-sr = 44100  # sampling rate
-duration = 6  # recording duration in seconds
-filename = "audio_query"  # name of the file to save
-
-record_audio(filename, duration, sr)
-audio_query, sr = librosa.load('C:\\Users\\HP\\audio_query.wav', sr=sr)
-coupe_silence(audio_query)
-audio_train_list = []
-for file in os.listdir('C:\\Users\\HP\\Documents\\Base de données'):
-    audio_train_list.append(librosa.load('C:\\Users\\HP\\Documents\\Base de données\\' + file, sr=sr)[0])
-recognized_word_index = recognize_speech(audio_query, audio_train_list, sr)
-recognized_word = get_recognized_word(recognized_word_index)
-print(f'Recognized word: {recognized_word}')
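A note on the deleted coupe_silence above: it compares float samples to exactly 0, recurses without ever returning a value, and applies + to NumPy arrays (elementwise addition, not concatenation), so the caller's audio_query is never actually trimmed. A minimal replacement sketch, assuming librosa is available; the 30 dB threshold is an arbitrary illustrative choice, not a project value:

import librosa

def trim_silence(signal, top_db=30):
    # librosa.effects.trim drops leading/trailing frames quieter than
    # top_db below the signal peak and returns a trimmed copy; the caller
    # must rebind the result, since the array is not modified in place.
    trimmed, _ = librosa.effects.trim(signal, top_db=top_db)
    return trimmed

The call site would then read audio_query = trim_silence(audio_query) instead of coupe_silence(audio_query).
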
diff --git a/code/backend_reconnaissance/Dockerfile b/code/backend_reconnaissance/Dockerfile
index 3847647..1a75d72 100644
--- a/code/backend_reconnaissance/Dockerfile
+++ b/code/backend_reconnaissance/Dockerfile
@@ -12,7 +12,6 @@ RUN apt-get install ffmpeg libsm6 libxext6 portaudio19-dev python3-pyaudio pulse
 # Install the Python dependencies
 COPY requirements.txt .
 RUN python -m pip install -r requirements.txt
-
 # Create the working directory
 WORKDIR /app
 COPY . /app
diff --git a/code/backend_reconnaissance/audio_data.zip b/code/backend_reconnaissance/audio_data.zip
new file mode 100644
index 0000000..46e243c
Binary files /dev/null and b/code/backend_reconnaissance/audio_data.zip differ
diff --git a/code/backend_reconnaissance/audio_detector.py b/code/backend_reconnaissance/audio_detector.py
index f767274..a3e170f 100644
--- a/code/backend_reconnaissance/audio_detector.py
+++ b/code/backend_reconnaissance/audio_detector.py
@@ -131,7 +131,6 @@ def get_word_metadata(word):
 # Todo: detect when no grade is given
 def get_grade():
-    return 4
 sr = 44100  # sampling rate
 duration = 6  # recording duration in seconds
 filename = "recording"  # name of the file to save
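As the audio_detector.py hunk stands, removing return 4 leaves def get_grade() with an empty body directly ahead of the module-level sr = 44100, which Python rejects with an IndentationError at import time (unless lines outside the hunk's context fill the body). A sketch of a placeholder consistent with the Todo above it; treating None as "no grade given" is an assumption, not an established project convention:

def get_grade():
    # Grade detection is not implemented yet; return None explicitly so
    # callers can tell "no grade given" apart from a real grade value.
    return None
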
diff --git a/code/docker-compose.yaml b/code/docker-compose.yaml
index 6bb99b4..91b7c98 100644
--- a/code/docker-compose.yaml
+++ b/code/docker-compose.yaml
@@ -91,18 +91,16 @@ services:
     restart: always
     devices:
       - /dev/video3:/dev/video0
-      # volumes:
-      #   - /run/user/1000/pulse/native:/run/user/1000/pulse/native
+      - /dev/snd:/dev/snd
     environment:
       - PORT=5000
       - HOST=backend_reconnaissance
       - API_HOST=reviews_api
       - API_PORT=8080
-      # - PULSE_SERVER=unix:/run/user/1000/pulse/native
-    user: 1000:1000
     ports:
       # This container is the websocket server whose client is the kiosk interface running in the browser
      - 5000:5000
+    user: root
diff --git a/code/video_loopback/Dockerfile b/code/video_loopback/Dockerfile
index 684238e..5a04b5f 100644
--- a/code/video_loopback/Dockerfile
+++ b/code/video_loopback/Dockerfile
@@ -1,3 +1,3 @@
 FROM alpine:latest
 RUN apk add --no-cache ffmpeg
-CMD ["ffmpeg","-video_size","640x480","-f","video4linux2","-i","/dev/video0","-codec","copy","-f","v4l2","/dev/video1","-codec","copy","-f","v4l2","/dev/video2", "-loglevel","warning"]
\ No newline at end of file
+CMD ["ffmpeg","-video_size","640x480","-f","video4linux2","-i","/dev/video0","-codec","copy","-f","v4l2","/dev/video1","-codec","copy","-f","v4l2","/dev/video2", "-loglevel","debug"]
diff --git a/recording.wav b/recording.wav
deleted file mode 100644
index 72726c0..0000000
Binary files a/recording.wav and /dev/null differ
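The docker-compose change above trades the commented-out PulseAudio socket passthrough for raw ALSA access (/dev/snd) and runs the recognition container as root so it can open the sound devices. A quick in-container smoke test for that setup, assuming pyaudio is importable in the image (it is pulled in via portaudio19-dev/python3-pyaudio in the backend Dockerfile):

import pyaudio

# List the capture devices PortAudio can see; if /dev/snd is mapped and
# permissions are right, at least one input-capable device should appear.
p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    info = p.get_device_info_by_index(i)
    if info["maxInputChannels"] > 0:
        print(i, info["name"], "-", int(info["maxInputChannels"]), "input channel(s)")
p.terminate()

If the list comes back empty, the usual suspects are a missing ALSA userspace in the image or host permissions on /dev/snd rather than the compose mapping itself.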