tentative fix son

2026-04-10 16:40:20 +02:00 · 2023-03-23 19:03:23 +01:00
parent 0af05fb361
commit f978ed0a8b
7 changed files with 3 additions and 174 deletions
--- a/code-speech-To-Text.py
+++ b/code-speech-To-Text.py
@@ -1,167 +0,0 @@
-import librosa
-import os
-from matplotlib.patches import ConnectionPatch
-import matplotlib.pyplot as plt
-import numpy as np
-import scipy.spatial.distance as dist
-import pyaudio
-import wave
-def dp(distmat):
-    N,M = distmat.shape
-    # Initialisons the cost matrix
-    costmat =np.zeros((N+1,M+1))
-    for i in range (1,N+1):
-        costmat[i,0]=np.inf
-    for i in range (1,M+1):
-        costmat[0,i]=np.inf
-
-    for i in range (N):
-        for j in range (M):
-            #on calcule le cout minimal pour chaque chemin.pour atteindre the costmat[i][j] il y a trois chemins possibles on choisit celui de cout minimal
-            penalty = [
-              costmat[i,j],     # cas T==0
-              costmat[i,j+1] ,  # cas T==1
-              costmat[i+1,j]]   # cas T==2
-            ipenalty = np.argmin(penalty)
-            costmat[i+1,j+1] = distmat[i,j] + penalty[ipenalty]
-
-    #enlever les valeurs de l infini
-    costmat = costmat[1: , 1:]
-    return (costmat, costmat[-1, -1]/(N+M))
-def calculate_mfcc(audio, sr):
-    # Define parameters for MFCC calculation
-    n_mfcc = 13
-    n_fft = 2048
-    hop_length = 512
-    fmin = 0
-    fmax = sr/2
-
-    # Calculate MFCCs
-    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length, fmin=fmin, fmax=fmax)
-    return mfccs.T
-def calculate_dtw_cost(mfccs_query , mfccs_train):
-    distmat = dist.cdist(mfccs_query, mfccs_train,"cosine")
-    costmat,mincost = dp(distmat)
-    return mincost
-def recognize_speech(audio_query, audio_train_list, sr):#sr frequence d echantillonnage
-    # Calculate MFCCs for query audio
-    mfccs_query = calculate_mfcc(audio_query, sr)
-
-    # Calculate DTW cost for each audio in training data
-    dtw_costs = []
-    for audio_train in audio_train_list:
-        mfccs_train = calculate_mfcc(audio_train, sr)
-        mincost = calculate_dtw_cost(mfccs_query, mfccs_train)
-        dtw_costs.append(mincost)
-
-    # Find index of word with lowest DTW cost
-    index = np.argmin(dtw_costs)
-
-    # Return recognized word
-    return index
-
-# Example usage
-def get_recognized_word(recognized_word_index):
-
-    # Define a dictionary to map recognized word indices to actual words
-    word_map = {
-    "un" : [0,1,2,3,4,5,6],
-    "deux" : [7, 8, 9, 10, 11, 12, 13],
-    "trois" : [14, 15, 16, 17, 18, 19],
-    "quatre" : [20,21, 22, 23, 24, 25, 26],
-    "cinq" : [27 ,28, 29, 30, 31, 32],
-    "six" : [33 ,34, 35, 36, 37, 38],
-    "sept" : [39 , 40, 41, 42, 43, 44],
-    "huit" : [45,46, 47, 48, 49, 50, 51],
-    "neuf" : [52,53, 54, 55, 56, 57, 58],
-    "dix" : [59,60, 61, 62, 63, 64, 65],
-    "bien" : [66 ,67, 68, 69, 70, 71, 72],
-    "super" : [127,128,129,130, 131, 132, 133],
-    "génial" : [87,88, 89, 90, 91, 92, 93],
-    "sympa" : [134,135,136,137, 138, 139, 140],
-    "propre" : [122, 123, 124, 125, 126],
-    "nul" : [115 ,116, 117, 118, 119, 120, 121],
-    "ennuyant" : [80 ,81, 82, 83, 84, 85, 86],
-    "j'ai beaucoup aimé" : [94 ,95, 96, 97, 98, 99, 100],
-    "j'ai trouvé ça génial" : [101 ,102, 103, 104, 105, 106, 107],
-    "je n'ai pas aimé" : [108 ,109, 110, 111, 112, 113, 114],
-    "c'était drole" : [73,74, 75, 76, 77, 78, 79],
-    }
-    for word, indices in word_map.items():
-        if recognized_word_index in indices:
-            return word
-    return "Word not recognized"
-def record_audio(filename, duration, sr):
-    chunk = 1024
-    sample_format = pyaudio.paInt16
-    channels = 1
-    record_seconds = duration
-    filename = f"{filename}.wav"
-
-    p = pyaudio.PyAudio()
-
-    stream = p.open(format=sample_format,
-                    channels=channels,
-                    rate=sr,
-                    frames_per_buffer=chunk,
-                    input=True)
-
-    frames = []
-
-    print(f"Enregistrement en cours...")
-
-    for i in range(0, int(sr / chunk * record_seconds)):
-        data = stream.read(chunk)
-        frames.append(data)
-
-    stream.stop_stream()
-    stream.close()
-
-    p.terminate()
-
-
-    print("Enregistrement terminé")
-
-    wf = wave.open(filename, "wb")
-    wf.setnchannels(channels)
-    wf.setsampwidth(p.get_sample_size(sample_format))
-    wf.setframerate(sr)
-    wf.writeframes(b"".join(frames))
-    wf.close()
-
-    print(f"Fichier enregistré sous {filename}")
-def coupe_silence(signal):
-    t = 0
-    if signal[t] == 0 :
-        p = 0
-        while signal[t+p] == 0 :
-            if p == 88 :
-                signal = signal[:t] + signal[t+p:]
-                coupe_silence(signal)
-            else :
-                p = p+1
-"""
-sr = 44100  # fréquence d'échantillonnage
-duration = 2.5  # durée d'enregistrement en secondes
-filename = "audio_query"  # nom du fichier à enregistrer
-
-record_audio(filename, duration, sr)
-audio_query, sr = librosa.load('C:\\Users\\HP\\audio_query.wav', sr=sr)
-
-audio_train_list = [librosa.load('C:\\Users\\HP\\Documents\\cool.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\formidable.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\cest mauvais.wav', sr=sr)[0] , librosa.load('C:\\Users\\HP\\Documents\\un.wav', sr=sr)[0], librosa.load('C:\\Users\\HP\\Documents\\parfait.wav', sr=sr)[0]]
-recognized_word_index = recognize_speech(audio_query, audio_train_list, sr)
-print(f'Recognized word: {recognized_word_index}')
-"""
-sr = 44100  # fréquence d'échantillonnage
-duration = 6  # durée d'enregistrement en secondes
-filename = "audio_query"  # nom du fichier à enregistrer
-
-record_audio(filename, duration, sr)
-audio_query, sr = librosa.load('C:\\Users\\HP\\audio_query.wav', sr=sr)
-coupe_silence(audio_query)
-audio_train_list = []
-for file in os.listdir('C:\\Users\\HP\\Documents\\Base de données') :
-    audio_train_list.append(librosa.load('C:\\Users\\HP\\Documents\\Base de données\\' + file, sr=sr)[0])
-recognized_word_index = recognize_speech(audio_query, audio_train_list, sr)
-recognized_word = get_recognized_word(recognized_word_index)
-print(f'Recognized word: {recognized_word}')
--- a/code/backend_reconnaissance/Dockerfile
+++ b/code/backend_reconnaissance/Dockerfile
@@ -12,7 +12,6 @@ RUN apt-get install ffmpeg libsm6 libxext6 portaudio19-dev python3-pyaudio pulse
 # Installation des dépendances python
 COPY requirements.txt .
 RUN python -m pip install -r requirements.txt
-
 # Création du répertoire de travail
 WORKDIR /app
 COPY . /app
--- a/code/backend_reconnaissance/audio_data.zip
+++ b/code/backend_reconnaissance/audio_data.zip
--- a/code/backend_reconnaissance/audio_detector.py
+++ b/code/backend_reconnaissance/audio_detector.py
@@ -131,7 +131,6 @@ def get_word_metadata(word):

 #Todo : detecte si pas de note donnée
 def get_grade():
-    return 4
    sr = 44100  # fréquence d'échantillonnage
    duration = 6  # durée d'enregistrement en secondes
    filename = "recording"  # nom du fichier à enregistrer
--- a/code/docker-compose.yaml
+++ b/code/docker-compose.yaml
@@ -91,18 +91,16 @@ services:
    restart: always
    devices:
      - /dev/video3:/dev/video0
-    # volumes:
-    #   - /run/user/1000/pulse/native:/run/user/1000/pulse/native
+      - /dev/snd:/dev/snd
    environment:
      - PORT=5000
      - HOST=backend_reconnaissance
      - API_HOST=reviews_api
      - API_PORT=8080
-      # - PULSE_SERVER=unix:/run/user/1000/pulse/native
-    user: 1000:1000
    ports:
      #Ce container est le serveur websocker dont le client est l'interface de la borne qui tourne dans le navigateur
      - 5000:5000
+    user: root
    
  video_loopback:
    build: ./video_loopback
--- a/code/video_loopback/Dockerfile
+++ b/code/video_loopback/Dockerfile
@@ -1,3 +1,3 @@
 FROM alpine:latest
 RUN apk add --no-cache ffmpeg
-CMD ["ffmpeg","-video_size","640x480","-f","video4linux2","-i","/dev/video0","-codec","copy","-f","v4l2","/dev/video1","-codec","copy","-f","v4l2","/dev/video2", "-loglevel","warning"]
+CMD ["ffmpeg","-video_size","640x480","-f","video4linux2","-i","/dev/video0","-codec","copy","-f","v4l2","/dev/video1","-codec","copy","-f","v4l2","/dev/video2", "-loglevel","debug"]
--- a/recording.wav
+++ b/recording.wav