mirror of
https://git.roussel.pro/telecom-paris/pact.git
synced 2026-02-09 02:20:17 +01:00
269 lines
8.4 KiB
Plaintext
269 lines
8.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 62,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#pip install unidecode\n",
|
|
"#pip install dataclass\n",
|
|
"#pip install nltk\n",
|
|
"#import os\n",
|
|
"#from unidecode import unidecode\n",
|
|
"#import nltk\n",
|
|
"#from dataclasses import dataclass"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
"# Première partie : présentation du problème et du matériel\n",
|
|
"\n",
|
"Nous cherchons à attribuer à une liste d'avis laissés un score global de satisfaction, ainsi qu'un score de satisfaction pour chaque point sur lequel il est particulièrement intéressant de se pencher (par exemple le délai d'attente dans un parc d'attractions ou la propreté dans un hôtel).\n",
|
|
"\n",
|
"Nous allons pour cela utiliser une base de mots français associés chacun à un score de positivité, ainsi qu'une liste d'avis concernant le musée du Louvre.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"#Emplacement du fichier contenant des mots français associés à un score sous la forme\n",
|
|
"#mot1->son score\n",
|
|
"#mot2->son score\n",
|
|
"#mot3->son score ...\n",
|
|
"\n",
|
|
"lexiconPath = r\"fr_lexicon.txt\" \n",
|
|
"\n",
|
|
"\n",
|
"#Emplacement du fichier contenant des avis sur le musée du Louvre sous la forme\n",
|
|
"#Avis1\n",
|
|
"#//Avis2\n",
|
|
"#//Avis3 ...\n",
|
|
"\n",
|
|
"reviewPath = r\"LouvreAvis.txt\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
"Nous créons une liste de 27 listes, indexée par la première lettre du mot, pour ne pas avoir à parcourir le lexique en entier à chaque recherche d'un mot d'un avis. La dernière case regroupe les expressions qui ne commencent pas par une lettre de a à z."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 82,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Bucketed lexicon: slots 0-25 hold the [word, score] pairs whose word starts\n",
"# with 'a'..'z'; slot 26 collects every other entry (digits, accented\n",
"# initials, punctuation, ...).\n",
"scoreTable = [[] for _ in range(27)]\n",
"\n",
"\n",
"def add(scoreword):\n",
"    \"\"\"Append a [word, score] pair to the matching bucket of scoreTable.\n",
"\n",
"    The bucket is chosen from the first character of the word: 'a'..'z' map\n",
"    to slots 0..25, anything else goes to slot 26. A pair whose word is empty\n",
"    is ignored because it has no first character to index on.\n",
"    \"\"\"\n",
"    word = scoreword[0]\n",
"    if not word:\n",
"        return\n",
"    first = word[0]\n",
"    if 'a' <= first <= 'z':\n",
"        scoreTable[ord(first) - ord('a')].append(scoreword)\n",
"    else:\n",
"        scoreTable[26].append(scoreword)\n",
"\n",
"\n",
"# Fill scoreTable from the lexicon, one \"word->score\" entry per line.\n",
"# The context manager closes the file even when an entry fails to parse.\n",
"with open(lexiconPath, \"r\") as scoreWords:\n",
"    for lineNumber, line in enumerate(scoreWords, start=1):\n",
"        entry = line.strip()\n",
"        if not entry:\n",
"            # Blank lines carry no entry.\n",
"            continue\n",
"        word, arrow, score = entry.rpartition(\"->\")\n",
"        if not arrow:\n",
"            raise ValueError(\n",
"                f\"{lexiconPath}, line {lineNumber}: expected 'word->score', got {entry!r}\"\n",
"            )\n",
"        add([word.lower(), float(score)])\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 83,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"['abandon', -2.4]\n",
|
|
"['abandonnant', -1.6]\n",
|
|
"['abandonne', -1.3]\n",
|
|
"['badass', 1.4]\n",
|
|
"['badin', 1.2]\n",
|
|
"['badine', 1.2]\n",
|
|
"['cachant', -1.2]\n",
|
|
"['cache', -0.7]\n",
|
|
"['cachent', -0.7]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Show the first three [word, score] pairs of the first three buckets.\n",
"for letter in range(3):\n",
"    bucket = scoreTable[letter]\n",
"    for position in range(3):\n",
"        print(bucket[position])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
"# Deuxième partie : analyse d'avis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 67,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Read the whole review file inside a context manager so that the handle is\n",
"# closed right away; individual reviews are separated by '//'.\n",
"with open(reviewPath, \"r\") as file:\n",
"    reviews = file.read().split('//')\n",
|
|
" \n",
|
"# (Partial) list of keywords that are relevant for a museum.\n",
"# 'accueil' is listed next to its frequent misspelling 'acceuil' so that\n",
"# correctly spelled reviews are matched as well.\n",
"keys=['attente', \"d'attente\", 'queue', 'patienter', 'patience', 'patient',\n",
"      'patients', 'patiente', 'patientes',\n",
"      'impolitesse' ,'impolie', 'impolies', 'impoli', 'impolis',\n",
"      'gentillesse', 'amabilité', 'aimable', 'aimables','gentil', 'gentils',\n",
"      'gentille', 'gentilles', 'personnel',\n",
"      'sales', 'sale', 'saleté', 'propre', 'propres', 'propreté',\n",
"      'acceuil', 'accueil', 'prix', 'cher', 'chers', 'chère', 'chères',\n",
"      'onéreux', 'onéreuse', 'onéreuses', 'abordable',\n",
"      'raisonnable', 'raisonnables', 'accessible', 'accessibilité', 'orienter','employé',\n",
"      'employés', 'employées', 'employée',\n",
"      'orientation', 'orienté', \"s'orienter\",\n",
"      'désorienter', 'désorienté', 'désorientée', 'désorientés', 'désorientées',\n",
"      'panneau', 'panneaux', 'signalétique', 'labyrinthe',\n",
"      'perdu', 'perdus', 'perdue', 'perdues']\n",
"\n",
"# Table of [keyword, associated score] pairs.\n",
"keyWords = []\n",
"\n",
"# Accumulator for the review scores, from which the mean score of a review\n",
"# is later derived.\n",
"averageScore = 0\n",
|
|
"\n",
|
"# Look up a review word in the lexicon.\n",
"def search(word):\n",
"    \"\"\"Return [word, score] for a word found in scoreTable, -1 otherwise.\n",
"\n",
"    Only the bucket selected by the first character of the word is scanned:\n",
"    slots 0-25 for 'a'..'z', slot 26 for anything else. The empty string is\n",
"    never found.\n",
"    \"\"\"\n",
"    if not word:\n",
"        return -1\n",
"    code = ord(word[0])\n",
"    slot = code - 97 if 97 <= code <= 122 else 26\n",
"    # The first entry carrying this word wins.\n",
"    for entry in scoreTable[slot]:\n",
"        if entry[0] == word:\n",
"            return [word, entry[1]]\n",
"    return -1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 68,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Start from clean accumulators so that re-running this cell does not\n",
"# double-count the reviews.\n",
"keyScores = {}\n",
"averageScore = 0\n",
"\n",
"for Review in reviews:\n",
"    # Tokens are stripped of surrounding punctuation and lower-cased once, so\n",
"    # that the lexicon lookup and the keyword matching see the same words.\n",
"    # Tokens made only of punctuation become empty and are dropped.\n",
"    stripped = (token.strip(',.;:!?()').lower() for token in Review.split())\n",
"    review = [token for token in stripped if token]\n",
"    # Keywords mentioned in this review; this does not depend on the word\n",
"    # being scored, so it is computed once per review.\n",
"    mentionedKeys = [key for key in keys if key in review]\n",
"    reviewScore = 0\n",
"    # Keywords of this review that were credited with at least one score.\n",
"    miniKey = set()\n",
"    for word in review:\n",
"        temp = search(word)\n",
"        if temp == -1:\n",
"            continue\n",
"        # The polarity of the word is credited to every keyword of the review.\n",
"        for key in mentionedKeys:\n",
"            if key in keyScores:\n",
"                keyScores[key] += temp[1]\n",
"            else:\n",
"                keyScores[key] = temp[1]\n",
"        miniKey.update(mentionedKeys)\n",
"        reviewScore += temp[1]\n",
"    averageScore += reviewScore\n",
"\n",
"# keyWords is a list of [keyword, score] pairs, in first-credited order.\n",
"keyWords = [[key, score] for key, score in keyScores.items()]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
"# Troisième partie : affichage des résultats"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 69,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Format: [[Mot-clé, score associé]]\n",
|
|
"[['cher', -2.7], ['orientation', -2.7], ['abordable', 3.4000000000000004], ['personnel', -3.0999999999999996], [\"s'orienter\", -0.10000000000000053], ['queue', 1.0000000000000004], ['orienter', -0.30000000000000004], ['prix', 7.4], ['raisonnable', 7.4], [\"d'attente\", 0.9000000000000012]]\n",
|
|
"Nombre d'avis: 23\n",
|
|
"Score moyen d'un avis: 1.5652173913043481\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Derive the mean into its own name: dividing averageScore in place would\n",
"# shrink it again every time this cell is re-run.\n",
"meanReviewScore = averageScore / len(reviews)\n",
"print(\"Format: [[Mot-clé, score associé]]\")\n",
"print(keyWords)\n",
"print(\"Nombre d'avis: \", len(reviews))\n",
"print(\"Score moyen d'un avis: \", meanReviewScore)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|