#2
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
import sklearn.metrics
import warnings
#3
from gensim.downloader import load
from nltk.tokenize import word_tokenize
import matplotlib.pyplot as plt
import nltk
import time
import json
import pickle
import os.path
# Fetch the NLTK tokenizer data needed by word_tokenize; silence library warnings.
nltk.download('punkt')
warnings.filterwarnings('ignore')
[nltk_data] Downloading package punkt to [nltk_data] C:\Users\26565\AppData\Roaming\nltk_data... [nltk_data] Package punkt is already up-to-date!
# Load the GoEmotions dataset (one row per Reddit post) and name the columns.
df = pd.read_json('goemotions.json')
df.columns = ['post', 'emotion', 'sentiment']
df.head()
| post | emotion | sentiment | |
|---|---|---|---|
| 0 | That game hurt. | sadness | negative |
| 1 | You do right, if you don't care then fuck 'em! | neutral | neutral |
| 2 | Man I love reddit. | love | positive |
| 3 | [NAME] was nowhere near them, he was by the Fa... | neutral | neutral |
| 4 | Right? Considering it’s such an important docu... | gratitude | positive |
# Class distributions for both target columns.
# The original re-read goemotions.json into a second DataFrame and indexed it
# by integer column position; the already-loaded `df` has named columns and
# identical contents, so reuse it instead of a second disk read.
file = df
emotion = df['emotion'].value_counts()
print(emotion)
sentiment = df['sentiment'].value_counts()
print(sentiment)
neutral 55298 approval 11259 admiration 10531 annoyance 8342 disapproval 7686 gratitude 7075 amusement 6130 curiosity 5885 anger 5202 love 4957 confusion 4938 realization 4714 disappointment 4706 optimism 4519 joy 4329 sadness 3827 caring 3523 surprise 3472 excitement 3020 disgust 2914 desire 2147 fear 1778 remorse 1510 embarrassment 1433 nervousness 796 relief 788 pride 690 grief 351 Name: 1, dtype: int64 positive 58968 neutral 55298 negative 38545 ambiguous 19009 Name: 2, dtype: int64
# Bar and pie charts of the emotion and sentiment distributions, saved as PDFs.
# The original built parallel e/s lists with a manual loop; a pandas Series
# already exposes the same data as .index / .values.
plt.xlabel('Emotion')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.bar(emotion.index, emotion.values)
plt.savefig("emotion_bar.pdf")
plt.show()
plt.pie(emotion.values, labels=emotion.index, rotatelabels=True, autopct='%0.1f%%', labeldistance=1.1, radius=1.7)
plt.savefig("emotion_pie.pdf")
plt.show()
plt.xlabel('Sentiment')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.bar(sentiment.index, sentiment.values)
plt.savefig("sentiment_bar.pdf")
plt.show()
plt.pie(sentiment.values, labels=sentiment.index, rotatelabels=True, autopct='%0.1f%%', labeldistance=1.1, radius=1.7)
plt.savefig("sentiment_pie.pdf")
plt.show()
# Bag-of-words features: one row per post, one column per vocabulary token.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df.post)
print(X.shape)
print(df.shape)
(171820, 30449) (171820, 3)
# 80/20 train/test split; both targets kept together so the same rows serve
# the emotion and sentiment tasks.
# NOTE(review): no random_state, so the split changes between runs while the
# models below are cached on disk — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, df[['emotion', 'sentiment']], test_size=0.2)
print(X_train.shape)
(137456, 30449)
performance_data = []
def train_model(model, model_name):
    """Train (or load a cached copy of) `model` for both the 'emotion' and
    'sentiment' targets and record its test-set confusion matrix and
    classification report in the global `performance_data` list.

    Uses module-level X_train, X_test, y_train, y_test. Models are cached
    under trainedmodels/ keyed by model name and label.
    """
    for label in ['emotion', 'sentiment']:
        start = time.time()
        print(f'{label} train {label} start: {start}')
        path = f'trainedmodels/{model_name}_{label}_model.pickle'
        if os.path.exists(path):
            # `with` guarantees the pickle file handle is closed (the
            # original left it open).
            with open(path, 'rb') as picklefile:
                model = pickle.load(picklefile)
            print(f"success load pretrainedmodel: {path}")
        else:
            # fit() returns the fitted estimator, so the original's
            # immediate re-read of the pickle it just wrote was redundant.
            model = model.fit(X_train, y_train[label])
            with open(path, 'wb') as f:
                pickle.dump(model, f)
        # NOTE(review): `model` is reused across labels, so the second label
        # refits whatever was loaded for the first; sklearn's fit() restarts
        # from scratch, so behavior matches the original — confirm intended.
        end = time.time()
        print(f'{label} train {label} end: {end} take time:{end-start}')
        start = time.time()
        y_pred = model.predict(X_test)
        end = time.time()
        print(f'{label} predict {label} end: {end} take time:{end-start}')
        performance_data.append([f'{model_name}, {label}',
                                 confusion_matrix(y_test[label], y_pred),
                                 classification_report(y_test[label], y_pred)])
def grid_search(model, model_name, params):
    """Grid-search `params` for `model` on both targets, caching the fitted
    GridSearchCV on disk, and record best-estimator test metrics in the
    global `performance_data` list."""
    clf = GridSearchCV(model, params, cv=2, n_jobs=-1)
    for label in ['emotion', 'sentiment']:
        path = f'trainedGridSearchCV/{model_name}_{label}_model.pickle'
        if os.path.exists(path):
            with open(path, 'rb') as picklefile:
                clf = pickle.load(picklefile)
            print(f"success load pretrainedGridSearchCV: {path}")
        else:
            clf.fit(X_train, y_train[label])
            with open(path, 'wb') as f:
                pickle.dump(clf, f)
            # (original re-read the pickle it just wrote — dropped)
        # Bug fix: the original concatenated "key_value" pairs with no
        # separator and then sliced off the final character, truncating the
        # last value (e.g. alpha=0.5 was reported as "alpha_0.").
        param_str = '_'.join(f'{k}_{v}' for k, v in clf.best_params_.items())
        y_pred = clf.best_estimator_.predict(X_test)
        performance_data.append([f"{model_name}, {param_str}, {label}",
                                 confusion_matrix(y_test[label], y_pred),
                                 classification_report(y_test[label], y_pred)])
def performance_data_show(train_y, predict_y, model_type, model_name, y_name):
    """Print summary metrics and a classification report for a prediction,
    and save a confusion-matrix heat map under plotpic/."""
    print(f"F1 Score (W): {sklearn.metrics.f1_score(y_true=train_y, y_pred=predict_y, average='weighted')}")
    print(f"F1 Score (M): {sklearn.metrics.f1_score(y_true=train_y, y_pred=predict_y, average='macro')}")
    print(f"Accuracy: {sklearn.metrics.accuracy_score(y_true=train_y, y_pred=predict_y)}")
    print(f"Precision (W): {sklearn.metrics.precision_score(y_true=train_y, y_pred=predict_y, average='weighted')}")
    print(classification_report(y_true=train_y, y_pred=predict_y, zero_division='warn'))
    # Compute the confusion matrix once; the original built it three times
    # and discarded the first result entirely.
    cm = confusion_matrix(y_true=train_y, y_pred=predict_y)
    cmp = sklearn.metrics.ConfusionMatrixDisplay(cm)
    fig, ax = plt.subplots(figsize=(14, 14))
    # `colorbar` expects a bool; the string 'coolwarm' was merely truthy.
    cmp.plot(ax=ax, cmap='Blues', colorbar=True)
    plt.savefig(f"plotpic/{model_type}_{model_name}_{y_name}.jpg")
# Naive Bayes baseline on the raw bag-of-words counts.
base_mnb = MultinomialNB()
train_model(base_mnb, 'MultinomialNB')
emotion train emotion start: 1666553479.534583 success load pretrainedmodel: trainedmodels/MultinomialNB_emotion_model.pickle emotion train emotion end: 1666553479.5764523 take time:0.041869163513183594 emotion predict emotion end: 1666553479.5963995 take time:0.019947290420532227 sentiment train sentiment start: 1666553480.8261125 success load pretrainedmodel: trainedmodels/MultinomialNB_sentiment_model.pickle sentiment train sentiment end: 1666553480.8301017 take time:0.003989219665527344 sentiment predict sentiment end: 1666553480.837083 take time:0.006981372833251953
# Evaluate the saved MultinomialNB emotion model on the held-out test set.
model_typeN = 'trainedmodels'
model_nameN = 'MultinomialNB'
y_name = 'emotion'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.3672337721093812
F1 Score (M): 0.2217391876238735
Accuracy: 0.434990105924805
Precision (W): 0.4824964740391358
precision recall f1-score support
admiration 0.55 0.52 0.54 2088
amusement 0.64 0.35 0.45 1256
anger 0.53 0.19 0.28 1057
annoyance 0.41 0.15 0.22 1669
approval 0.42 0.17 0.25 2203
caring 0.51 0.11 0.19 748
confusion 0.54 0.11 0.18 986
curiosity 0.51 0.14 0.23 1170
desire 0.67 0.07 0.12 452
disappointment 0.46 0.07 0.12 937
disapproval 0.46 0.14 0.22 1544
disgust 0.57 0.10 0.17 576
embarrassment 0.62 0.04 0.07 276
excitement 0.55 0.06 0.10 619
fear 0.58 0.05 0.10 352
gratitude 0.77 0.70 0.74 1395
grief 0.00 0.00 0.00 61
joy 0.45 0.15 0.23 882
love 0.66 0.41 0.51 997
nervousness 0.00 0.00 0.00 151
neutral 0.39 0.89 0.54 11078
optimism 0.56 0.19 0.28 855
pride 0.00 0.00 0.00 118
realization 0.48 0.07 0.12 961
relief 0.50 0.01 0.01 149
remorse 0.62 0.09 0.16 304
sadness 0.57 0.12 0.20 776
surprise 0.53 0.13 0.21 704
accuracy 0.43 34364
macro avg 0.48 0.18 0.22 34364
weighted avg 0.48 0.43 0.37 34364
# Evaluate the saved MultinomialNB sentiment model on the held-out test set.
model_typeN = 'trainedmodels'
model_nameN = 'MultinomialNB'
y_name = 'sentiment'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for sentiment:
F1 Score (W): 0.6013691065212221
F1 Score (M): 0.5687329546672967
Accuracy: 0.6081364218368059
Precision (W): 0.6034752846575681
precision recall f1-score support
ambiguous 0.56 0.33 0.41 3821
negative 0.61 0.59 0.60 7703
neutral 0.56 0.56 0.56 11078
positive 0.66 0.75 0.70 11762
accuracy 0.61 34364
macro avg 0.60 0.56 0.57 34364
weighted avg 0.60 0.61 0.60 34364
# Decision-tree baseline with default (unrestricted) depth.
base_dt = DecisionTreeClassifier()
train_model(base_dt, 'DecisionTreeClassifier')
emotion train emotion start: 1666553490.4644406 success load pretrainedmodel: trainedmodels/DecisionTreeClassifier_emotion_model.pickle emotion train emotion end: 1666553490.5362847 take time:0.07184410095214844 emotion predict emotion end: 1666553490.5821598 take time:0.04587507247924805 sentiment train sentiment start: 1666553492.0941188 success load pretrainedmodel: trainedmodels/DecisionTreeClassifier_sentiment_model.pickle sentiment train sentiment end: 1666553492.1069627 take time:0.012843847274780273 sentiment predict sentiment end: 1666553492.136882 take time:0.02991938591003418
# Evaluate the saved decision-tree emotion model on the held-out test set.
model_typeN = 'trainedmodels'
model_nameN = 'DecisionTreeClassifier'
y_name = 'emotion'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.5900680933288325
F1 Score (M): 0.517346460320759
Accuracy: 0.5935281108136422
Precision (W): 0.6086920863995283
precision recall f1-score support
admiration 0.58 0.82 0.68 2088
amusement 0.58 0.82 0.68 1256
anger 0.47 0.71 0.57 1057
annoyance 0.41 0.59 0.49 1669
approval 0.44 0.56 0.49 2203
caring 0.49 0.58 0.53 748
confusion 0.49 0.57 0.53 986
curiosity 0.55 0.58 0.56 1170
desire 0.52 0.53 0.52 452
disappointment 0.44 0.45 0.44 937
disapproval 0.54 0.49 0.51 1544
disgust 0.56 0.43 0.48 576
embarrassment 0.48 0.36 0.41 276
excitement 0.56 0.46 0.51 619
fear 0.63 0.54 0.58 352
gratitude 0.84 0.81 0.82 1395
grief 0.38 0.31 0.34 61
joy 0.57 0.44 0.50 882
love 0.74 0.72 0.73 997
nervousness 0.54 0.38 0.45 151
neutral 0.70 0.65 0.67 11078
optimism 0.67 0.39 0.49 855
pride 0.55 0.20 0.30 118
realization 0.60 0.23 0.33 961
relief 0.56 0.24 0.34 149
remorse 0.62 0.50 0.55 304
sadness 0.69 0.37 0.48 776
surprise 0.67 0.39 0.49 704
accuracy 0.59 34364
macro avg 0.57 0.50 0.52 34364
weighted avg 0.61 0.59 0.59 34364
# Evaluate the saved decision-tree sentiment model on the held-out test set.
model_typeN = 'trainedmodels'
model_nameN = 'DecisionTreeClassifier'
y_name = 'sentiment'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for sentiment:
F1 Score (W): 0.7191144683689241
F1 Score (M): 0.7031051665896968
Accuracy: 0.7167966476545222
Precision (W): 0.728193338899006
precision recall f1-score support
ambiguous 0.56 0.71 0.62 3821
negative 0.68 0.78 0.72 7703
neutral 0.71 0.66 0.68 11078
positive 0.84 0.73 0.78 11762
accuracy 0.72 34364
macro avg 0.69 0.72 0.70 34364
weighted avg 0.73 0.72 0.72 34364
# MLP baseline. max_iter=2 keeps the run fast but the optimizer cannot
# converge in 2 iterations (sklearn's ConvergenceWarning is silenced above).
base_mlp = MLPClassifier(max_iter=2)
train_model(base_mlp, 'MLPClassifier')
emotion train emotion start: 1666553502.0642376 success load pretrainedmodel: trainedmodels/MLPClassifier_emotion_model.pickle emotion train emotion end: 1666553502.1481023 take time:0.08386468887329102 emotion predict emotion end: 1666553502.219908 take time:0.07180571556091309 sentiment train sentiment start: 1666553503.5254164 success load pretrainedmodel: trainedmodels/MLPClassifier_sentiment_model.pickle sentiment train sentiment end: 1666553503.582265 take time:0.05684852600097656 sentiment predict sentiment end: 1666553503.6530764 take time:0.07081151008605957
# Evaluate the saved MLP emotion model on the held-out test set.
model_typeN = 'trainedmodels'
model_nameN = 'MLPClassifier'
y_name = 'emotion'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.4691171433829688
F1 Score (M): 0.3688237646377684
Accuracy: 0.507391456175067
Precision (W): 0.505999390637638
precision recall f1-score support
admiration 0.61 0.61 0.61 2088
amusement 0.61 0.67 0.64 1256
anger 0.51 0.38 0.44 1057
annoyance 0.40 0.17 0.24 1669
approval 0.45 0.20 0.28 2203
caring 0.46 0.22 0.30 748
confusion 0.51 0.23 0.32 986
curiosity 0.51 0.25 0.34 1170
desire 0.54 0.28 0.37 452
disappointment 0.45 0.13 0.20 937
disapproval 0.42 0.22 0.29 1544
disgust 0.51 0.27 0.36 576
embarrassment 0.53 0.21 0.30 276
excitement 0.56 0.18 0.28 619
fear 0.56 0.44 0.49 352
gratitude 0.83 0.78 0.81 1395
grief 0.00 0.00 0.00 61
joy 0.44 0.36 0.40 882
love 0.64 0.70 0.67 997
nervousness 0.62 0.17 0.27 151
neutral 0.47 0.82 0.60 11078
optimism 0.53 0.38 0.45 855
pride 0.80 0.07 0.12 118
realization 0.48 0.13 0.20 961
relief 0.00 0.00 0.00 149
remorse 0.46 0.61 0.52 304
sadness 0.51 0.32 0.39 776
surprise 0.50 0.40 0.45 704
accuracy 0.51 34364
macro avg 0.50 0.33 0.37 34364
weighted avg 0.51 0.51 0.47 34364
# Evaluate the saved MLP sentiment model on the held-out test set.
model_typeN = 'trainedmodels'
model_nameN = 'MLPClassifier'
y_name = 'sentiment'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for sentiment:
F1 Score (W): 0.647464919955244
F1 Score (M): 0.6146476243162504
Accuracy: 0.6511465487137702
Precision (W): 0.6563661084104959
precision recall f1-score support
ambiguous 0.60 0.38 0.47 3821
negative 0.70 0.56 0.62 7703
neutral 0.57 0.70 0.62 11078
positive 0.73 0.75 0.74 11762
accuracy 0.65 34364
macro avg 0.65 0.60 0.61 34364
weighted avg 0.66 0.65 0.65 34364
# Hyper-parameter grid for MultinomialNB.
# NOTE(review): alpha=0 disables smoothing entirely and sklearn warns/clips
# it — confirm it was intended rather than, say, 1.0.
mnb_params = {'alpha': [0.5, 0, 2]}
grid_search(base_mnb, 'MultinomialNB', mnb_params)
success load pretrainedGridSearchCV: trainedGridSearchCV/MultinomialNB_emotion_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/MultinomialNB_sentiment_model.pickle
# Evaluate the grid-searched MultinomialNB emotion model on the test set.
model_typeN = 'trainedGridSearchCV'
model_nameN = 'MultinomialNB'
y_name = 'emotion'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.43766887216639794
F1 Score (M): 0.32146097286145825
Accuracy: 0.4733150971947387
Precision (W): 0.4811466601589672
precision recall f1-score support
admiration 0.53 0.61 0.56 2088
amusement 0.62 0.56 0.58 1256
anger 0.48 0.32 0.38 1057
annoyance 0.38 0.22 0.28 1669
approval 0.41 0.24 0.30 2203
caring 0.39 0.23 0.29 748
confusion 0.45 0.20 0.28 986
curiosity 0.48 0.26 0.34 1170
desire 0.53 0.16 0.24 452
disappointment 0.42 0.16 0.23 937
disapproval 0.42 0.25 0.32 1544
disgust 0.50 0.21 0.30 576
embarrassment 0.59 0.11 0.18 276
excitement 0.55 0.16 0.25 619
fear 0.60 0.18 0.27 352
gratitude 0.72 0.77 0.75 1395
grief 0.50 0.02 0.03 61
joy 0.47 0.26 0.34 882
love 0.65 0.59 0.62 997
nervousness 0.75 0.04 0.08 151
neutral 0.44 0.78 0.56 11078
optimism 0.48 0.34 0.40 855
pride 0.71 0.04 0.08 118
realization 0.45 0.16 0.24 961
relief 0.45 0.03 0.06 149
remorse 0.63 0.24 0.34 304
sadness 0.49 0.26 0.34 776
surprise 0.50 0.27 0.35 704
accuracy 0.47 34364
macro avg 0.52 0.27 0.32 34364
weighted avg 0.48 0.47 0.44 34364
# Evaluate the grid-searched MultinomialNB sentiment model on the test set.
model_typeN = 'trainedGridSearchCV'
model_nameN = 'MultinomialNB'
y_name = 'sentiment'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for sentiment:
F1 Score (W): 0.6094124858573083
F1 Score (M): 0.5837251828989938
Accuracy: 0.6133453614247468
Precision (W): 0.6085930901600597
precision recall f1-score support
ambiguous 0.53 0.41 0.46 3821
negative 0.61 0.61 0.61 7703
neutral 0.57 0.55 0.56 11078
positive 0.67 0.74 0.70 11762
accuracy 0.61 34364
macro avg 0.59 0.58 0.58 34364
weighted avg 0.61 0.61 0.61 34364
# Hyper-parameter grid for the decision tree.
# NOTE(review): max_depth of 1 or 3 is far shallower than the unrestricted
# baseline tree, which explains the much worse grid-search scores below.
dt_params = {'criterion': ['gini', 'entropy'],
'max_depth': [1, 3],
'min_samples_split': [2, 3, 4]}
grid_search(base_dt, 'DecisionTreeClassifier', dt_params)
success load pretrainedGridSearchCV: trainedGridSearchCV/DecisionTreeClassifier_emotion_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/DecisionTreeClassifier_sentiment_model.pickle
# Evaluate the grid-searched decision-tree emotion model on the test set.
model_typeN = 'trainedGridSearchCV'
model_nameN = 'DecisionTreeClassifier'
y_name = 'emotion'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.21203938551135124
F1 Score (M): 0.06742948033819159
Accuracy: 0.3648294727039926
Precision (W): 0.16060009666388989
precision recall f1-score support
admiration 0.00 0.00 0.00 2088
amusement 0.00 0.00 0.00 1256
anger 0.00 0.00 0.00 1057
annoyance 0.00 0.00 0.00 1669
approval 0.00 0.00 0.00 2203
caring 0.00 0.00 0.00 748
confusion 0.00 0.00 0.00 986
curiosity 0.00 0.00 0.00 1170
desire 0.00 0.00 0.00 452
disappointment 0.00 0.00 0.00 937
disapproval 0.00 0.00 0.00 1544
disgust 0.00 0.00 0.00 576
embarrassment 0.00 0.00 0.00 276
excitement 0.00 0.00 0.00 619
fear 0.00 0.00 0.00 352
gratitude 0.86 0.73 0.79 1395
grief 0.00 0.00 0.00 61
joy 0.00 0.00 0.00 882
love 0.56 0.64 0.59 997
nervousness 0.00 0.00 0.00 151
neutral 0.34 0.98 0.50 11078
optimism 0.00 0.00 0.00 855
pride 0.00 0.00 0.00 118
realization 0.00 0.00 0.00 961
relief 0.00 0.00 0.00 149
remorse 0.00 0.00 0.00 304
sadness 0.00 0.00 0.00 776
surprise 0.00 0.00 0.00 704
accuracy 0.36 34364
macro avg 0.06 0.08 0.07 34364
weighted avg 0.16 0.36 0.21 34364
# Evaluate the grid-searched decision-tree sentiment model on the test set.
model_typeN = 'trainedGridSearchCV'
model_nameN = 'DecisionTreeClassifier'
y_name = 'sentiment'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for sentiment:
F1 Score (W): 0.2596590035440406
F1 Score (M): 0.19699686719010184
Accuracy: 0.37481084856244906
Precision (W): 0.4034366954623935
precision recall f1-score support
ambiguous 0.00 0.00 0.00 3821
negative 0.00 0.00 0.00 7703
neutral 0.34 0.98 0.50 11078
positive 0.86 0.17 0.28 11762
accuracy 0.37 34364
macro avg 0.30 0.29 0.20 34364
weighted avg 0.40 0.37 0.26 34364
# Hyper-parameter grid for the MLP (activation, architecture, optimizer).
# Inherits max_iter=2 from base_mlp, so none of these configurations converge.
mlp_params = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
'hidden_layer_sizes': [(30, 50), (10, 10, 10)],
'solver': ['adam', 'sgd']}
grid_search(base_mlp, 'MLPClassifier', mlp_params)
success load pretrainedGridSearchCV: trainedGridSearchCV/MLPClassifier_emotion_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/MLPClassifier_sentiment_model.pickle
# Evaluate the grid-searched MLP emotion model on the test set.
model_typeN = 'trainedGridSearchCV'
model_nameN = 'MLPClassifier'
y_name = 'emotion'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.45146001635634886
F1 Score (M): 0.34482748432285776
Accuracy: 0.49793388429752067
Precision (W): 0.5010426950099419
precision recall f1-score support
admiration 0.60 0.60 0.60 2088
amusement 0.58 0.69 0.63 1256
anger 0.49 0.37 0.42 1057
annoyance 0.39 0.17 0.23 1669
approval 0.48 0.15 0.23 2203
caring 0.45 0.22 0.30 748
confusion 0.50 0.15 0.23 986
curiosity 0.55 0.18 0.27 1170
desire 0.50 0.30 0.38 452
disappointment 0.48 0.10 0.17 937
disapproval 0.39 0.21 0.27 1544
disgust 0.47 0.28 0.35 576
embarrassment 0.47 0.22 0.30 276
excitement 0.53 0.17 0.26 619
fear 0.54 0.39 0.45 352
gratitude 0.82 0.79 0.80 1395
grief 0.50 0.02 0.03 61
joy 0.46 0.34 0.39 882
love 0.64 0.71 0.67 997
nervousness 0.65 0.07 0.13 151
neutral 0.46 0.84 0.60 11078
optimism 0.55 0.37 0.44 855
pride 0.67 0.02 0.03 118
realization 0.42 0.11 0.18 961
relief 0.60 0.02 0.04 149
remorse 0.49 0.53 0.51 304
sadness 0.49 0.30 0.37 776
surprise 0.49 0.32 0.39 704
accuracy 0.50 34364
macro avg 0.52 0.31 0.34 34364
weighted avg 0.50 0.50 0.45 34364
# Evaluate the grid-searched MLP sentiment model on the test set.
model_typeN = 'trainedGridSearchCV'
model_nameN = 'MLPClassifier'
y_name = 'sentiment'
# Context manager closes the pickle handle (the original left it open).
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test)
print(f'test for {y_name}:')
performance_data_show(y_test[y_name], y_pred, model_typeN, model_nameN, y_name)
test for sentiment:
F1 Score (W): 0.6252654617101031
F1 Score (M): 0.5882103523819106
Accuracy: 0.6296414852752881
Precision (W): 0.635595398392076
precision recall f1-score support
ambiguous 0.60 0.32 0.42 3821
negative 0.64 0.58 0.61 7703
neutral 0.54 0.67 0.60 11078
positive 0.73 0.72 0.73 11762
accuracy 0.63 34364
macro avg 0.63 0.57 0.59 34364
weighted avg 0.64 0.63 0.63 34364
# Persist the collected performance records, one record per line.
# Bug fix: the original wrote all records with no separator between them,
# producing one unreadable run-together blob.
with open('performance.txt', 'w') as f:
    for line in performance_data:
        f.write(",".join(map(str, line)))
        f.write("\n")
# Re-weight the raw counts with TF-IDF and repeat all experiments.
transformer = TfidfTransformer()
X_tfidf = transformer.fit_transform(X)
# Bug fix: the original split the raw counts `X` again, so the TF-IDF matrix
# it had just computed was never used anywhere.
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, df[['emotion', 'sentiment']], test_size=0.2)
performance_data = []
base_mnb = MultinomialNB()
train_model(base_mnb, 'MultinomialNB')
base_dt = DecisionTreeClassifier()
train_model(base_dt, 'DecisionTreeClassifier')
base_mlp = MLPClassifier(max_iter=2)
train_model(base_mlp, 'MLPClassifier')
grid_search(base_mnb, 'MultinomialNB', mnb_params)
grid_search(base_dt, 'DecisionTreeClassifier', dt_params)
grid_search(base_mlp, 'MLPClassifier', mlp_params)
emotion train emotion start: 1666553545.3841374 success load pretrainedmodel: trainedmodels/MultinomialNB_emotion_model.pickle emotion train emotion end: 1666553545.3901238 take time:0.005986452102661133 emotion predict emotion end: 1666553545.4210458 take time:0.03092193603515625 sentiment train sentiment start: 1666553546.5779464 success load pretrainedmodel: trainedmodels/MultinomialNB_sentiment_model.pickle sentiment train sentiment end: 1666553546.5809383 take time:0.002991914749145508 sentiment predict sentiment end: 1666553546.5869222 take time:0.005983829498291016 emotion train emotion start: 1666553547.723882 success load pretrainedmodel: trainedmodels/DecisionTreeClassifier_emotion_model.pickle emotion train emotion end: 1666553547.739839 take time:0.015957117080688477 emotion predict emotion end: 1666553547.7737484 take time:0.03390932083129883 sentiment train sentiment start: 1666553549.0827417 success load pretrainedmodel: trainedmodels/DecisionTreeClassifier_sentiment_model.pickle sentiment train sentiment end: 1666553549.0897229 take time:0.0069811344146728516 sentiment predict sentiment end: 1666553549.1176484 take time:0.027925491333007812 emotion train emotion start: 1666553550.5687678 success load pretrainedmodel: trainedmodels/MLPClassifier_emotion_model.pickle emotion train emotion end: 1666553550.5927038 take time:0.023936033248901367 emotion predict emotion end: 1666553550.6555333 take time:0.06282949447631836 sentiment train sentiment start: 1666553551.8303933 success load pretrainedmodel: trainedmodels/MLPClassifier_sentiment_model.pickle sentiment train sentiment end: 1666553551.86147 take time:0.031076669692993164 sentiment predict sentiment end: 1666553551.9332783 take time:0.0718083381652832 success load pretrainedGridSearchCV: trainedGridSearchCV/MultinomialNB_emotion_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/MultinomialNB_sentiment_model.pickle success load pretrainedGridSearchCV: 
trainedGridSearchCV/DecisionTreeClassifier_emotion_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/DecisionTreeClassifier_sentiment_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/MLPClassifier_emotion_model.pickle success load pretrainedGridSearchCV: trainedGridSearchCV/MLPClassifier_sentiment_model.pickle
pd.DataFrame(performance_data, columns=['Model', 'confusion matrix', 'classfication report'])
| Model | confusion matrix | classfication report | |
|---|---|---|---|
| 0 | MultinomialNB, emotion | [[1100, 13, 1, 5, 33, 4, 0, 6, 1, 2, 2, 1, 0, ... | precision recall f1-score ... |
| 1 | MultinomialNB, sentiment | [[1215, 550, 1181, 807], [276, 4616, 1779, 111... | precision recall f1-score ... |
| 2 | DecisionTreeClassifier, emotion | [[1767, 13, 4, 13, 71, 8, 1, 5, 3, 3, 3, 2, 0,... | precision recall f1-score ... |
| 3 | DecisionTreeClassifier, sentiment | [[2672, 309, 489, 283], [573, 6036, 777, 401],... | precision recall f1-score ... |
| 4 | MLPClassifier, emotion | [[1298, 25, 2, 10, 38, 8, 5, 11, 5, 2, 6, 1, 1... | precision recall f1-score ... |
| 5 | MLPClassifier, sentiment | [[1437, 341, 1454, 521], [304, 4425, 2215, 843... | precision recall f1-score ... |
| 6 | MultinomialNB, alpha_0., emotion | [[1292, 20, 3, 14, 43, 12, 3, 10, 3, 3, 4, 1, ... | precision recall f1-score ... |
| 7 | MultinomialNB, alpha_0., sentiment | [[1481, 524, 1016, 732], [385, 4742, 1618, 104... | precision recall f1-score ... |
| 8 | DecisionTreeClassifier, criterion_ginimax_dept... | [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | precision recall f1-score ... |
| 9 | DecisionTreeClassifier, criterion_ginimax_dept... | [[0, 0, 3701, 52], [0, 0, 7693, 94], [0, 0, 10... | precision recall f1-score ... |
| 10 | MLPClassifier, activation_identityhidden_layer... | [[1283, 23, 3, 12, 22, 9, 1, 7, 9, 0, 5, 1, 1,... | precision recall f1-score ... |
| 11 | MLPClassifier, activation_tanhhidden_layer_siz... | [[1204, 449, 1595, 505], [214, 4491, 2293, 789... | precision recall f1-score ... |
# Download/load Google's pretrained 300-dimensional word2vec vectors (large
# download on first use) and sanity-check with nearest neighbours of 'car'.
wordvec2model=load('word2vec-google-news-300')
wordvec2model.similar_by_vector('car')
[('vehicle', 0.7821096181869507),
('cars', 0.7423830032348633),
('SUV', 0.7160962224006653),
('minivan', 0.6907036304473877),
('truck', 0.6735789775848389),
('Car', 0.6677608489990234),
('Ford_Focus', 0.6673202514648438),
('Honda_Civic', 0.6626849174499512),
('Jeep', 0.651133120059967),
('pickup_truck', 0.6441437602043152)]
file = pd.read_json("goemotions.json")
file.columns=['post', 'emotion', 'sentiment']
words=file['post']
# Tokenize every Reddit post with NLTK's word tokenizer.
tokenizer=[word_tokenize(x) for x in words]
list_tokens=[]
# NOTE(review): this comprehension collects k (the whole token list of a
# post) once per token i, so len(list_tokens) equals the total token count
# but each element is a *list* of tokens, not a single token. Later code
# (list_tokens[1]) relies on this structure — confirm before "fixing".
list_tokens=[k for k in tokenizer for i in k]
list_len=len(list_tokens)
print(f'There are {list_len} Reddit post tokens in goemotions.json')
There are 2642128 Reddit post tokens in goemotions.json
# Emotion distribution, this time via the named column.
emotion= file['emotion'].value_counts()
print(emotion)
neutral 55298 approval 11259 admiration 10531 annoyance 8342 disapproval 7686 gratitude 7075 amusement 6130 curiosity 5885 anger 5202 love 4957 confusion 4938 realization 4714 disappointment 4706 optimism 4519 joy 4329 sadness 3827 caring 3523 surprise 3472 excitement 3020 disgust 2914 desire 2147 fear 1778 remorse 1510 embarrassment 1433 nervousness 796 relief 788 pride 690 grief 351 Name: emotion, dtype: int64
# Sentiment distribution via the named column.
sentiment = file['sentiment'].value_counts()
print(sentiment)
positive 58968 neutral 55298 negative 38545 ambiguous 19009 Name: sentiment, dtype: int64
# Re-plot the label distributions (same charts as earlier, not saved to disk).
# The manual e/s accumulation loops are replaced by Series.index / .values.
plt.xlabel('Emotion')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.bar(emotion.index, emotion.values)
plt.show()
plt.pie(emotion.values, labels=emotion.index, rotatelabels=True, autopct='%0.1f%%', labeldistance=1.1, radius=1.7)
plt.show()
plt.xlabel('Sentiment')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.bar(sentiment.index, sentiment.values)
plt.show()
plt.pie(sentiment.values, labels=sentiment.index, rotatelabels=True, autopct='%0.1f%%', labeldistance=1.1, radius=1.7)
plt.show()
# Grab one post's token list for the embedding demo. Because list_tokens was
# built with each element being a whole token list, index 1 is still the
# token list of the *first* post, not a single token.
redditpositone=list_tokens[1]
print(redditpositone)
['That', 'game', 'hurt', '.']
# Embed the demo post: keep only in-vocabulary tokens and pair each with its
# 300-d word2vec vector. (The original pre-initialized both lists to [] and
# immediately reassigned them — dead code, removed.)
vecter_list=[wordvec2model[word] for word in redditpositone if word in wordvec2model.index_to_key]
word_filtered_list=[word for word in redditpositone if word in wordvec2model.index_to_key]
# NOTE(review): dict(zip(...)) silently drops duplicate words — fine for this
# 4-token demo post, but worth confirming for general use.
df=pd.DataFrame.from_dict(dict(zip(word_filtered_list,vecter_list)),orient='index')
# Context manager closes the JSON file (the original open() was never closed).
with open("filteredlist_one_reddit_post.json", 'w') as fh:
    json.dump(df.to_dict(), fh)
gj=pd.read_json("filteredlist_one_reddit_post.json")
dict_tokens=gj.to_dict()
gj.head(3)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| That | -0.138672 | 0.125977 | 0.326172 | 0.099609 | -0.237305 | 0.014832 | 0.049805 | -0.091797 | 0.205078 | 0.296875 | ... | -0.150391 | 0.013367 | -0.242188 | 0.043213 | -0.129883 | -0.320312 | 0.126953 | 0.049561 | 0.253906 | -0.181641 |
| game | 0.114746 | 0.052734 | 0.042969 | 0.004486 | 0.273438 | 0.071777 | 0.173828 | -0.198242 | 0.020752 | 0.118652 | ... | -0.212891 | 0.043213 | -0.139648 | -0.011597 | 0.228516 | 0.170898 | -0.010437 | -0.094238 | -0.172852 | -0.052734 |
| hurt | -0.003891 | 0.092773 | -0.009277 | 0.061768 | -0.100586 | 0.239258 | 0.104004 | 0.013977 | 0.183594 | 0.263672 | ... | 0.196289 | 0.154297 | -0.160156 | -0.081543 | -0.215820 | 0.078613 | -0.363281 | -0.065918 | 0.119141 | -0.165039 |
3 rows × 300 columns
# Average each embedding dimension across all words of the demo post.
# Bug fixes (both off-by-one): the original iterated range(vector_size-1),
# dropping dimension 299 (the displayed frame had only 299 columns instead
# of 300), and range(len(...)-1) in the inner loop, which omitted the last
# word from every sum while still dividing by the full word count.
average = []
n_words = len(dict_tokens[0])
for x in range(wordvec2model.vector_size):
    total = 0
    for j in range(n_words):
        total += gj[x][j]
    average.append(total / n_words)
df = pd.DataFrame(average, columns=['average'])
df.T.head()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| average | -0.007975 | 0.05957 | 0.123047 | 0.034698 | 0.012044 | 0.02887 | 0.074544 | -0.09668 | 0.075277 | 0.138509 | ... | 0.018738 | -0.121094 | 0.01886 | -0.127279 | 0.010539 | 0.032878 | -0.049805 | 0.038839 | -0.014893 | 0.027018 |
1 rows × 299 columns
X_train, X_test, y_train, y_test = train_test_split(tokenizer,file[['emotion', 'sentiment']], test_size=0.2)
def filter_by_word2vec(list_list, list_name):
    """Turn each tokenized post in `list_list` into the mean of its word2vec
    embeddings and cache the result to filteredlist_reddit_post_<name>.json.

    Posts with no in-vocabulary token get a 300-d zero placeholder vector.
    All work is skipped when the cache file already exists. (The original's
    unused `global y_train` declaration is removed.)
    """
    dumy = [0] * 300  # placeholder for posts with no in-vocabulary tokens
    df = pd.DataFrame()
    print(len(list_list))
    # Perf fix: the original tested `word in list` against a ~3M-entry list
    # (O(|vocab|) per token); a set gives O(1) membership with identical results.
    vocab = set(wordvec2model.index_to_key)
    print(len(vocab))
    count = 0
    vecter_list1 = []
    word_filtered_list1 = []
    if os.path.exists(f'filteredlist_reddit_post_{list_name}.json'):
        print('openfile,filtered list')
    else:
        for words in list_list:
            count += 1
            print("search embeddings, till complete {:0.1%}".format(count / len(list_list)), end='\r')
            word_filtered_list1.append([word for word in words if word in vocab])
        for words in word_filtered_list1:
            linevec = [wordvec2model[word] for word in words]
            vecter_list1.append(np.mean(linevec, axis=0))
        # Bug fix: range(len(...)-1) skipped the final post, so a trailing
        # all-OOV post would have kept its NaN mean.
        for i in range(len(vecter_list1)):
            if vecter_list1[i].shape != (300,):
                print(f'findimpuretyat:{i}')
                vecter_list1[i] = dumy
        df = pd.DataFrame(vecter_list1)
        # Context manager closes the cache file (original handle leaked).
        with open(f"filteredlist_reddit_post_{list_name}.json", 'w') as fh:
            json.dump(df.to_dict(), fh)
    df.head(5)
print(y_train.shape)
# Build (or load from cache) mean-embedding features for the test split.
selectlist2=X_test
filter_by_word2vec(selectlist2,"X_test")
(137456, 2) 34364 3000000 openfile,filtered list
# Build (or load from cache) mean-embedding features for the training split.
selectlist=X_train
filter_by_word2vec(selectlist,"X_train")
137456 3000000 openfile,filtered list
# Load the cached mean-embedding features back as plain Python lists of rows.
df_train=pd.read_json(f"filteredlist_reddit_post_X_train.json")
X_train_filtered=df_train.values.tolist()
df_test=pd.read_json(f"filteredlist_reddit_post_X_test.json")
X_test_filtered=df_test.values.tolist()
df_train.head(167)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.074473 | -0.028224 | 0.074883 | 0.114950 | 0.018596 | -0.039480 | 0.035522 | -0.086110 | 0.110636 | 0.191630 | ... | -0.043921 | -0.021790 | -0.115000 | 0.048299 | -0.070190 | -0.025116 | 0.087463 | -0.048940 | 0.058675 | -0.078451 |
| 1 | 0.014440 | 0.015371 | 0.017170 | 0.134499 | -0.005157 | -0.044666 | 0.007108 | -0.002695 | -0.012939 | 0.071800 | ... | -0.103782 | 0.060630 | -0.107522 | 0.072731 | -0.002595 | -0.015055 | -0.031788 | -0.057085 | 0.021905 | 0.015514 |
| 2 | 0.036253 | 0.034479 | 0.020712 | 0.057836 | -0.040987 | 0.005086 | 0.062035 | -0.061710 | 0.071849 | 0.073953 | ... | -0.062227 | 0.044512 | -0.101859 | 0.026827 | -0.067433 | -0.072546 | 0.017973 | -0.067028 | 0.083396 | -0.000659 |
| 3 | 0.081573 | 0.032275 | 0.011780 | 0.012614 | 0.049988 | -0.062421 | 0.078786 | -0.055603 | 0.042247 | 0.087911 | ... | 0.018936 | 0.004873 | -0.199448 | 0.019526 | -0.107086 | -0.065796 | 0.001322 | -0.048716 | 0.020060 | -0.004415 |
| 4 | -0.058746 | 0.003027 | 0.079639 | -0.064929 | -0.094131 | 0.052441 | 0.093359 | -0.186035 | 0.204395 | -0.004199 | ... | -0.107324 | 0.028833 | -0.044519 | 0.004053 | 0.019580 | -0.069885 | 0.054541 | -0.124170 | 0.070972 | -0.101758 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 162 | 0.045611 | 0.086123 | 0.068787 | 0.121012 | -0.072141 | -0.009472 | -0.039368 | -0.064314 | 0.066703 | 0.074498 | ... | -0.036669 | 0.041453 | -0.127465 | -0.012442 | -0.044080 | -0.080991 | -0.006252 | -0.078102 | 0.015639 | -0.010667 |
| 163 | 0.125570 | -0.040360 | -0.012533 | 0.116130 | -0.122945 | -0.013774 | 0.140462 | -0.100265 | 0.084005 | -0.003825 | ... | -0.004232 | 0.030355 | -0.016154 | 0.125142 | -0.043355 | 0.055420 | 0.069661 | -0.029541 | 0.043416 | -0.053650 |
| 164 | 0.053113 | 0.023563 | 0.040924 | 0.080277 | -0.077756 | -0.024334 | 0.038854 | -0.098334 | 0.040500 | 0.036540 | ... | -0.067505 | 0.057959 | -0.112462 | 0.040753 | -0.100141 | -0.025770 | 0.013820 | -0.025087 | 0.057831 | -0.034348 |
| 165 | 0.093315 | 0.053682 | 0.089752 | 0.135818 | -0.005569 | -0.055214 | 0.048954 | -0.094727 | 0.062904 | 0.149918 | ... | -0.012184 | 0.063044 | -0.187439 | -0.005104 | -0.109085 | -0.053651 | -0.016838 | -0.065147 | -0.007571 | -0.014923 |
| 166 | 0.084778 | 0.137657 | -0.057995 | 0.040710 | -0.000488 | 0.047470 | 0.025940 | -0.054153 | 0.067688 | 0.074707 | ... | -0.006556 | 0.065735 | -0.093567 | 0.069763 | -0.147339 | -0.109131 | -0.077515 | 0.055801 | 0.038696 | 0.008057 |
167 rows × 300 columns
# Total token count across the training split (sum of per-post lengths
# replaces the manual nested counting loop).
count_total = sum(len(words1) for words1 in X_train)
print()
print(count_total)
2114574
print('hit rate of X_train is: {:0.1%}'.format(len(vecter_list)/count_total))
hit rate of X_train is: 0.0%
# Replace zero-placeholder rows (all-OOV posts) with a ones vector so
# MultinomialNB-style models don't choke on all-zero rows.
# Bug fix: the original loop used range(len(...)-1), skipping the final row.
dumy = [0] * 300
dumy1 = [1] * 300
for j in range(len(X_train_filtered)):
    if X_train_filtered[j] == dumy:
        print('find dumy')
        X_train_filtered[j] = dumy1
find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy find dumy
# Total token count across the test split (idiomatic sum replaces the
# manual nested counting loop).
count_tota2 = sum(len(words2) for words2 in X_test)
print()
print(count_tota2)
# NOTE(review): as with the training split, the numerator is the demo post's
# vector list, so this always reports ~0.0% — confirm the intended numerator.
print('hit rate of X_test is: {:0.1%}'.format(len(vecter_list)/count_tota2))
527554 hit rate of X_test is: 0.0%
performance_data = []

def train_model_vec2(model, model_name, label, X_train_in, Y_train_in):
    """Fit *model* on (X_train_in, Y_train_in) with pickle-file caching.

    If Googlemodel/{model_name}_{label}_model.pickle exists it is loaded and
    returned instead of re-fitting; otherwise the model is fitted, dumped to
    that path, and returned.  Training wall time is printed either way.

    Returns the fitted (or loaded) estimator — the original returned None,
    forcing callers to re-load the pickle themselves.
    """
    start = time.time()
    print(f'{label} train {label} start: {start}')
    model_path = f'Googlemodel/{model_name}_{label}_model.pickle'
    if os.path.exists(model_path):
        # NOTE(review): pickle.load is unsafe on untrusted files; acceptable
        # here because the cache is produced locally by this same script.
        # Context manager fixes the leaked file handle in the original.
        with open(model_path, 'rb') as picklefile:
            model = pickle.load(picklefile)
        # Message fixed: the cache actually lives under Googlemodel/, not
        # trainedmodels/ as the original log line claimed.
        print(f"success load pretrainedmodel: {model_path}")
    else:
        model = model.fit(X_train_in, Y_train_in)
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        # Dropped the original's redundant reload of the pickle just written.
    end = time.time()
    print(f'{label} train {label} end: {end} take time:{end-start}')
    return model
def performance_data_show2(train_y, predict_y, model_type, model_name, y_name):
    """Print evaluation metrics and save a confusion-matrix heatmap.

    Prints weighted/macro F1, accuracy, weighted precision, and the full
    per-class classification report, then writes the confusion-matrix plot
    to plotpic/{model_type}_{model_name}_{y_name}.jpg.
    """
    print(f"F1 Score (W): {sklearn.metrics.f1_score(y_true=train_y, y_pred=predict_y, average='weighted')}")
    print(f"F1 Score (M): {sklearn.metrics.f1_score(y_true=train_y, y_pred=predict_y, average='macro')}")
    print(f"Accuracy: {sklearn.metrics.accuracy_score(y_true=train_y, y_pred=predict_y)}")
    print(f"Precision (W): {sklearn.metrics.precision_score(y_true=train_y, y_pred=predict_y, average='weighted')}")
    # Compute the confusion matrix once and reuse it: the original called
    # confusion_matrix() twice and discarded the first result entirely.
    cm = confusion_matrix(y_true=train_y, y_pred=predict_y)
    print(classification_report(y_true=train_y, y_pred=predict_y, zero_division='warn'))
    cmp = sklearn.metrics.ConfusionMatrixDisplay(cm)
    fig, ax = plt.subplots(figsize=(14, 14))
    # colorbar is a boolean flag, not a colormap name; the original's
    # 'coolwarm' string only worked because it is truthy.
    cmp.plot(ax=ax, cmap='Blues_r', colorbar=True)
    plt.savefig(f"plotpic/{model_type}_{model_name}_{y_name}.jpg")
# Baseline MLP: scikit-learn defaults, training capped at 20 iterations.
classifier = MLPClassifier(max_iter=20)
predict_emotion = train_model_vec2(
    classifier, "mplclassifier", "emotion", X_train_filtered, y_train['emotion']
)
emotion train emotion start: 1666553677.619993 success load pretrainedmodel: trainedmodels/mplclassifier_emotion_model.pickle emotion train emotion end: 1666553677.7011826 take time:0.08118963241577148
print(y_train['emotion'])
142943 sadness
57609 neutral
156428 neutral
99056 anger
34720 neutral
...
102914 neutral
76251 excitement
71509 neutral
84171 neutral
115158 caring
Name: emotion, Length: 137456, dtype: object
# Evaluate the cached baseline MLP on the held-out test set.
model_typeN = 'Googlemodel'
model_nameN = 'mplclassifier'
y_name = 'emotion'
# Context manager closes the pickle file; the original leaked the handle.
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test_filtered)
print(f'test for {y_name}:')
performance_data_show2(y_test['emotion'], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.1584868141973371
F1 Score (M): 0.017956101890741316
Accuracy: 0.3214992433942498
Precision (W): 0.1275025721043025
precision recall f1-score support
admiration 0.03 0.00 0.00 2099
amusement 0.12 0.00 0.00 1258
anger 0.00 0.00 0.00 1074
annoyance 0.04 0.00 0.00 1633
approval 0.14 0.00 0.01 2276
caring 0.00 0.00 0.00 670
confusion 0.00 0.00 0.00 1012
curiosity 0.00 0.00 0.00 1181
desire 0.00 0.00 0.00 434
disappointment 0.00 0.00 0.00 955
disapproval 0.08 0.00 0.00 1514
disgust 0.00 0.00 0.00 570
embarrassment 0.00 0.00 0.00 274
excitement 0.00 0.00 0.00 604
fear 0.00 0.00 0.00 341
gratitude 0.06 0.00 0.00 1381
grief 0.00 0.00 0.00 73
joy 0.00 0.00 0.00 866
love 0.00 0.00 0.00 975
nervousness 0.00 0.00 0.00 163
neutral 0.32 0.99 0.49 11105
optimism 0.00 0.00 0.00 894
pride 0.00 0.00 0.00 142
realization 0.00 0.00 0.00 942
relief 0.00 0.00 0.00 155
remorse 0.00 0.00 0.00 294
sadness 0.00 0.00 0.00 774
surprise 0.00 0.00 0.00 705
accuracy 0.32 34364
macro avg 0.03 0.04 0.02 34364
weighted avg 0.13 0.32 0.16 34364
# Second MLP variant: relu + adam are scikit-learn's defaults, so this mainly
# differs from the baseline by the tighter max_iter=10 budget.
classifier = MLPClassifier(activation='relu', solver='adam', max_iter=10)
predict_emotion = train_model_vec2(
    classifier, "mplclassifier2", "emotion", X_train_filtered, y_train['emotion']
)
emotion train emotion start: 1666553683.5047228 success load pretrainedmodel: trainedmodels/mplclassifier2_emotion_model.pickle emotion train emotion end: 1666553683.543355 take time:0.03863215446472168
# Evaluate the second MLP variant on the held-out test set.
model_typeN = 'Googlemodel'
model_nameN = 'mplclassifier2'
y_name = 'emotion'
# Context manager closes the pickle file; the original leaked the handle.
with open(f'{model_typeN}/{model_nameN}_{y_name}_model.pickle', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
y_pred = model.predict(X_test_filtered)
print(f'test for {y_name}:')
performance_data_show2(y_test['emotion'], y_pred, model_typeN, model_nameN, y_name)
test for emotion:
F1 Score (W): 0.15814862191377188
F1 Score (M): 0.017747242434440282
Accuracy: 0.32216854848096843
Precision (W): 0.13504128310199612
precision recall f1-score support
admiration 0.08 0.00 0.00 2099
amusement 0.00 0.00 0.00 1258
anger 0.17 0.00 0.00 1074
annoyance 0.00 0.00 0.00 1633
approval 0.09 0.00 0.00 2276
caring 0.00 0.00 0.00 670
confusion 0.00 0.00 0.00 1012
curiosity 0.00 0.00 0.00 1181
desire 0.00 0.00 0.00 434
disappointment 0.00 0.00 0.00 955
disapproval 0.33 0.00 0.00 1514
disgust 0.00 0.00 0.00 570
embarrassment 0.00 0.00 0.00 274
excitement 0.00 0.00 0.00 604
fear 0.00 0.00 0.00 341
gratitude 0.00 0.00 0.00 1381
grief 0.00 0.00 0.00 73
joy 0.00 0.00 0.00 866
love 0.00 0.00 0.00 975
nervousness 0.00 0.00 0.00 163
neutral 0.32 1.00 0.49 11105
optimism 0.00 0.00 0.00 894
pride 0.00 0.00 0.00 142
realization 0.00 0.00 0.00 942
relief 0.00 0.00 0.00 155
remorse 0.00 0.00 0.00 294
sadness 0.00 0.00 0.00 774
surprise 0.00 0.00 0.00 705
accuracy 0.32 34364
macro avg 0.04 0.04 0.02 34364
weighted avg 0.14 0.32 0.16 34364