I'm trying to sort with sklearn
, but I'm getting an error:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
X = df['texto'].values #texto que é a base para classificação
Y = df['sentimento'].values #sentimento é o que será treinado. Obs, a coluna setimento já está preenchida com o devido sentimento para cada texto (seguro, inseguro ou nêutro)
split_test_size = 0.30 #30% para teste e 70% para treino
#dividindo o modelo
X_treino, X_teste, Y_treino, Y_teste = train_test_split(X, Y, test_size = split_test_size, random_state = 42)
modelo_v1 = GaussianNB()
#treinando o modelo
modelo_v1.fit(X_treino, Y_treino.ravel())
Returns the error:
Traceback (most recent call last): File "C: \ Users \ USER \ workspacePython \ tests \ sampleTwitter2.py", " line 280, in main () File "C: \ Users \ USER \ workspacePython \ tests \ sampleTwitter2.py", " line 65, in main classify3 (df, "I'm afraid of violence") File "C: \ Users \ USER \ workspacePython \ tests \ sampleTwitter2.py", " line 277, in sort3 template_v1.fit (X_treino, Y_treino.ravel ()) File "C: \ ProgramData \ Anaconda3 \ lib \ site-packages \ sklearn \ naive_bayes.py", line 182, in fit X, y = check_X_y (X, y) File "C: \ ProgramData \ Anaconda3 \ lib \ site-packages \ sklearn \ utils \ validation.py", line 521, in check_X_y ensure_min_features, warn_on_dtype, estimator) File "C: \ ProgramData \ Anaconda3 \ lib \ site-packages \ sklearn \ utils \ validation.py", line 382, in check_array array = np.array (array, dtype = dtype, order = order, copy = copy) ValueError: Could not convert string to float: 'I just feel at ease in a quiet place'
Does not work with string? Or would I have to get the frequency number of the words?