I'm trying to sort, but it's giving the error. Follow the code, then the error.
#coding=UTF-8
import io
import os
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import tree
pasta="c:/users/usuario/dados"
os.chdir(pasta)
def main():
df=pd.read_csv('twits_classificados.csv', encoding='ISO-8859-1', sep=";")
exemploArvore(df)
def exemploArvore(df):
textos=df['texto'].values
sentimento=df['sentimento'].values
#a linha abaixo traz a frequencia de palavras
vectorizer = CountVectorizer(analyzer="word")
freqWords = vectorizer.fit_transform(textos)
modelo = tree.DecisionTreeClassifier()
modelo.fit(freqWords, sentimento)
texto=["estou com medo da violência"]
fwTexto=vectorizer.fit_transform(texto)
print(modelo.predict(fwTexto))
main()
The error:
Traceback (most recent call last): File "C:\Users\USUARIO\workspacePython\testes\arvore.py", line 39, in <module> main() File "C:\Users\USUARIO\workspacePython\testes\arvore.py", line 14, in main exemploArvore(df) File "C:\Users\USUARIO\workspacePython\testes\arvore.py", line 36, in exemploArvore print(modelo.predict(fwTexto)) File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\tree.py", line 404, in predict X = self._validate_X_predict(X, check_input) File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\tree.py", line 376, in _validate_X_predict % (self.n_features_, n_features)) ValueError: Number of features of the model must match the input. Model n_features is 2765 and input n_features is 5