import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

# Train a linear SVM on TF-IDF features of the 'emails' column to predict the
# 'sentiment' column, then print a classification report on a held-out split.
df = pd.read_csv("./email1.csv")

X = df['emails']
y = df['sentiment']

# Split the raw text BEFORE fitting the vectorizer. Fitting TF-IDF on the
# full corpus would let the test documents influence the vocabulary and IDF
# weights (data leakage) and make the reported scores overly optimistic.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Unigrams and bigrams, capped at the 10,000 most frequent terms.
tfids = TfidfVectorizer(max_features=10000, ngram_range=(1, 2))
X_train = tfids.fit_transform(X_train_text)  # learn vocabulary/IDF on train only
X_test = tfids.transform(X_test_text)        # reuse the fitted vocabulary/IDF

clf = LinearSVC()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
# Sample paragraph to classify with the already-fitted vectorizer (``tfids``)
# and classifier (``clf``).
x = """The patient presents to clinic for initial evaluation of some pain and swelling to the left foot. On the day of injury a metal bar fell directly on the top of his left foot striking above the steel toed area of his boot. He had some pain, swelling and bruising early on but this has gotten better. However, the patient continues to have some soreness on the ball of his foot and points to the seconcbdista: metatarsal region. He states that it feels a little like a stone bruise. He has continued hs regglar duties without problems and is able to wear a regular shoe."""

# transform() expects an iterable of documents, hence the one-element list.
vec = tfids.transform([x])
a = clf.predict(vec)

# predict() returns one label per input document; inspect the single
# prediction explicitly instead of relying on the truthiness of an array.
if a[0] == 0:
    print("Negative communication")
else:
    print("Positive communication")
Is there any method to identify the sentiment of each sentence? For example: "He had some pain, swelling and bruising early on but this has gotten better", which is a positive sentence.
CodePudding user response:
If I understand correctly, all you need to do is split the paragraph at the periods, for example:
# Classify each sentence of the paragraph ``x`` on its own, using the
# already-fitted vectorizer (``tfids``) and classifier (``clf``).
for sentence in x.split("."):
    sentence = sentence.strip()
    # split(".") leaves an empty piece after the final period and keeps the
    # space that followed each period; there is nothing to classify in an
    # empty piece, so skip it.
    if not sentence:
        continue
    vec = tfids.transform([sentence])
    a = clf.predict(vec)
    print(f'Sentiment analysis for "{sentence}":')
    # predict() returns one label per input document; check the single one.
    if a[0] == 0:
        print("Negative communication")
    else:
        print("Positive communication")