There are many areas where sentiment analytics can improve business performance. Python offers various approaches to measuring sentiment and polarity. We can examine web pages, stocks, libraries, books or a Twitter feed and see, e.g., how positive, negative or neutral the texts about UBS were. Let's try to understand, e.g., the latest tweets about UBS: they speak a lot about fintech, the future, challenges, sharing and innovation as well. 72% of the tweets about UBS are positive and only 1% are negative.
1.
# install and import
pip install tweepy
import tweepy
pip install TextBlob
from textblob import TextBlob
pip install wordcloud
from wordcloud import WordCloud
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import csv
import datetime
2.
# Log in to the Twitter developer account.
# The "xxx" values are placeholders: substitute the keys issued for your own
# app before running.
consumer_key, consumer_secret = "xxx", "xxx"
access_token, access_token_secret = "xxx", "xxx"

# Build the OAuth handler from the app credentials, attach the user token,
# and create the API client that the rest of the script calls.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
3.
# Download the tweets and load them into a DataFrame.
# tweet_mode="extended" is requested because the code below reads each
# tweet's `full_text` attribute.
posts = api.user_timeline(screen_name="UBS", lang="en", count=100, tweet_mode="extended")

# Preview the first five tweets returned.
for tweet in posts[:5]:
    print(tweet.full_text + '\n')

print("Show results \n")

# Same preview, numbered from 1. enumerate replaces the hand-maintained counter.
for i, tweet in enumerate(posts[:5], start=1):
    print(f"{i}) {tweet.full_text}\n")

# One row per tweet, with the text in a single 'Tweets' column.
df = pd.DataFrame([tweet.full_text for tweet in posts], columns=['Tweets'])
df.head()
def cleanTxt(text):
    """Strip URLs, @mentions and '#' symbols from a tweet.

    Args:
        text: Raw tweet text.

    Returns:
        The text with whole http/https links and @handles removed and every
        '#' character dropped (the hashtag word itself is kept).
    """
    # Remove the entire link, not just the literal "https" prefix; otherwise
    # "://t.co/..." fragments stay in the text that is scored and plotted.
    text = re.sub(r'https?://\S+', '', text)
    # Handles may contain underscores in addition to letters and digits.
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    # Keep the hashtag word, drop only the marker.
    text = re.sub(r'#', '', text)
    return text
# Replace each raw tweet with its cleaned version, then display the frame.
df['Tweets'] = df['Tweets'].map(cleanTxt)
df
4.
# calculate score
def getSubjectivity(text):
    """Return the subjectivity score TextBlob assigns to ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
def getPolarity(text):
    """Return the polarity score TextBlob assigns to ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
# Score every tweet and store the results as two new columns, then display.
df['Subjectivity'] = df['Tweets'].map(getSubjectivity)
df['Polarity'] = df['Tweets'].map(getPolarity)
df
5.
# Render a word cloud built from the text of all tweets.
allWords = ' '.join(df['Tweets'])

# Rendering options are gathered in one place; random_state fixes the layout.
cloud_options = dict(
    width=500,
    height=300,
    random_state=21,
    max_font_size=110,
    background_color="white",
    colormap="binary",
)
wordCloud = WordCloud(**cloud_options).generate(allWords)

plt.imshow(wordCloud, interpolation="bilinear")
plt.axis('off')
plt.show()
6.
def getAnalysis(score):
    """Translate a polarity score into a sentiment label.

    Returns 'Negative' for scores below zero, 'Neutral' for a score of
    exactly zero, and 'Positive' for anything else.
    """
    if score == 0:
        return 'Neutral'
    return 'Negative' if score < 0 else 'Positive'
# Label each tweet from its polarity score, then display the frame.
df['Analysis'] = df['Polarity'].map(getAnalysis)
df
7.
# Printing positive tweets, ordered by ascending polarity.
print('Printing positive tweets:\n')
sortedDF = df.sort_values(by=['Polarity'])  # Sort the tweets

# Iterate over the sorted rows themselves. Indexing with sortedDF[col][i]
# looks rows up by index label, which walks the frame in its original order
# and silently ignores the sort above.
positive_texts = sortedDF.loc[sortedDF['Analysis'] == 'Positive', 'Tweets']
for j, tweet_text in enumerate(positive_texts, start=1):
    print(f"{j}) {tweet_text}")
    print()
8.
# Printing negative tweets, ordered by descending polarity.
print('Printing negative tweets:\n')
sortedDF = df.sort_values(by=['Polarity'], ascending=False)  # Sort the tweets

# Iterate over the sorted rows themselves. Indexing with sortedDF[col][i]
# looks rows up by index label, which walks the frame in its original order
# and silently ignores the sort above.
negative_texts = sortedDF.loc[sortedDF['Analysis'] == 'Negative', 'Tweets']
for j, tweet_text in enumerate(negative_texts, start=1):
    print(f"{j}) {tweet_text}")
    print()
9.
# Plotting sentiment: polarity on the x-axis, subjectivity on the y-axis,
# one dot per tweet.
plt.figure(figsize=(8, 6))
# A single call draws every point at once instead of issuing one
# plt.scatter call per row.
plt.scatter(df['Polarity'], df['Subjectivity'], color='Blue')
plt.title('Sentiment Analysis')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.show()
10.
# Percentage of tweets labelled 'Positive', rounded to one decimal place.
ptweets = df.loc[df['Analysis'] == 'Positive', 'Tweets']
ptweets
round(len(ptweets) / len(df) * 100, 1)
11.
# Percentage of tweets labelled 'Negative', rounded to one decimal place.
ntweets = df.loc[df['Analysis'] == 'Negative', 'Tweets']
ntweets
round(len(ntweets) / len(df) * 100, 1)
12.
# Show the value counts: how many tweets carry each label in the
# 'Analysis' column.
df['Analysis'].value_counts()
13.
# Plotting and visualizing the counts: a bar chart with one bar per
# sentiment label, its height being the number of tweets with that label.
# The title and axis labels are set first, then the bars are drawn.
plt.title('Sentiment Analysis')
plt.xlabel('Sentiment')
plt.ylabel('Counts')
df['Analysis'].value_counts().plot(kind = 'bar')
plt.show()
References:
Comments