-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.py
96 lines (68 loc) · 2.6 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from urlextract import URLExtract
from wordcloud import WordCloud
import pandas as pd
from collections import Counter
import emoji
# Module-level URL extractor, created once at import time; fetch_stats calls
# its find_urls() on every message to count links.
extractor = URLExtract()
def fetch_stats(selected_user, df):
    """Compute headline counts for one user or for the whole chat.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with at least 'user' and 'message' columns.

    Returns:
        A 4-tuple ``(messages, words, media_messages, links)``:
        the number of rows, the total number of whitespace-separated tokens
        across all messages, the number of messages equal to
        ``'<Media omitted>\\n'``, and the total number of URLs reported by
        the module-level ``extractor``.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    messages = scoped['message']

    # 1. number of messages
    message_total = scoped.shape[0]

    # 2. number of words (tokens produced by str.split on each message)
    word_total = sum(len(text.split()) for text in messages)

    # 3. number of media placeholder messages
    media_total = scoped[messages == '<Media omitted>\n'].shape[0]

    # 4. number of links found by the shared extractor
    link_total = sum(len(extractor.find_urls(text)) for text in messages)

    return message_total, word_total, media_total, link_total
# Find the busiest users.
def most_busy_user(df):
    """Return the most active users and every user's share of the messages.

    Args:
        df: DataFrame with a 'user' column, one row per message.

    Returns:
        A tuple ``(top_users, percent_df)``. ``top_users`` is the first five
        entries of ``df['user'].value_counts()``. ``percent_df`` is a
        DataFrame with columns 'name' (the user) and 'percent' (that user's
        row count as a percentage of all rows in ``df``, rounded to two
        decimals).
    """
    counts = df['user'].value_counts()
    top_users = counts.head()

    # Label the index and the values explicitly instead of renaming whatever
    # reset_index() produces: those default labels changed between pandas
    # 1.x ('index'/'user') and 2.x ('user'/'count'), which silently broke a
    # rename keyed on the old labels.
    percent_df = (
        (counts / df.shape[0] * 100)
        .round(2)
        .rename_axis('name')
        .reset_index(name='percent')
    )
    return top_users, percent_df
def create_wordcloud(selected_user, df):
    """Generate a word cloud from the messages of one user or the whole chat.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with at least 'user' and 'message' columns.

    Returns:
        Whatever ``WordCloud.generate`` returns for the space-joined message
        text (a 500x500, white-background cloud with a minimum font size
        of 10).
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]

    cloud = WordCloud(
        width=500,
        height=500,
        min_font_size=10,
        background_color='white',
    )
    # Join every message into one text blob separated by single spaces.
    corpus = scoped['message'].str.cat(sep=" ")
    return cloud.generate(corpus)
def emoji_helper(selected_user, df):
    """Count how often each emoji character appears in the messages.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with at least 'user' and 'message' columns.

    Returns:
        DataFrame built from ``(emoji, count)`` pairs ordered from most to
        least frequent; it is empty when no emoji character is found.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Messages are scanned one character at a time, so only characters that
    # are themselves keys of emoji.EMOJI_DATA are counted.
    # NOTE(review): multi-character emoji sequences are presumably counted by
    # their individual characters, not as one emoji -- confirm if that matters.
    emoji_counts = Counter()
    for message in df['message']:
        emoji_counts.update(c for c in message if c in emoji.EMOJI_DATA)

    # most_common() without an argument already returns every entry, sorted
    # by descending count.
    return pd.DataFrame(emoji_counts.most_common())
def monthly_timeline(selected_user, df):
    """Count messages per calendar month and label each month for plotting.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with 'user', 'message', 'year', 'month_num' and
            'month' columns.

    Returns:
        DataFrame with one row per (year, month_num, month) group, holding
        those three columns, the non-null 'message' count, and a 'time'
        label of the form "<month>-<year>".
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Select 'message' before counting so only that column is aggregated.
    timeline = (
        df.groupby(['year', 'month_num', 'month'])['message']
        .count()
        .reset_index()
    )

    # Build the labels column-wise instead of indexing row by row.
    # Assumes 'month' holds strings (e.g. month names) -- the original
    # per-row concatenation required the same.
    timeline['time'] = timeline['month'] + "-" + timeline['year'].astype(str)
    return timeline
def week_activity_map(selected_user, df):
    """Count messages per day of the week.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with at least 'user' and 'day_name' columns.

    Returns:
        The ``value_counts()`` Series of the 'day_name' column for the
        selected rows.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    day_names = scoped['day_name']
    return day_names.value_counts()
def month_activity_map(selected_user, df):
    """Count messages per month name.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with at least 'user' and 'month' columns.

    Returns:
        The ``value_counts()`` Series of the 'month' column for the selected
        rows.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    month_names = scoped['month']
    return month_names.value_counts()
def activity_heatmap(selected_user, df):
    """Build a day-of-week by period table of message counts.

    Args:
        selected_user: 'Overall' to use every row, otherwise a value of the
            'user' column to restrict the rows to.
        df: DataFrame with 'user', 'day_name', 'period' and 'message'
            columns.

    Returns:
        Pivot table indexed by 'day_name' with one column per 'period',
        holding the count of 'message' values; combinations with no
        messages are filled with 0.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    counts = scoped.pivot_table(
        index='day_name',
        columns='period',
        values='message',
        aggfunc='count',
    )
    # Missing day/period combinations come back as NaN; show them as zero.
    return counts.fillna(0)