-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemployee_review_analysis_gsom_mapping_discourse_analysis_dimensions.py
91 lines (61 loc) · 2.19 KB
/
employee_review_analysis_gsom_mapping_discourse_analysis_dimensions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
"""Employee Review Analysis - GSOM Mapping - Discourse Analysis Dimensions
***Install and import packages***
"""
# Dependency: the `gsom` module imported below is provided by the `pygsom`
# package. Install it before running this script, e.g. `pip install pygsom`.
# (The notebook shell escape `!pip install pygsom` is only understood by
# IPython/Colab and is a syntax error in a plain .py file.)
import pandas as pd
import gsom
"""***Data loading***"""
# Read the review data from the workbook; replace the placeholder file name
# with the real path before running.
df = pd.read_excel('File-Name.xlsx')
# A bare `df.head()` only renders inside a notebook cell; print it so the
# preview is also visible when this file runs as a script.
print(df.head())
"""***Feature selection***"""
# Keep columns at positions 1..15 (position 0 is skipped) as training features.
data_training = df.iloc[:, 1:16]
print(data_training.columns)
"""***Visualizing variable distributions***"""
import warnings

# Suppress every warning from here on (kept from the original notebook).
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import seaborn as sns

# Visualize the distribution of each variable.
# Only the columns summarised by `describe()` are plotted.
plotted_columns = data_training.describe().columns

plt.figure(figsize=(15, 30))
# The 12 x 5 grid has room for at most 60 variables.
for position, column in enumerate(plotted_columns, start=1):
    plt.subplot(12, 5, position)
    # `distplot` is deprecated; `histplot` with a density-scaled histogram and
    # a KDE overlay (cut=3) is the closest replacement for its default output.
    sns.histplot(
        x=data_training[column],
        kde=True,
        stat="density",
        kde_kws={"cut": 3},
    )
    plt.xlabel(column)
    plt.title('{} Distribution'.format(column))

# Lay the subplots out once, after all of them have been drawn.
plt.tight_layout()
plt.show()
"""***Training GSOM***
***Change feature count***
"""
# The map dimensionality is taken from the training frame itself so it always
# matches the number of selected feature columns (previously hard-coded to 15
# and had to be edited by hand whenever the feature selection changed).
# NOTE(review): 0.83 is presumably the spread factor and 100 / 50 the training
# and smoothing iteration counts -- confirm against the pygsom API.
gsom_map = gsom.GSOM(0.83, data_training.shape[1], max_radius=4)
gsom_map.fit(data_training.to_numpy(), 100, 50)
"""***Predict using GSOM***"""
# Rebuild the feature frame from raw values (fresh default index) and attach
# the record ID twice: once as the unique id column, once as the label column.
df_all = pd.DataFrame(data_training.to_numpy(), columns=data_training.columns)
df_all["uid"] = df["ID"].values
df_all["data-title"] = df["ID"].values
"""***Visualization***"""
map_points = gsom_map.predict(df_all, "uid", "data-title")
gsom.plot(
    map_points,
    "data-title",
    gsom_map=gsom_map,
    figure_label='IB_Analysis',
    file_name='IB_Analysis',
    show_index=False,
    cmap_colors="Paired",
)
map_points.to_csv("GSOM_IB.csv", index=False)
"""***Mapping GSOM points to IDs***"""
def get_mappoint_id_mapping(map_points):
    """Flatten per-node map points into one row per mapped ID.

    Args:
        map_points: DataFrame whose rows expose ``uid``, ``x``, ``y``,
            ``output`` and ``hit_count``. Each ``uid`` value must be an
            iterable of IDs.

    Returns:
        A new DataFrame with columns ``uid``, ``x``, ``y``, ``output`` and
        ``hit_count`` holding one row per element of every ``uid`` iterable;
        the other four values are copied from the row the ID came from.
        An input without rows yields an empty frame with the same columns.
    """
    output_header = ['uid', 'x', 'y', 'output', 'hit_count']
    output_data = []
    for row in map_points.itertuples():
        # Emit one output record per ID collected in this row.
        output_data.extend(
            [uid, row.x, row.y, row.output, row.hit_count] for uid in row.uid
        )
    return pd.DataFrame(output_data, columns=output_header)
"""***Viewing predicted map points***"""
# Printed explicitly: a bare `.head()` expression shows nothing when this
# file is executed as a script rather than as a notebook cell.
print(map_points.head())
"""***Saving to CSV file***"""
# Expand the per-node ID lists into one row per ID and persist the result.
output_df = get_mappoint_id_mapping(map_points)
output_df.to_csv('mappoint_id_mapping.csv', index=False)
"""***Get GSOM map weights***"""
# NOTE(review): `node_list` may contain pre-allocated rows beyond the nodes
# actually grown during training -- confirm against pygsom before relying on
# the row count of this frame.
df_weights = pd.DataFrame(gsom_map.node_list)
print(df_weights.head())