#Analysis Report
- 81% of players are male which also make the most of the percentage of the revenue.
- The game is most popular among the age group of 15 years to 25 years and after that age group there is drastic decline in player's interest.
- The most popular items are below the average price of most of the items.
- However the most popular items are not the most profitable items. The most profitable items are the item number 34 and 115 which are more pricy.
import pandas as pd
from pprint import pprint
data = pd.read_json('purchase_data.json')
# View data
# print(data.head())
# Number of players
unique_players = len(data["SN"].unique())
pd.DataFrame({"Total Players": [unique_players]})
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Total Players | |
---|---|
0 | 573 |
temp_df = pd.DataFrame()
# * Number of Unique Items
unique_items = len(data['Item ID'].unique())
temp_df["Number of Unique Items"] = [unique_items]
# * Average Purchase Price
avg_price = round(data["Price"].mean(), 2)
temp_df["Average Purchase Price"] = avg_price
# * Total Number of Purchases
total_purchases = len(data)
temp_df["Total Purchases"] = total_purchases
# * Total Revenue
total_revenue = round(data["Price"].sum(), 2)
temp_df["Total Revenue"] = total_revenue
temp_df
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Number of Unique Items | Average Purchase Price | Total Purchases | Total Revenue | |
---|---|---|---|---|
0 | 183 | 2.93 | 780 | 2286.33 |
# Removing duplicate players from the data
data_without_duplicate_players = data.drop_duplicates(subset=["SN"])
# Seperating the no duplicate data into gender groups
gender_groups = data_without_duplicate_players["Gender"].value_counts()
# Printing Percentage of palyers and total count according to their gender
x = pd.DataFrame({"Total Count":gender_groups,
"Percentage of Players": round(gender_groups/unique_players * 100, 2)})
x
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Percentage of Players | Total Count | |
---|---|---|
Male | 81.15 | 465 |
Female | 17.45 | 100 |
Other / Non-Disclosed | 1.40 | 8 |
# Number of purchases by each gender
grouped_data = data.groupby(["Gender"])
purchases = pd.DataFrame()
for gender, info in grouped_data:
price_column = info["Price"]
purchase = pd.DataFrame({
"Total Purchases" : [len(info)],
"Average Purchase Price" : [round(price_column.mean(), 2)],
"Total Purchase Value" : [price_column.sum()]
}, index=[gender])
purchases = purchases.append(purchase)
# TODO : Figure out how to calculate Normalized Totals
purchases
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Average Purchase Price | Total Purchase Value | Total Purchases | |
---|---|---|---|
Female | 2.82 | 382.91 | 136 |
Male | 2.95 | 1867.68 | 633 |
Other / Non-Disclosed | 3.25 | 35.74 | 11 |
# TODO: Need to Redo - Incorrect
# Number of purchases by each gender
grouped_data = data.groupby(["Age"])
purchases = pd.DataFrame()
for age, info in grouped_data:
price_column = info["Price"]
purchase = pd.DataFrame({
"Total Purchases" : [len(info)],
"Average Purchase Price" : [round(price_column.mean(), 2)],
"Total Purchase Value" : [price_column.sum()]
}, index=[age])
purchases = purchases.append(purchase)
# TODO : Figure out how to calculate Normalized Totals
purchases
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Average Purchase Price | Total Purchase Value | Total Purchases | |
---|---|---|---|
7 | 2.92 | 55.47 | 19 |
8 | 1.96 | 5.87 | 3 |
9 | 3.69 | 22.12 | 6 |
10 | 3.29 | 13.16 | 4 |
11 | 2.97 | 26.76 | 9 |
12 | 3.84 | 19.21 | 5 |
13 | 2.14 | 23.52 | 11 |
14 | 2.38 | 14.30 | 6 |
15 | 2.99 | 140.36 | 47 |
16 | 2.64 | 60.71 | 23 |
17 | 3.01 | 51.11 | 17 |
18 | 2.80 | 67.14 | 24 |
19 | 3.05 | 67.10 | 22 |
20 | 2.88 | 282.68 | 98 |
21 | 2.80 | 120.37 | 43 |
22 | 3.03 | 206.05 | 68 |
23 | 2.74 | 156.21 | 57 |
24 | 3.05 | 213.46 | 70 |
25 | 3.08 | 206.52 | 67 |
26 | 2.45 | 31.83 | 13 |
27 | 3.06 | 58.21 | 19 |
28 | 2.56 | 12.81 | 5 |
29 | 2.90 | 60.96 | 21 |
30 | 3.11 | 56.01 | 18 |
31 | 2.98 | 47.62 | 16 |
32 | 3.41 | 37.50 | 11 |
33 | 2.89 | 31.80 | 11 |
34 | 3.04 | 24.32 | 8 |
35 | 3.08 | 37.02 | 12 |
36 | 2.88 | 20.14 | 7 |
37 | 2.35 | 21.11 | 9 |
38 | 2.87 | 25.79 | 9 |
39 | 3.07 | 15.34 | 5 |
40 | 3.22 | 45.11 | 14 |
42 | 2.11 | 2.11 | 1 |
43 | 3.81 | 3.81 | 1 |
45 | 2.72 | 2.72 | 1 |
# Grouping the players and their purchases.
grouped_players = data.groupby(["SN"])
players_info = pd.DataFrame()
# Going through every player and storing important values
for name, info in grouped_players:
price_column = info["Price"]
spender_info = pd.DataFrame({
'Purchase Count': [len(price_column)],
'Average Purchase Price' : [price_column.mean()],
'Total Purchase Value' : [price_column.sum()]
}, index=[name])
# Need to store append value back into players_info because the person who wrote append was retarted
players_info = players_info.append(spender_info, ignore_index=False)
top_5_spenders = players_info.sort_values(by="Total Purchase Value", ascending=False).head()
top_5_spenders.style.format({'Total Purchase Value': '${:.2f}', 'Average Purchase Price': '${:.2f}'})
Average Purchase Price | Purchase Count | Total Purchase Value | |
---|---|---|---|
Undirrala66 | $3.41 | 5 | $17.06 |
Saedue76 | $3.39 | 4 | $13.56 |
Mindimnya67 | $3.18 | 4 | $12.74 |
Haellysu29 | $4.24 | 3 | $12.73 |
Eoda93 | $3.86 | 3 | $11.58 |
grouped_items = data.groupby(["Item ID"])
items = pd.DataFrame()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Using mean to get just 1 value instead of all the values #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
for item_id, info in grouped_items:
item = pd.DataFrame({
"Item ID": [item_id],
"Purchase Count" : [len(info)],
"Item Price": [info["Price"].mean()],
"Total Purchase Value": [info["Price"].sum()]
}, index=[[data.loc[item_id]["Item Name"]]])
items = items.append(item, ignore_index=False)
top_5_items = items.sort_values(by="Purchase Count", ascending=False).head()
top_5_items
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Item ID | Item Price | Purchase Count | Total Purchase Value | |
---|---|---|---|---|
Stormfury Mace | 39 | 2.35 | 11 | 25.85 |
Thorn, Satchel of Dark Souls | 84 | 2.23 | 11 | 24.53 |
Shadow Strike, Glory of Ending Hope | 31 | 2.07 | 9 | 18.63 |
Retribution Axe | 175 | 1.24 | 9 | 11.16 |
Piety, Guardian of Riddles | 13 | 1.49 | 9 | 13.41 |
top_5_profitable_items = items.sort_values(by="Total Purchase Value", ascending=False).head()
top_5_profitable_items
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
Item ID | Item Price | Purchase Count | Total Purchase Value | |
---|---|---|---|---|
Alpha, Reach of Ending Hope | 34 | 4.14 | 9 | 37.26 |
Thorn, Conqueror of the Corrupted | 115 | 4.25 | 7 | 29.75 |
Rage, Legacy of the Lone Victor | 32 | 4.95 | 6 | 29.70 |
Mercy, Katana of Dismay | 103 | 4.87 | 6 | 29.22 |
Spectral Diamond Doomblade | 107 | 3.61 | 8 | 28.88 |