Heroes of Pymoli
In [28]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [29]:
# Read the purchase records from the CSV file (path is relative to the
# current working directory).
purchase = pd.read_csv("purchase_data.csv")
# Preview the first five rows (DataFrame.head defaults to n=5).
purchase.head()
Out[29]:
In [30]:
# Print a concise summary of the frame: columns, dtypes, non-null counts
# and memory usage.
purchase.info()
In [206]:
# Total number of players: distinct non-null values in the "SN" column,
# so a player with several purchases is counted once.
total_players = purchase.SN.nunique()
total_players
Out[206]:
In [32]:
# Number of unique items: distinct non-null values in the "Item Name" column.
unique_items = purchase.loc[:, "Item Name"].nunique()
unique_items
Out[32]:
In [33]:
# Average purchase price: mean of the "Price" column over all purchase rows.
avg_price = purchase.Price.mean()
avg_price
Out[33]:
In [34]:
# Total number of purchases: how many rows carry a non-null "Purchase ID".
total_purchase = purchase["Purchase ID"].notna().sum()
total_purchase
Out[34]:
In [35]:
# Total revenue: sum of the "Price" column over all purchase rows.
total_revenue = purchase.Price.sum()
total_revenue
Out[35]:
In [36]:
# Gender demographics: count and percentage of Male, Female and
# Other / Non-Disclosed players.
#
# Every player is counted once. The data has one row per purchase, so counting
# rows per gender and subtracting them from the unique-player total mixes
# purchases with players and can yield a negative "other" count.
gender_by_player = (
    purchase.dropna(subset=["SN"])           # total_players (nunique) ignores missing SN too
    .drop_duplicates(subset="SN")["Gender"]  # keep the gender from each player's first row
)
total_male_players = (gender_by_player == "Male").sum()
total_female_players = (gender_by_player == "Female").sum()
# Everyone who is neither "Male" nor "Female", including a missing gender.
total_other_players = total_players - (total_male_players + total_female_players)

# Share of each group among all unique players, in percent.
per_of_male = ((total_male_players * 100) / total_players).round(2)
per_of_female = ((total_female_players * 100) / total_players).round(2)
per_of_other = ((total_other_players * 100) / total_players).round(2)

y = pd.Series({"Male": per_of_male,
               "Female": per_of_female,
               "Others/Non Disclosed": per_of_other})
plt.title("Gender Demographic")
y.plot(kind="pie", figsize=(12, 8), autopct='%.2f%%');
In [37]:
# Purchasing analysis broken down by gender. For each group:
#   - purchase count (kept as a percentage share, see below)
#   - average purchase price
#   - total purchase value
#   - average purchase total per person
#
# This cell handles rows whose Gender is "Male".
male_purchases = purchase[purchase["Gender"] == "Male"]

# Share (in percent) of all purchase rows made by this group. It is derived
# from purchases only; the previous formula divided by the number of unique
# players, which is a different unit. The value stays a percentage so it
# remains comparable with the other per-gender figures fed to the pie chart.
purchase_count_of_male = round(len(male_purchases) * 100 / len(purchase), 2)
avg_purchase_price_of_male = male_purchases["Price"].mean()
total_purchase_of_male = male_purchases["Price"].sum()
# "Per person" means per distinct player, so divide by unique SN values
# rather than by the number of purchase rows.
average_purchase_total_per_male = total_purchase_of_male / male_purchases["SN"].nunique()
In [38]:
# Purchasing analysis for rows whose Gender is "Female".
female_purchases = purchase[purchase["Gender"] == "Female"]

# Share (in percent) of all purchase rows made by this group. It is derived
# from purchases only; the previous formula divided by the number of unique
# players, which is a different unit.
purchase_count_of_female = round(len(female_purchases) * 100 / len(purchase), 2)
avg_purchase_price_of_female = female_purchases["Price"].mean()
total_purchase_of_female = female_purchases["Price"].sum()
# "Per person" means per distinct player, so divide by unique SN values
# rather than by the number of purchase rows.
average_purchase_total_per_female = total_purchase_of_female / female_purchases["SN"].nunique()
In [39]:
# Purchasing analysis for rows whose Gender is neither "Male" nor "Female"
# (other / non-disclosed, including a missing value).
#
# The expression ("Female" and "Male") evaluates to just "Male", so comparing
# with != only excluded male rows and silently kept female ones. isin() tests
# against both values.
other_purchases = purchase[~purchase["Gender"].isin(["Male", "Female"])]

# Share (in percent) of all purchase rows made by this group. It is derived
# from purchases only; the previous formula divided by the number of unique
# players, which is a different unit.
purchase_count_of_other = round(len(other_purchases) * 100 / len(purchase), 2)
avg_purchase_price_of_other = other_purchases["Price"].mean()
total_purchase_of_other = other_purchases["Price"].sum()
# "Per person" means per distinct player, so divide by unique SN values
# rather than by the number of purchase rows.
average_purchase_total_per_other = total_purchase_of_other / other_purchases["SN"].nunique()
In [40]:
# Pie chart of the three per-gender "purchase count" figures.
gender_labels = ["Male", "Female", "Other/Non Disclosed"]
gender_values = [purchase_count_of_male, purchase_count_of_female, purchase_count_of_other]

plt.figure(figsize=(12, 8))
plt.pie(x=gender_values, labels=gender_labels, autopct="%.2f%%")
plt.title("Purchase Count by Gender");
In [46]:
# Bar chart comparing the average purchase price of each gender group.
price_by_gender = {
    "Female": avg_purchase_price_of_female,
    "Male": avg_purchase_price_of_male,
    "Other/Non Disclosed": avg_purchase_price_of_other,
}
plt.bar(x=list(price_by_gender.keys()), height=list(price_by_gender.values()))
plt.ylabel("Avg Purchasing Price")
plt.title("Purchasing Analysis(Gender)");
In [50]:
# Age group 1: purchases by players younger than 10.
under_10 = purchase[purchase["Age"] < 10]

purchase_count_of_bin1 = under_10["Purchase ID"].count()
avg_purchase_price_of_bin1 = under_10["Price"].mean()
total_purchase_value_of_bin1 = under_10["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin1 = total_purchase_value_of_bin1 / under_10["SN"].nunique()
In [51]:
# Age group 2: purchases by players aged 10-14 (between() is inclusive on both ends).
age_10_14 = purchase[purchase["Age"].between(10, 14)]

purchase_count_of_bin2 = age_10_14["Purchase ID"].count()
avg_purchase_price_of_bin2 = age_10_14["Price"].mean()
total_purchase_value_of_bin2 = age_10_14["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin2 = total_purchase_value_of_bin2 / age_10_14["SN"].nunique()
In [77]:
# Age group 3: purchases by players aged 15-19 (between() is inclusive on both ends).
age_15_19 = purchase[purchase["Age"].between(15, 19)]

purchase_count_of_bin3 = age_15_19["Purchase ID"].count()
avg_purchase_price_of_bin3 = age_15_19["Price"].mean()
total_purchase_value_of_bin3 = age_15_19["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin3 = total_purchase_value_of_bin3 / age_15_19["SN"].nunique()
In [76]:
# Age group 4: purchases by players aged 20-24 (between() is inclusive on both ends).
age_20_24 = purchase[purchase["Age"].between(20, 24)]

purchase_count_of_bin4 = age_20_24["Purchase ID"].count()
avg_purchase_price_of_bin4 = age_20_24["Price"].mean()
total_purchase_value_of_bin4 = age_20_24["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin4 = total_purchase_value_of_bin4 / age_20_24["SN"].nunique()
# Display the total spent by this age group.
total_purchase_value_of_bin4
Out[76]:
In [64]:
# Age group 5: purchases by players aged 25-29 (between() is inclusive on both ends).
age_25_29 = purchase[purchase["Age"].between(25, 29)]

purchase_count_of_bin5 = age_25_29["Purchase ID"].count()
avg_purchase_price_of_bin5 = age_25_29["Price"].mean()
total_purchase_value_of_bin5 = age_25_29["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin5 = total_purchase_value_of_bin5 / age_25_29["SN"].nunique()
In [65]:
# Age group 6: purchases by players aged 30-34 (between() is inclusive on both ends).
age_30_34 = purchase[purchase["Age"].between(30, 34)]

purchase_count_of_bin6 = age_30_34["Purchase ID"].count()
avg_purchase_price_of_bin6 = age_30_34["Price"].mean()
total_purchase_value_of_bin6 = age_30_34["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin6 = total_purchase_value_of_bin6 / age_30_34["SN"].nunique()
In [66]:
# Age group 7: purchases by players aged 35-39 (between() is inclusive on both ends).
age_35_39 = purchase[purchase["Age"].between(35, 39)]

purchase_count_of_bin7 = age_35_39["Purchase ID"].count()
avg_purchase_price_of_bin7 = age_35_39["Price"].mean()
total_purchase_value_of_bin7 = age_35_39["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin7 = total_purchase_value_of_bin7 / age_35_39["SN"].nunique()
In [70]:
# Age group 8: purchases by players aged 40 and over.
# The previous group ends at 39, so the comparison must be >= 40; with > 40
# players aged exactly 40 fell into no group at all.
age_40_plus = purchase[purchase["Age"] >= 40]

purchase_count_of_bin8 = age_40_plus["Purchase ID"].count()
avg_purchase_price_of_bin8 = age_40_plus["Price"].mean()
total_purchase_value_of_bin8 = age_40_plus["Price"].sum()
# Per person: divide by distinct players. Dividing by the number of rows
# just reproduces the average purchase price.
avg_purchase_total_per_bin8 = total_purchase_value_of_bin8 / age_40_plus["SN"].nunique()
In [71]:
# Bar chart of the purchase count in each age group.
# The third label is "15-19" to match the bin's actual bounds (it read
# "14-19"), and the last label is "40+" like the other age charts.
age_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
age_purchase_counts = [
    purchase_count_of_bin1,
    purchase_count_of_bin2,
    purchase_count_of_bin3,
    purchase_count_of_bin4,
    purchase_count_of_bin5,
    purchase_count_of_bin6,
    purchase_count_of_bin7,
    purchase_count_of_bin8,
]
plt.title("Age Demographic")
plt.ylabel("Count")
plt.bar(x=age_labels, height=age_purchase_counts);
In [86]:
# Bar chart of the average total purchase per person in each age group.
# The third label is "15-19" to match the bin's actual bounds (it read "14-19").
age_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
age_colors = ["blue", "orange", "green", "red", "violet", "brown", "pink", "silver"]
avg_total_per_person = [
    avg_purchase_total_per_bin1,
    avg_purchase_total_per_bin2,
    avg_purchase_total_per_bin3,
    avg_purchase_total_per_bin4,
    avg_purchase_total_per_bin5,
    avg_purchase_total_per_bin6,
    avg_purchase_total_per_bin7,
    avg_purchase_total_per_bin8,
]
plt.title("Purchasing Analysis(Age)")
plt.ylabel("Avg Total Purchase Per Person")
plt.bar(x=age_labels, height=avg_total_per_person, color=age_colors);
In [87]:
# Bar chart of the total purchase value in each age group.
# The third label is "15-19" to match the bin's actual bounds (it read "14-19").
age_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
age_colors = ["blue", "orange", "green", "red", "violet", "brown", "pink", "silver"]
total_value_per_age = [
    total_purchase_value_of_bin1,
    total_purchase_value_of_bin2,
    total_purchase_value_of_bin3,
    total_purchase_value_of_bin4,
    total_purchase_value_of_bin5,
    total_purchase_value_of_bin6,
    total_purchase_value_of_bin7,
    total_purchase_value_of_bin8,
]
plt.title("Purchasing Analysis(Age)")
plt.ylabel("Total Purchase Value")
plt.bar(x=age_labels, height=total_value_per_age, color=age_colors);
In [93]:
# Top 5 spenders: rank players by the TOTAL they spent across all their
# purchases. Sorting the raw rows by "Price" only finds the five most
# expensive single purchases, which is not the same thing.
top_five = (
    purchase.groupby("SN")["Price"]
    .agg(["count", "mean", "sum"])
    .rename(columns={
        "count": "Purchase Count",
        "mean": "Average Purchase Price",
        "sum": "Total Purchase Value",
    })
    .sort_values("Total Purchase Value", ascending=False)
    .head(5)
)
top_five
Out[93]:
In [159]:
# 5 most popular items by purchase count.
# value_counts() already sorts in descending order; head(5) keeps only the
# top five instead of dumping the count for every item.
purchase["Item Name"].value_counts().head(5)
Out[159]:
In [173]:
# 5 most profitable items by total purchase value.
# Profitability is the revenue summed over every purchase of an item, so
# aggregate per item first; sorting raw rows by "Price" only lists the most
# expensive individual purchases (and displayed the whole frame).
(
    purchase.groupby("Item Name")["Price"]
    .agg(["count", "sum"])
    .rename(columns={"count": "Purchase Count", "sum": "Total Purchase Value"})
    .sort_values("Total Purchase Value", ascending=False)
    .head(5)
)
Out[173]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Comments
Post a Comment