Loan Data Analysis
In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Allow up to 500 columns to be rendered, read the CSV into `loan`,
# and show the resulting frame as the cell output.
pd.options.display.max_columns = 500
loan = pd.read_csv("Prosper-Loan-Data-Analysis-main/prosperLoanData.csv")
loan
Out[2]:
In [3]:
# Remove every row whose CreditScoreRangeLower is missing.
# `drop_value` holds the index labels of those rows. `loan` is rebound to the
# filtered result instead of using drop(..., inplace=True), so the frame is
# never mutated in place.
drop_value = loan.index[loan["CreditScoreRangeLower"].isna()]
loan = loan.drop(index=drop_value)
In [4]:
# Print the column names, non-null counts and dtypes of `loan`.
loan.info()
In [5]:
# State with the highest number of borrowers.
# value_counts() sorts by frequency (descending), so head(1) is the most
# common state; its index label is shown as a one-row frame.
pd.DataFrame({"Top Borrower State": loan["BorrowerState"].value_counts().head(1).index})
Out[5]:
In [6]:
# Number of rows for each value of IsBorrowerHomeowner.
loan.loc[:, "IsBorrowerHomeowner"].value_counts()
Out[6]:
In [27]:
# Bar chart of how many borrowers fall under each IsBorrowerHomeowner value.
sns.countplot(data=loan, x="IsBorrowerHomeowner");
In [8]:
# Number of borrowers in each EmploymentStatus category.
plt.figure(figsize=(10, 8))
sns.countplot(data=loan, x="EmploymentStatus");
In [9]:
# Distribution of StatedMonthlyIncome, rounded to whole units, in 2,500-wide bins.
sns.set_style("whitegrid")
plt.figure(figsize=(10, 8))
# np.arange excludes its stop value, so the bin edges run 0, 2500, ..., 27500;
# incomes above 27,500 are not drawn.
# NOTE(review): if the range was meant to reach 30,000, use
# np.arange(0, 30000 + 2500, 2500) — confirm the intended upper limit.
plt.hist(x=loan["StatedMonthlyIncome"].round(), bins=np.arange(0, 30000, 2500));
In [91]:
# Box plots of the lower and upper credit score range, stacked vertically
# on a shared x-axis so the two distributions can be compared directly.
fig, axes = plt.subplots(nrows=2, figsize=(12, 6), sharey=False, sharex=True)

sns.boxplot(x="CreditScoreRangeLower", data=loan, color="yellow", ax=axes[0])
axes[0].set_title("Credit Score Range Lower Distribution")
axes[0].set_xlabel("Credit Score Range Lower")

sns.boxplot(x="CreditScoreRangeUpper", data=loan, color="red", ax=axes[1])
axes[1].set_title("Credit Score Range Upper Distribution")
axes[1].set_xlabel("Credit Score Range Upper");
In [98]:
# Does home ownership lead to a higher Prosper Score?
# Letter-value (boxen) plot of ProsperScore for each IsBorrowerHomeowner value.
fig = plt.figure(figsize=(10, 6))
# Trailing semicolon suppresses the Axes repr, matching the other plotting cells.
sns.boxenplot(x="IsBorrowerHomeowner", y="ProsperScore", data=loan);
Out[98]:
In [11]:
# Distribution of ProsperScore across borrowers.
plt.figure(figsize=(10, 8))
# NOTE(review): plt.hist defaults to 10 equal-width bins; if the scores are
# integers spanning more than 10 distinct values, two scores share one bar —
# confirm and set explicit bin edges if so.
plt.hist(x=loan["ProsperScore"]);
In [12]:
# Number of loans for each Term value, drawn as a bar chart with
# horizontal (unrotated) tick labels.
plt.figure(figsize=(10, 8))
loan["Term"].value_counts().plot.bar(rot=0);
In [13]:
# Number of loans per listing-creation year.
# The year is extracted from the parsed ListingCreationDate timestamps.
dt = pd.to_datetime(loan["ListingCreationDate"])
year = dt.dt.year
ax = sns.countplot(x=year)
ax.set(xlabel="Year", ylabel="No. of Loans");
In [81]:
# StatedMonthlyIncome for each ProsperScore, using only rows where both
# values are present.
income_score = loan.loc[:, ["StatedMonthlyIncome", "ProsperScore"]].dropna()
fig, ax = plt.subplots(figsize=(12, 6))
sns.pointplot(data=income_score, x="ProsperScore", y="StatedMonthlyIncome", ax=ax);
In [35]:
# Borrower counts per ProsperScore, split by IsBorrowerHomeowner.
# Rows missing either value are excluded first.
borrower_score = loan.loc[:, ["ProsperScore", "IsBorrowerHomeowner"]].dropna()
fig, ax = plt.subplots(figsize=(10, 8))
sns.countplot(data=borrower_score, x="ProsperScore", hue="IsBorrowerHomeowner", ax=ax);
In [36]:
# Employed Borrowers take loans of Higher Amounts when compared to Borrowers who are Retired or Unemployed.
# Plot LoanOriginalAmount per EmploymentStatus so the figure actually compares
# loan amounts; a bar chart of value_counts() only shows how many borrowers
# are in each status and says nothing about amounts.
plt.figure(figsize=(10, 8))
# pointplot draws one point estimate (seaborn's default estimator is the mean)
# with an error bar for each category.
sns.pointplot(x="EmploymentStatus", y="LoanOriginalAmount", data=loan);
In [39]:
# Unemployed Borrowers are charged a Higher Interest Rate in comparison to Employed and Retired Borrowers.
# Point estimate of BorrowerRate for each EmploymentStatus category.
fig = plt.figure(figsize=(12, 6))
sns.pointplot(x="EmploymentStatus", y="BorrowerRate", data=loan);
In [48]:
# Employed Borrowers are assigned a Higher Prosper Score.
# Keep only rows where both EmploymentStatus and ProsperScore are present,
# and plot from that prepared frame rather than from the full `loan` table.
e_score = loan[["EmploymentStatus", "ProsperScore"]].dropna()
plt.figure(figsize=(10, 8))
# Horizontal box plot: one box of ProsperScore per employment status.
sns.boxplot(x="ProsperScore", y="EmploymentStatus", data=e_score);
In [50]:
# The Interest Rate is Negatively Correlated with Prosper Score. Higher Prosper Score leads to Lower Interest Rates.
# Rows missing either value are excluded before plotting.
pb = loan[["ProsperScore", "BorrowerRate"]].dropna()
fig = plt.figure(figsize=(12, 6))
sns.pointplot(x="ProsperScore", y="BorrowerRate", data=pb);
In [52]:
# The Loan Amounts given has increased over the years.
# Parse LoanOriginationDate once and pair the origination year with the loan amount.
year_amount = pd.DataFrame({
    "LoanYear": pd.to_datetime(loan["LoanOriginationDate"]).dt.year,
    "LoanAmount": loan["LoanOriginalAmount"],
})
# Total amount per year; kept available for inspection, not drawn below.
ya = year_amount.groupby("LoanYear").sum()

plt.figure(figsize=(10, 8))
# pointplot aggregates LoanAmount within each LoanYear (seaborn's default
# estimator is the mean), so the curve shows the typical loan size per year.
sns.pointplot(x="LoanYear", y="LoanAmount", data=year_amount)
plt.xlabel("Loan Year")
plt.ylabel("Loan Amount");
In [54]:
# Higher Loan Amount also leads to a Higher Loan Term.
# Box plot of LoanOriginalAmount for each Term value.
plt.figure(figsize=(10, 8))
sns.boxplot(x="Term", y="LoanOriginalAmount", data=loan);
In [95]:
# The Prosper Score is Positively Correlated with On-Time Monthly Payments. The borrowers with Higher Prosper Scores are more likely to make On-Time Monthly Payments.
# NOTE(review): the figure below plots MonthlyLoanPayment against
# LoanOriginalAmount, coloured by ProsperScore; it does not use an on-time
# payment column — confirm that it supports the statement above.
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(
    data=loan,
    x="LoanOriginalAmount",
    y="MonthlyLoanPayment",
    hue="ProsperScore",
    palette="OrRd",
    ax=ax,
);
In [59]:
# A Higher Credit Score leads to a Higher Prosper Score.
# Rows missing any of the three columns are dropped, then ProsperScore is
# plotted against CreditScoreRangeLower.
credit = loan.loc[:, ["CreditScoreRangeLower", "CreditScoreRangeUpper", "ProsperScore"]].dropna()
fig, ax = plt.subplots(figsize=(12, 6))
sns.pointplot(data=credit, x="CreditScoreRangeLower", y="ProsperScore", ax=ax);
In [60]:
# The Estimated Loss reduces with an increase in the Borrowers Prosper Score.
# Only rows with both EstimatedLoss and ProsperScore present are plotted.
eloss = loan.loc[:, ["EstimatedLoss", "ProsperScore"]].dropna()
fig, ax = plt.subplots(figsize=(10, 8))
sns.pointplot(data=eloss, x="ProsperScore", y="EstimatedLoss", ax=ax);
In [68]:
# Borrowers who have Fewer Current Delinquencies and Higher On-Time Payments, are more likely to have Higher Number of Loans.
# Scatter of OnTimeProsperPayments against CurrentDelinquencies, coloured by TotalProsperLoans.
plt.figure(figsize=(12, 8))
sns.scatterplot(
    x="CurrentDelinquencies",
    y="OnTimeProsperPayments",
    hue="TotalProsperLoans",
    data=loan,
);
In [99]:
# The Estimated Loss increases with the increase in Interest Rate and Yield.
# Scatter of LenderYield against BorrowerRate, coloured by EstimatedLoss.
plt.figure(figsize=(12, 8))
sns.scatterplot(x="BorrowerRate", y="LenderYield", hue="EstimatedLoss", data=loan);
In [ ]:
Comments
Post a Comment