Loan Data Analysis

temp-162419136819231271
In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
pd.set_option('display.max_columns', 500)
loan = pd.read_csv("Prosper-Loan-Data-Analysis-main/prosperLoanData.csv")
loan
Out[2]:
ListingKey ListingNumber ListingCreationDate CreditGrade Term LoanStatus ClosedDate BorrowerAPR BorrowerRate LenderYield EstimatedEffectiveYield EstimatedLoss EstimatedReturn ProsperRating (numeric) ProsperRating (Alpha) ProsperScore ListingCategory (numeric) BorrowerState Occupation EmploymentStatus EmploymentStatusDuration IsBorrowerHomeowner CurrentlyInGroup GroupKey DateCreditPulled CreditScoreRangeLower CreditScoreRangeUpper FirstRecordedCreditLine CurrentCreditLines OpenCreditLines TotalCreditLinespast7years OpenRevolvingAccounts OpenRevolvingMonthlyPayment InquiriesLast6Months TotalInquiries CurrentDelinquencies AmountDelinquent DelinquenciesLast7Years PublicRecordsLast10Years PublicRecordsLast12Months RevolvingCreditBalance BankcardUtilization AvailableBankcardCredit TotalTrades TradesNeverDelinquent (percentage) TradesOpenedLast6Months DebtToIncomeRatio IncomeRange IncomeVerifiable StatedMonthlyIncome LoanKey TotalProsperLoans TotalProsperPaymentsBilled OnTimeProsperPayments ProsperPaymentsLessThanOneMonthLate ProsperPaymentsOneMonthPlusLate ProsperPrincipalBorrowed ProsperPrincipalOutstanding ScorexChangeAtTimeOfListing LoanCurrentDaysDelinquent LoanFirstDefaultedCycleNumber LoanMonthsSinceOrigination LoanNumber LoanOriginalAmount LoanOriginationDate LoanOriginationQuarter MemberKey MonthlyLoanPayment LP_CustomerPayments LP_CustomerPrincipalPayments LP_InterestandFees LP_ServiceFees LP_CollectionFees LP_GrossPrincipalLoss LP_NetPrincipalLoss LP_NonPrincipalRecoverypayments PercentFunded Recommendations InvestmentFromFriendsCount InvestmentFromFriendsAmount Investors
0 1021339766868145413AB3B 193129 2007-08-26 19:09:29.263000000 C 36 Completed 2009-08-14 00:00:00 0.16516 0.1580 0.1380 NaN NaN NaN NaN NaN NaN 0 CO Other Self-employed 2.0 True True NaN 2007-08-26 18:41:46.780000000 640.0 659.0 2001-10-11 00:00:00 5.0 4.0 12.0 1 24.0 3.0 3.0 2.0 472.0 4.0 0.0 0.0 0.0 0.00 1500.0 11.0 0.81 0.0 0.17 $25,000-49,999 True 3083.333333 E33A3400205839220442E84 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 78 19141 9425 2007-09-12 00:00:00 Q3 2007 1F3E3376408759268057EDA 330.43 11396.1400 9425.00 1971.1400 -133.18 0.0 0.0 0.0 0.0 1.0 0 0 0.0 258
1 10273602499503308B223C1 1209647 2014-02-27 08:28:07.900000000 NaN 36 Current NaN 0.12016 0.0920 0.0820 0.07960 0.0249 0.05470 6.0 A 7.0 2 CO Professional Employed 44.0 False False NaN 2014-02-27 08:28:14 680.0 699.0 1996-03-18 00:00:00 14.0 14.0 29.0 13 389.0 3.0 5.0 0.0 0.0 0.0 1.0 0.0 3989.0 0.21 10266.0 29.0 1.00 2.0 0.18 $50,000-74,999 True 6125.000000 9E3B37071505919926B1D82 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 0 134815 10000 2014-03-03 00:00:00 Q1 2014 1D13370546739025387B2F4 318.93 0.0000 0.00 0.0000 0.00 0.0 0.0 0.0 0.0 1.0 0 0 0.0 1
2 0EE9337825851032864889A 81716 2007-01-05 15:00:47.090000000 HR 36 Completed 2009-12-17 00:00:00 0.28269 0.2750 0.2400 NaN NaN NaN NaN NaN NaN 0 GA Other Not available NaN False True 783C3371218786870A73D20 2007-01-02 14:09:10.060000000 480.0 499.0 2002-07-27 00:00:00 NaN NaN 3.0 0 0.0 0.0 1.0 1.0 NaN 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN 0.06 Not displayed True 2083.333333 6954337960046817851BCB2 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 86 6466 3001 2007-01-17 00:00:00 Q1 2007 5F7033715035555618FA612 123.32 4186.6300 3001.00 1185.6300 -24.20 0.0 0.0 0.0 0.0 1.0 0 0 0.0 41
3 0EF5356002482715299901A 658116 2012-10-22 11:02:35.010000000 NaN 36 Current NaN 0.12528 0.0974 0.0874 0.08490 0.0249 0.06000 6.0 A 9.0 16 GA Skilled Labor Employed 113.0 True False NaN 2012-10-22 11:02:32 800.0 819.0 1983-02-28 00:00:00 5.0 5.0 29.0 7 115.0 0.0 1.0 4.0 10056.0 14.0 0.0 0.0 1444.0 0.04 30754.0 26.0 0.76 0.0 0.15 $25,000-49,999 True 2875.000000 A0393664465886295619C51 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 16 77296 10000 2012-11-01 00:00:00 Q4 2012 9ADE356069835475068C6D2 321.45 5143.2000 4091.09 1052.1100 -108.01 0.0 0.0 0.0 0.0 1.0 0 0 0.0 158
4 0F023589499656230C5E3E2 909464 2013-09-14 18:38:39.097000000 NaN 36 Current NaN 0.24614 0.2085 0.1985 0.18316 0.0925 0.09066 3.0 D 4.0 2 MN Executive Employed 44.0 True False NaN 2013-09-14 18:38:44 680.0 699.0 2004-02-20 00:00:00 19.0 19.0 49.0 6 220.0 1.0 9.0 0.0 0.0 0.0 0.0 0.0 6193.0 0.81 695.0 39.0 0.95 2.0 0.26 $100,000+ True 9583.333333 A180369302188889200689E 1.0 11.0 11.0 0.0 0.0 11000.0 9947.90 NaN 0 NaN 6 102670 15000 2013-09-20 00:00:00 Q3 2013 36CE356043264555721F06C 563.97 2819.8500 1563.22 1256.6300 -60.27 0.0 0.0 0.0 0.0 1.0 0 0 0.0 20
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
113932 E6D9357655724827169606C 753087 2013-04-14 05:55:02.663000000 NaN 36 Current NaN 0.22354 0.1864 0.1764 0.16490 0.0699 0.09500 4.0 C 5.0 1 IL Food Service Management Employed 246.0 True False NaN 2013-04-14 05:54:58 700.0 719.0 1997-09-01 00:00:00 9.0 9.0 41.0 9 209.0 0.0 0.0 0.0 0.0 7.0 1.0 0.0 7714.0 0.80 1886.0 37.0 0.83 3.0 0.13 $50,000-74,999 True 4333.333333 9BD7367919051593140DB62 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 11 88485 10000 2013-04-22 00:00:00 Q2 2013 2EC435768441332602FDC15 364.74 3647.4000 2238.38 1409.0200 -75.58 0.0 0.0 0.0 0.0 1.0 0 0 0.0 1
113933 E6DB353036033497292EE43 537216 2011-11-03 20:42:55.333000000 NaN 36 FinalPaymentInProgress NaN 0.13220 0.1110 0.1010 0.10070 0.0200 0.08070 6.0 A 8.0 7 PA Professional Employed 21.0 True False NaN 2011-11-03 20:42:53 700.0 719.0 1992-01-17 00:00:00 14.0 13.0 39.0 9 495.0 1.0 4.0 1.0 5062.0 4.0 0.0 0.0 15743.0 0.69 6658.0 39.0 0.92 0.0 0.11 $75,000-99,999 True 8041.666667 62D93634569816897D5A276 3.0 60.0 60.0 0.0 0.0 33501.0 4815.42 -26.0 0 NaN 28 55801 2000 2011-11-07 00:00:00 Q4 2011 55C4336679182766893E4FC 65.57 2330.5500 1997.16 333.3900 -30.05 0.0 0.0 0.0 0.0 1.0 0 0 0.0 22
113934 E6E13596170052029692BB1 1069178 2013-12-13 05:49:12.703000000 NaN 60 Current NaN 0.23984 0.2150 0.2050 0.18828 0.1025 0.08578 3.0 D 3.0 1 TX Other Employed 84.0 True False NaN 2013-12-13 05:49:15 700.0 719.0 2002-02-25 00:00:00 12.0 12.0 25.0 9 521.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 22147.0 0.73 7853.0 25.0 1.00 0.0 0.51 $25,000-49,999 True 2875.000000 DD1A370200396006300ACA0 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 3 123122 10000 2013-12-23 00:00:00 Q4 2013 0FE0370029359765342FDB5 273.35 546.7000 183.15 363.5500 -16.91 0.0 0.0 0.0 0.0 1.0 0 0 0.0 119
113935 E6EB3531504622671970D9E 539056 2011-11-14 13:18:26.597000000 NaN 60 Completed 2013-08-13 00:00:00 0.28408 0.2605 0.2505 0.24450 0.0850 0.15950 4.0 C 5.0 2 GA Food Service Full-time 94.0 True False NaN 2011-11-14 13:18:24 680.0 699.0 1993-12-01 00:00:00 11.0 11.0 22.0 7 488.0 1.0 4.0 0.0 0.0 0.0 1.0 0.0 11956.0 0.69 4137.0 19.0 0.80 1.0 0.48 $25,000-49,999 True 3875.000000 589536350469116027ED11B 1.0 16.0 16.0 0.0 0.0 5000.0 3264.37 -4.0 0 NaN 28 56401 15000 2011-11-21 00:00:00 Q4 2011 A33834861822272782621C8 449.55 21122.5600 15000.00 6122.5600 -235.05 0.0 0.0 0.0 0.0 1.0 0 0 0.0 274
113936 E6ED3600409833199F711B7 1140093 2014-01-15 09:27:37.657000000 NaN 36 Current NaN 0.13189 0.1039 0.0939 0.09071 0.0299 0.06081 6.0 A 7.0 1 NY Professor Employed 244.0 False False NaN 2014-01-15 09:27:40 680.0 699.0 1995-01-01 00:00:00 10.0 9.0 44.0 8 289.0 0.0 1.0 1.0 257.0 3.0 1.0 0.0 6166.0 0.80 675.0 36.0 0.75 0.0 0.23 $50,000-74,999 True 4583.333333 00AF3704550953269A64E40 NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN 2 127508 2000 2014-01-21 00:00:00 Q1 2014 CE1E3704648000761C9F724 64.90 64.3307 47.25 17.0807 -1.70 0.0 0.0 0.0 0.0 1.0 0 0 0.0 1

113937 rows × 81 columns

In [3]:
drop_value = loan[loan["CreditScoreRangeLower"].isnull()].index
loan.drop(index = drop_value, inplace = True)
In [4]:
loan.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 113346 entries, 0 to 113936
Data columns (total 81 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   ListingKey                           113346 non-null  object 
 1   ListingNumber                        113346 non-null  int64  
 2   ListingCreationDate                  113346 non-null  object 
 3   CreditGrade                          28362 non-null   object 
 4   Term                                 113346 non-null  int64  
 5   LoanStatus                           113346 non-null  object 
 6   ClosedDate                           54498 non-null   object 
 7   BorrowerAPR                          113346 non-null  float64
 8   BorrowerRate                         113346 non-null  float64
 9   LenderYield                          113346 non-null  float64
 10  EstimatedEffectiveYield              84853 non-null   float64
 11  EstimatedLoss                        84853 non-null   float64
 12  EstimatedReturn                      84853 non-null   float64
 13  ProsperRating (numeric)              84853 non-null   float64
 14  ProsperRating (Alpha)                84853 non-null   object 
 15  ProsperScore                         84853 non-null   float64
 16  ListingCategory (numeric)            113346 non-null  int64  
 17  BorrowerState                        108422 non-null  object 
 18  Occupation                           110347 non-null  object 
 19  EmploymentStatus                     111680 non-null  object 
 20  EmploymentStatusDuration             106312 non-null  float64
 21  IsBorrowerHomeowner                  113346 non-null  bool   
 22  CurrentlyInGroup                     113346 non-null  bool   
 23  GroupKey                             12966 non-null   object 
 24  DateCreditPulled                     113346 non-null  object 
 25  CreditScoreRangeLower                113346 non-null  float64
 26  CreditScoreRangeUpper                113346 non-null  float64
 27  FirstRecordedCreditLine              113240 non-null  object 
 28  CurrentCreditLines                   106333 non-null  float64
 29  OpenCreditLines                      106333 non-null  float64
 30  TotalCreditLinespast7years           113240 non-null  float64
 31  OpenRevolvingAccounts                113346 non-null  int64  
 32  OpenRevolvingMonthlyPayment          113346 non-null  float64
 33  InquiriesLast6Months                 113240 non-null  float64
 34  TotalInquiries                       112778 non-null  float64
 35  CurrentDelinquencies                 113240 non-null  float64
 36  AmountDelinquent                     106315 non-null  float64
 37  DelinquenciesLast7Years              112947 non-null  float64
 38  PublicRecordsLast10Years             113240 non-null  float64
 39  PublicRecordsLast12Months            106333 non-null  float64
 40  RevolvingCreditBalance               106333 non-null  float64
 41  BankcardUtilization                  106333 non-null  float64
 42  AvailableBankcardCredit              106393 non-null  float64
 43  TotalTrades                          106393 non-null  float64
 44  TradesNeverDelinquent (percentage)   106393 non-null  float64
 45  TradesOpenedLast6Months              106393 non-null  float64
 46  DebtToIncomeRatio                    104798 non-null  float64
 47  IncomeRange                          113346 non-null  object 
 48  IncomeVerifiable                     113346 non-null  bool   
 49  StatedMonthlyIncome                  113346 non-null  float64
 50  LoanKey                              113346 non-null  object 
 51  TotalProsperLoans                    22085 non-null   float64
 52  TotalProsperPaymentsBilled           22085 non-null   float64
 53  OnTimeProsperPayments                22085 non-null   float64
 54  ProsperPaymentsLessThanOneMonthLate  22085 non-null   float64
 55  ProsperPaymentsOneMonthPlusLate      22085 non-null   float64
 56  ProsperPrincipalBorrowed             22085 non-null   float64
 57  ProsperPrincipalOutstanding          22085 non-null   float64
 58  ScorexChangeAtTimeOfListing          18928 non-null   float64
 59  LoanCurrentDaysDelinquent            113346 non-null  int64  
 60  LoanFirstDefaultedCycleNumber        16776 non-null   float64
 61  LoanMonthsSinceOrigination           113346 non-null  int64  
 62  LoanNumber                           113346 non-null  int64  
 63  LoanOriginalAmount                   113346 non-null  int64  
 64  LoanOriginationDate                  113346 non-null  object 
 65  LoanOriginationQuarter               113346 non-null  object 
 66  MemberKey                            113346 non-null  object 
 67  MonthlyLoanPayment                   113346 non-null  float64
 68  LP_CustomerPayments                  113346 non-null  float64
 69  LP_CustomerPrincipalPayments         113346 non-null  float64
 70  LP_InterestandFees                   113346 non-null  float64
 71  LP_ServiceFees                       113346 non-null  float64
 72  LP_CollectionFees                    113346 non-null  float64
 73  LP_GrossPrincipalLoss                113346 non-null  float64
 74  LP_NetPrincipalLoss                  113346 non-null  float64
 75  LP_NonPrincipalRecoverypayments      113346 non-null  float64
 76  PercentFunded                        113346 non-null  float64
 77  Recommendations                      113346 non-null  int64  
 78  InvestmentFromFriendsCount           113346 non-null  int64  
 79  InvestmentFromFriendsAmount          113346 non-null  float64
 80  Investors                            113346 non-null  int64  
dtypes: bool(3), float64(50), int64(11), object(17)
memory usage: 68.6+ MB
In [5]:
#state with the Highest Number of Borrowers.
loan["BorrowerState"].value_counts().head(1)
pd.DataFrame( { "Top Borrower State": loan["BorrowerState"].value_counts().head(1).index })
Out[5]:
Top Borrower State
0 CA
In [6]:
loan["IsBorrowerHomeowner"].value_counts()
Out[6]:
True     57478
False    55868
Name: IsBorrowerHomeowner, dtype: int64
In [27]:
# number of Borrowers who
sns.countplot(x = loan["IsBorrowerHomeowner"]);
In [8]:
#Majority of the portion of Borrowers 
loan["EmploymentStatus"].value_counts()
plt.figure(figsize=(10,8))
sns.countplot(x= loan["EmploymentStatus"]);
In [9]:
#Majority of the Borrowers have an income
loan["StatedMonthlyIncome"].round()
sns.set_style("whitegrid")
plt.figure(figsize=(10,8))
plt.hist(x = loan["StatedMonthlyIncome"].round().sort_values(ascending = False), bins = np.arange(0, 30000, 2500));
In [91]:
#Majority of the Borrowers have a Credit Score
loan["CreditScoreRangeLower"]
fig, axes = plt.subplots(nrows=2,figsize = (12,6), sharey=False, sharex=True,)
sns.boxplot(x='CreditScoreRangeLower', data=loan, color='yellow', ax=axes[0])
axes[0].set_title('Credit Score Range Lower Distribution')
axes[0].set_xlabel('Credit Score Range Lower')
sns.boxplot(x='CreditScoreRangeUpper', data=loan, color='red', ax=axes[1])
axes[1].set_title('Credit Score Range Upper Distribution')
axes[1].set_xlabel('Credit Score Range Upper');
In [98]:
#Does Home Ownership lead to Higher Prosper Score ?
fig = plt.figure(figsize=(10,6))
sns.boxenplot(x ="IsBorrowerHomeowner", y = "ProsperScore", data =loan )
Out[98]:
<AxesSubplot:xlabel='IsBorrowerHomeowner', ylabel='ProsperScore'>
In [11]:
#Most of the Borrowers have Prosper Scores between
loan["ProsperScore"]
plt.figure(figsize = (10,8))
plt.hist(x = loan["ProsperScore"]);
In [12]:
#Most of the loans are given for a period 
plt.figure(figsize = (10,8))
loan["Term"].value_counts().plot(kind = "bar")
plt.xticks(rotation = 0);
In [13]:
#number of Loans have doubled in number from the previous years
dt = pd.to_datetime(loan["ListingCreationDate"])
year = dt.dt.year
sns.countplot(x = year)
plt.xlabel("Year")
plt.ylabel("No. of Loans");
In [81]:
#Borrowers with Higher Monthly Income are assigned 
income_score = loan[[ "StatedMonthlyIncome", "ProsperScore"]].dropna()
plt.figure(figsize = (12,6))
sns.pointplot(x = "ProsperScore", y = "StatedMonthlyIncome", data = income_score);
In [35]:
#Borrowers with Prosper Score higher than or equal to 8, are more likely to own 
borrower_score = loan[["ProsperScore", "IsBorrowerHomeowner"]].dropna()
plt.figure(figsize = (10,8))
sns.countplot(x = "ProsperScore", data = borrower_score, hue = "IsBorrowerHomeowner");
In [36]:
#Employed Borrowers take loans of Higher Amounts when compared to Borrower's who are Retired or Unemployed.
plt.figure(figsize = (10,8))
loan["EmploymentStatus"].value_counts().plot(kind = "bar");
In [39]:
#Unemployed Borrowers are charged a Higher Interest Rate in comparison to Employed and Retired Borrowers.
loan[["BorrowerRate","EmploymentStatus"]]
fig = plt.figure(figsize=(12,6))
sns.pointplot(y='BorrowerRate', x='EmploymentStatus', data=loan);
In [48]:
#Employed Borrowers are assigned a Higher Prosper Score.
e_score = loan[["EmploymentStatus", "ProsperScore"]].dropna()
e_score 
plt.figure(figsize = (10,8))
#sns.pointplot(y="ProsperScore", x="EmploymentStatus", data=e_score);
sns.boxplot(x='ProsperScore', y='EmploymentStatus', data=loan);
In [50]:
#The Interest Rate is Negatively Correlated with Prosper Score. Higher Prosper Score leads to Lower Interest Rates.
pb = loan[["ProsperScore", "BorrowerRate"]].dropna()
pb
fig = plt.figure(figsize=(12,6))
sns.pointplot(y='BorrowerRate', x='ProsperScore', data=pb);
In [52]:
#The Loan Amounts given has increased over the years.
loan["LoanOriginalAmount"]
pd.to_datetime(loan["LoanOriginationDate"]).dt.year
year_amount = pd.DataFrame({"LoanYear" :pd.to_datetime(loan["LoanOriginationDate"]).dt.year, "LoanAmount":loan["LoanOriginalAmount"]})
ya = year_amount.groupby("LoanYear").sum()
plt.figure(figsize = (10,8))
sns.pointplot(y=loan["LoanOriginalAmount"], x=pd.to_datetime(loan["LoanOriginationDate"]).dt.year)
            
plt.ylabel("Loan Amount");
In [54]:
#Higher Loan Amount also leads to a Higher Loan Term.
loan[["LoanOriginalAmount", "Term"]]
plt.figure(figsize = (10,8))
sns.boxplot(y='LoanOriginalAmount', x='Term',
            data=loan);
In [95]:
#he Prosper Score is Positively Correlated with On-Time Monthly Payments. The borrowers with Higher Prosper Scores are more likely to make On-Time Monthly Payments.
fig = plt.figure(figsize=(12,6))
sns.scatterplot(x='LoanOriginalAmount', y='MonthlyLoanPayment',
               data=loan, hue='ProsperScore', palette='OrRd');
In [59]:
#A Higher Credit Score leads to a Higher Prosper Score.
credit = loan[["CreditScoreRangeLower", "CreditScoreRangeUpper", "ProsperScore"]].dropna()
fig = plt.figure(figsize=(12,6))
sns.pointplot(x="CreditScoreRangeLower", y='ProsperScore',
            data=credit);
In [60]:
#The Estimated Loss reduces with an increase in the Borrowers Prosper Score.
eloss = loan[["EstimatedLoss", "ProsperScore"]].dropna()
plt.figure(figsize=(10,8))
sns.pointplot(x="ProsperScore", y="EstimatedLoss",
            data=eloss);
In [68]:
#Borrowers who have Fewer Current Delinquencies and Higher On-Time Payments, are more likely to have Higher Number of Loans.
loan[["CurrentDelinquencies", "OnTimeProsperPayments"]]
plt.figure(figsize = (12,8))
sns.scatterplot(y = "OnTimeProsperPayments", x = "CurrentDelinquencies",hue='TotalProsperLoans', data = loan );
In [99]:
#The Estimated Loss increase with the increase in Interest Rate and Yield.
loan[["EstimatedLoss", "BorrowerRate","LenderYield"]]
plt.figure(figsize = (12,8))
sns.scatterplot(x='BorrowerRate', y='LenderYield',hue='EstimatedLoss', data=loan);
In [ ]:

Comments