import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.max_columns', 500)
loan = pd.read_csv("Prosper-Loan-Data-Analysis-main/prosperLoanData.csv")
loan

drop_value = loan[loan["CreditScoreRangeLower"].isnull()].index
loan.drop(index = drop_value, inplace = True)

loan.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 113346 entries, 0 to 113936
Data columns (total 81 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   ListingKey                           113346 non-null  object 
 1   ListingNumber                        113346 non-null  int64  
 2   ListingCreationDate                  113346 non-null  object 
 3   CreditGrade                          28362 non-null   object 
 4   Term                                 113346 non-null  int64  
 5   LoanStatus                           113346 non-null  object 
 6   ClosedDate                           54498 non-null   object 
 7   BorrowerAPR                          113346 non-null  float64
 8   BorrowerRate                         113346 non-null  float64
 9   LenderYield                          113346 non-null  float64
 10  EstimatedEffectiveYield              84853 non-null   float64
 11  EstimatedLoss                        84853 non-null   float64
 12  EstimatedReturn                      84853 non-null   float64
 13  ProsperRating (numeric)              84853 non-null   float64
 14  ProsperRating (Alpha)                84853 non-null   object 
 15  ProsperScore                         84853 non-null   float64
 16  ListingCategory (numeric)            113346 non-null  int64  
 17  BorrowerState                        108422 non-null  object 
 18  Occupation                           110347 non-null  object 
 19  EmploymentStatus                     111680 non-null  object 
 20  EmploymentStatusDuration             106312 non-null  float64
 21  IsBorrowerHomeowner                  113346 non-null  bool   
 22  CurrentlyInGroup                     113346 non-null  bool   
 23  GroupKey                             12966 non-null   object 
 24  DateCreditPulled                     113346 non-null  object 
 25  CreditScoreRangeLower                113346 non-null  float64
 26  CreditScoreRangeUpper                113346 non-null  float64
 27  FirstRecordedCreditLine              113240 non-null  object 
 28  CurrentCreditLines                   106333 non-null  float64
 29  OpenCreditLines                      106333 non-null  float64
 30  TotalCreditLinespast7years           113240 non-null  float64
 31  OpenRevolvingAccounts                113346 non-null  int64  
 32  OpenRevolvingMonthlyPayment          113346 non-null  float64
 33  InquiriesLast6Months                 113240 non-null  float64
 34  TotalInquiries                       112778 non-null  float64
 35  CurrentDelinquencies                 113240 non-null  float64
 36  AmountDelinquent                     106315 non-null  float64
 37  DelinquenciesLast7Years              112947 non-null  float64
 38  PublicRecordsLast10Years             113240 non-null  float64
 39  PublicRecordsLast12Months            106333 non-null  float64
 40  RevolvingCreditBalance               106333 non-null  float64
 41  BankcardUtilization                  106333 non-null  float64
 42  AvailableBankcardCredit              106393 non-null  float64
 43  TotalTrades                          106393 non-null  float64
 44  TradesNeverDelinquent (percentage)   106393 non-null  float64
 45  TradesOpenedLast6Months              106393 non-null  float64
 46  DebtToIncomeRatio                    104798 non-null  float64
 47  IncomeRange                          113346 non-null  object 
 48  IncomeVerifiable                     113346 non-null  bool   
 49  StatedMonthlyIncome                  113346 non-null  float64
 50  LoanKey                              113346 non-null  object 
 51  TotalProsperLoans                    22085 non-null   float64
 52  TotalProsperPaymentsBilled           22085 non-null   float64
 53  OnTimeProsperPayments                22085 non-null   float64
 54  ProsperPaymentsLessThanOneMonthLate  22085 non-null   float64
 55  ProsperPaymentsOneMonthPlusLate      22085 non-null   float64
 56  ProsperPrincipalBorrowed             22085 non-null   float64
 57  ProsperPrincipalOutstanding          22085 non-null   float64
 58  ScorexChangeAtTimeOfListing          18928 non-null   float64
 59  LoanCurrentDaysDelinquent            113346 non-null  int64  
 60  LoanFirstDefaultedCycleNumber        16776 non-null   float64
 61  LoanMonthsSinceOrigination           113346 non-null  int64  
 62  LoanNumber                           113346 non-null  int64  
 63  LoanOriginalAmount                   113346 non-null  int64  
 64  LoanOriginationDate                  113346 non-null  object 
 65  LoanOriginationQuarter               113346 non-null  object 
 66  MemberKey                            113346 non-null  object 
 67  MonthlyLoanPayment                   113346 non-null  float64
 68  LP_CustomerPayments                  113346 non-null  float64
 69  LP_CustomerPrincipalPayments         113346 non-null  float64
 70  LP_InterestandFees                   113346 non-null  float64
 71  LP_ServiceFees                       113346 non-null  float64
 72  LP_CollectionFees                    113346 non-null  float64
 73  LP_GrossPrincipalLoss                113346 non-null  float64
 74  LP_NetPrincipalLoss                  113346 non-null  float64
 75  LP_NonPrincipalRecoverypayments      113346 non-null  float64
 76  PercentFunded                        113346 non-null  float64
 77  Recommendations                      113346 non-null  int64  
 78  InvestmentFromFriendsCount           113346 non-null  int64  
 79  InvestmentFromFriendsAmount          113346 non-null  float64
 80  Investors                            113346 non-null  int64  
dtypes: bool(3), float64(50), int64(11), object(17)
memory usage: 68.6+ MB

#state with the Highest Number of Borrowers.
loan["BorrowerState"].value_counts().head(1)
pd.DataFrame( { "Top Borrower State": loan["BorrowerState"].value_counts().head(1).index })

loan["IsBorrowerHomeowner"].value_counts()

True     57478
False    55868
Name: IsBorrowerHomeowner, dtype: int64

# number of Borrowers who
sns.countplot(x = loan["IsBorrowerHomeowner"]);

#Majority of the portion of Borrowers 
loan["EmploymentStatus"].value_counts()
plt.figure(figsize=(10,8))
sns.countplot(x= loan["EmploymentStatus"]);

#Majority of the Borrowers have an income
loan["StatedMonthlyIncome"].round()
sns.set_style("whitegrid")
plt.figure(figsize=(10,8))
plt.hist(x = loan["StatedMonthlyIncome"].round().sort_values(ascending = False), bins = np.arange(0, 30000, 2500));

#Majority of the Borrowers have a Credit Score
loan["CreditScoreRangeLower"]
fig, axes = plt.subplots(nrows=2,figsize = (12,6), sharey=False, sharex=True,)
sns.boxplot(x='CreditScoreRangeLower', data=loan, color='yellow', ax=axes[0])
axes[0].set_title('Credit Score Range Lower Distribution')
axes[0].set_xlabel('Credit Score Range Lower')
sns.boxplot(x='CreditScoreRangeUpper', data=loan, color='red', ax=axes[1])
axes[1].set_title('Credit Score Range Upper Distribution')
axes[1].set_xlabel('Credit Score Range Upper');

#Does Home Ownership lead to Higher Prosper Score ?
fig = plt.figure(figsize=(10,6))
sns.boxenplot(x ="IsBorrowerHomeowner", y = "ProsperScore", data =loan )

<AxesSubplot:xlabel='IsBorrowerHomeowner', ylabel='ProsperScore'>

#Most of the Borrowers have Prosper Scores between
loan["ProsperScore"]
plt.figure(figsize = (10,8))
plt.hist(x = loan["ProsperScore"]);

#Most of the loans are given for a period 
plt.figure(figsize = (10,8))
loan["Term"].value_counts().plot(kind = "bar")
plt.xticks(rotation = 0);

#number of Loans have doubled in number from the previous years
dt = pd.to_datetime(loan["ListingCreationDate"])
year = dt.dt.year
sns.countplot(x = year)
plt.xlabel("Year")
plt.ylabel("No. of Loans");

#Borrowers with Higher Monthly Income are assigned 
income_score = loan[[ "StatedMonthlyIncome", "ProsperScore"]].dropna()
plt.figure(figsize = (12,6))
sns.pointplot(x = "ProsperScore", y = "StatedMonthlyIncome", data = income_score);

#Borrowers with Prosper Score higher than or equal to 8, are more likely to own 
borrower_score = loan[["ProsperScore", "IsBorrowerHomeowner"]].dropna()
plt.figure(figsize = (10,8))
sns.countplot(x = "ProsperScore", data = borrower_score, hue = "IsBorrowerHomeowner");

#Employed Borrowers take loans of Higher Amounts when compared to Borrower's who are Retired or Unemployed.
plt.figure(figsize = (10,8))
loan["EmploymentStatus"].value_counts().plot(kind = "bar");

#Unemployed Borrowers are charged a Higher Interest Rate in comparison to Employed and Retired Borrowers.
loan[["BorrowerRate","EmploymentStatus"]]
fig = plt.figure(figsize=(12,6))
sns.pointplot(y='BorrowerRate', x='EmploymentStatus', data=loan);

#Employed Borrowers are assigned a Higher Prosper Score.
e_score = loan[["EmploymentStatus", "ProsperScore"]].dropna()
e_score 
plt.figure(figsize = (10,8))
#sns.pointplot(y="ProsperScore", x="EmploymentStatus", data=e_score);
sns.boxplot(x='ProsperScore', y='EmploymentStatus', data=loan);

#The Interest Rate is Negatively Correlated with Prosper Score. Higher Prosper Score leads to Lower Interest Rates.
pb = loan[["ProsperScore", "BorrowerRate"]].dropna()
pb
fig = plt.figure(figsize=(12,6))
sns.pointplot(y='BorrowerRate', x='ProsperScore', data=pb);

#The Loan Amounts given has increased over the years.
loan["LoanOriginalAmount"]
pd.to_datetime(loan["LoanOriginationDate"]).dt.year
year_amount = pd.DataFrame({"LoanYear" :pd.to_datetime(loan["LoanOriginationDate"]).dt.year, "LoanAmount":loan["LoanOriginalAmount"]})
ya = year_amount.groupby("LoanYear").sum()
plt.figure(figsize = (10,8))
sns.pointplot(y=loan["LoanOriginalAmount"], x=pd.to_datetime(loan["LoanOriginationDate"]).dt.year)
            
plt.ylabel("Loan Amount");

#Higher Loan Amount also leads to a Higher Loan Term.
loan[["LoanOriginalAmount", "Term"]]
plt.figure(figsize = (10,8))
sns.boxplot(y='LoanOriginalAmount', x='Term',
            data=loan);

#he Prosper Score is Positively Correlated with On-Time Monthly Payments. The borrowers with Higher Prosper Scores are more likely to make On-Time Monthly Payments.
fig = plt.figure(figsize=(12,6))
sns.scatterplot(x='LoanOriginalAmount', y='MonthlyLoanPayment',
               data=loan, hue='ProsperScore', palette='OrRd');

#A Higher Credit Score leads to a Higher Prosper Score.
credit = loan[["CreditScoreRangeLower", "CreditScoreRangeUpper", "ProsperScore"]].dropna()
fig = plt.figure(figsize=(12,6))
sns.pointplot(x="CreditScoreRangeLower", y='ProsperScore',
            data=credit);

#The Estimated Loss reduces with an increase in the Borrowers Prosper Score.
eloss = loan[["EstimatedLoss", "ProsperScore"]].dropna()
plt.figure(figsize=(10,8))
sns.pointplot(x="ProsperScore", y="EstimatedLoss",
            data=eloss);

#Borrowers who have Fewer Current Delinquencies and Higher On-Time Payments, are more likely to have Higher Number of Loans.
loan[["CurrentDelinquencies", "OnTimeProsperPayments"]]
plt.figure(figsize = (12,8))
sns.scatterplot(y = "OnTimeProsperPayments", x = "CurrentDelinquencies",hue='TotalProsperLoans', data = loan );

#The Estimated Loss increase with the increase in Interest Rate and Yield.
loan[["EstimatedLoss", "BorrowerRate","LenderYield"]]
plt.figure(figsize = (12,8))
sns.scatterplot(x='BorrowerRate', y='LenderYield',hue='EstimatedLoss', data=loan);

	ListingKey	ListingNumber	ListingCreationDate	CreditGrade	Term	LoanStatus	ClosedDate	BorrowerAPR	BorrowerRate	LenderYield	EstimatedEffectiveYield	EstimatedLoss	EstimatedReturn	ProsperRating (numeric)	ProsperRating (Alpha)	ProsperScore	ListingCategory (numeric)	BorrowerState	Occupation	EmploymentStatus	EmploymentStatusDuration	IsBorrowerHomeowner	CurrentlyInGroup	GroupKey	DateCreditPulled	CreditScoreRangeLower	CreditScoreRangeUpper	FirstRecordedCreditLine	CurrentCreditLines	OpenCreditLines	TotalCreditLinespast7years	OpenRevolvingAccounts	OpenRevolvingMonthlyPayment	InquiriesLast6Months	TotalInquiries	CurrentDelinquencies	AmountDelinquent	DelinquenciesLast7Years	PublicRecordsLast10Years	PublicRecordsLast12Months	RevolvingCreditBalance	BankcardUtilization	AvailableBankcardCredit	TotalTrades	TradesNeverDelinquent (percentage)	TradesOpenedLast6Months	DebtToIncomeRatio	IncomeRange	IncomeVerifiable	StatedMonthlyIncome	LoanKey	TotalProsperLoans	TotalProsperPaymentsBilled	OnTimeProsperPayments	ProsperPaymentsLessThanOneMonthLate	ProsperPaymentsOneMonthPlusLate	ProsperPrincipalBorrowed	ProsperPrincipalOutstanding	ScorexChangeAtTimeOfListing	LoanCurrentDaysDelinquent	LoanFirstDefaultedCycleNumber	LoanMonthsSinceOrigination	LoanNumber	LoanOriginalAmount	LoanOriginationDate	LoanOriginationQuarter	MemberKey	MonthlyLoanPayment	LP_CustomerPayments	LP_CustomerPrincipalPayments	LP_InterestandFees	LP_ServiceFees	LP_CollectionFees	LP_GrossPrincipalLoss	LP_NetPrincipalLoss	LP_NonPrincipalRecoverypayments	PercentFunded	Recommendations	InvestmentFromFriendsCount	InvestmentFromFriendsAmount	Investors
0	1021339766868145413AB3B	193129	2007-08-26 19:09:29.263000000	C	36	Completed	2009-08-14 00:00:00	0.16516	0.1580	0.1380	NaN	NaN	NaN	NaN	NaN	NaN	0	CO	Other	Self-employed	2.0	True	True	NaN	2007-08-26 18:41:46.780000000	640.0	659.0	2001-10-11 00:00:00	5.0	4.0	12.0	1	24.0	3.0	3.0	2.0	472.0	4.0	0.0	0.0	0.0	0.00	1500.0	11.0	0.81	0.0	0.17	$25,000-49,999	True	3083.333333	E33A3400205839220442E84	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	78	19141	9425	2007-09-12 00:00:00	Q3 2007	1F3E3376408759268057EDA	330.43	11396.1400	9425.00	1971.1400	-133.18	0.0	0.0	0.0	0.0	1.0	0	0	0.0	258
1	10273602499503308B223C1	1209647	2014-02-27 08:28:07.900000000	NaN	36	Current	NaN	0.12016	0.0920	0.0820	0.07960	0.0249	0.05470	6.0	A	7.0	2	CO	Professional	Employed	44.0	False	False	NaN	2014-02-27 08:28:14	680.0	699.0	1996-03-18 00:00:00	14.0	14.0	29.0	13	389.0	3.0	5.0	0.0	0.0	0.0	1.0	0.0	3989.0	0.21	10266.0	29.0	1.00	2.0	0.18	$50,000-74,999	True	6125.000000	9E3B37071505919926B1D82	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	0	134815	10000	2014-03-03 00:00:00	Q1 2014	1D13370546739025387B2F4	318.93	0.0000	0.00	0.0000	0.00	0.0	0.0	0.0	0.0	1.0	0	0	0.0	1
2	0EE9337825851032864889A	81716	2007-01-05 15:00:47.090000000	HR	36	Completed	2009-12-17 00:00:00	0.28269	0.2750	0.2400	NaN	NaN	NaN	NaN	NaN	NaN	0	GA	Other	Not available	NaN	False	True	783C3371218786870A73D20	2007-01-02 14:09:10.060000000	480.0	499.0	2002-07-27 00:00:00	NaN	NaN	3.0	0	0.0	0.0	1.0	1.0	NaN	0.0	0.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.06	Not displayed	True	2083.333333	6954337960046817851BCB2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	86	6466	3001	2007-01-17 00:00:00	Q1 2007	5F7033715035555618FA612	123.32	4186.6300	3001.00	1185.6300	-24.20	0.0	0.0	0.0	0.0	1.0	0	0	0.0	41
3	0EF5356002482715299901A	658116	2012-10-22 11:02:35.010000000	NaN	36	Current	NaN	0.12528	0.0974	0.0874	0.08490	0.0249	0.06000	6.0	A	9.0	16	GA	Skilled Labor	Employed	113.0	True	False	NaN	2012-10-22 11:02:32	800.0	819.0	1983-02-28 00:00:00	5.0	5.0	29.0	7	115.0	0.0	1.0	4.0	10056.0	14.0	0.0	0.0	1444.0	0.04	30754.0	26.0	0.76	0.0	0.15	$25,000-49,999	True	2875.000000	A0393664465886295619C51	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	16	77296	10000	2012-11-01 00:00:00	Q4 2012	9ADE356069835475068C6D2	321.45	5143.2000	4091.09	1052.1100	-108.01	0.0	0.0	0.0	0.0	1.0	0	0	0.0	158
4	0F023589499656230C5E3E2	909464	2013-09-14 18:38:39.097000000	NaN	36	Current	NaN	0.24614	0.2085	0.1985	0.18316	0.0925	0.09066	3.0	D	4.0	2	MN	Executive	Employed	44.0	True	False	NaN	2013-09-14 18:38:44	680.0	699.0	2004-02-20 00:00:00	19.0	19.0	49.0	6	220.0	1.0	9.0	0.0	0.0	0.0	0.0	0.0	6193.0	0.81	695.0	39.0	0.95	2.0	0.26	$100,000+	True	9583.333333	A180369302188889200689E	1.0	11.0	11.0	0.0	0.0	11000.0	9947.90	NaN	0	NaN	6	102670	15000	2013-09-20 00:00:00	Q3 2013	36CE356043264555721F06C	563.97	2819.8500	1563.22	1256.6300	-60.27	0.0	0.0	0.0	0.0	1.0	0	0	0.0	20
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
113932	E6D9357655724827169606C	753087	2013-04-14 05:55:02.663000000	NaN	36	Current	NaN	0.22354	0.1864	0.1764	0.16490	0.0699	0.09500	4.0	C	5.0	1	IL	Food Service Management	Employed	246.0	True	False	NaN	2013-04-14 05:54:58	700.0	719.0	1997-09-01 00:00:00	9.0	9.0	41.0	9	209.0	0.0	0.0	0.0	0.0	7.0	1.0	0.0	7714.0	0.80	1886.0	37.0	0.83	3.0	0.13	$50,000-74,999	True	4333.333333	9BD7367919051593140DB62	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	11	88485	10000	2013-04-22 00:00:00	Q2 2013	2EC435768441332602FDC15	364.74	3647.4000	2238.38	1409.0200	-75.58	0.0	0.0	0.0	0.0	1.0	0	0	0.0	1
113933	E6DB353036033497292EE43	537216	2011-11-03 20:42:55.333000000	NaN	36	FinalPaymentInProgress	NaN	0.13220	0.1110	0.1010	0.10070	0.0200	0.08070	6.0	A	8.0	7	PA	Professional	Employed	21.0	True	False	NaN	2011-11-03 20:42:53	700.0	719.0	1992-01-17 00:00:00	14.0	13.0	39.0	9	495.0	1.0	4.0	1.0	5062.0	4.0	0.0	0.0	15743.0	0.69	6658.0	39.0	0.92	0.0	0.11	$75,000-99,999	True	8041.666667	62D93634569816897D5A276	3.0	60.0	60.0	0.0	0.0	33501.0	4815.42	-26.0	0	NaN	28	55801	2000	2011-11-07 00:00:00	Q4 2011	55C4336679182766893E4FC	65.57	2330.5500	1997.16	333.3900	-30.05	0.0	0.0	0.0	0.0	1.0	0	0	0.0	22
113934	E6E13596170052029692BB1	1069178	2013-12-13 05:49:12.703000000	NaN	60	Current	NaN	0.23984	0.2150	0.2050	0.18828	0.1025	0.08578	3.0	D	3.0	1	TX	Other	Employed	84.0	True	False	NaN	2013-12-13 05:49:15	700.0	719.0	2002-02-25 00:00:00	12.0	12.0	25.0	9	521.0	1.0	2.0	0.0	0.0	0.0	0.0	0.0	22147.0	0.73	7853.0	25.0	1.00	0.0	0.51	$25,000-49,999	True	2875.000000	DD1A370200396006300ACA0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	3	123122	10000	2013-12-23 00:00:00	Q4 2013	0FE0370029359765342FDB5	273.35	546.7000	183.15	363.5500	-16.91	0.0	0.0	0.0	0.0	1.0	0	0	0.0	119
113935	E6EB3531504622671970D9E	539056	2011-11-14 13:18:26.597000000	NaN	60	Completed	2013-08-13 00:00:00	0.28408	0.2605	0.2505	0.24450	0.0850	0.15950	4.0	C	5.0	2	GA	Food Service	Full-time	94.0	True	False	NaN	2011-11-14 13:18:24	680.0	699.0	1993-12-01 00:00:00	11.0	11.0	22.0	7	488.0	1.0	4.0	0.0	0.0	0.0	1.0	0.0	11956.0	0.69	4137.0	19.0	0.80	1.0	0.48	$25,000-49,999	True	3875.000000	589536350469116027ED11B	1.0	16.0	16.0	0.0	0.0	5000.0	3264.37	-4.0	0	NaN	28	56401	15000	2011-11-21 00:00:00	Q4 2011	A33834861822272782621C8	449.55	21122.5600	15000.00	6122.5600	-235.05	0.0	0.0	0.0	0.0	1.0	0	0	0.0	274
113936	E6ED3600409833199F711B7	1140093	2014-01-15 09:27:37.657000000	NaN	36	Current	NaN	0.13189	0.1039	0.0939	0.09071	0.0299	0.06081	6.0	A	7.0	1	NY	Professor	Employed	244.0	False	False	NaN	2014-01-15 09:27:40	680.0	699.0	1995-01-01 00:00:00	10.0	9.0	44.0	8	289.0	0.0	1.0	1.0	257.0	3.0	1.0	0.0	6166.0	0.80	675.0	36.0	0.75	0.0	0.23	$50,000-74,999	True	4583.333333	00AF3704550953269A64E40	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0	NaN	2	127508	2000	2014-01-21 00:00:00	Q1 2014	CE1E3704648000761C9F724	64.90	64.3307	47.25	17.0807	-1.70	0.0	0.0	0.0	0.0	1.0	0	0	0.0	1

Search This Blog

Portfolio

Loan Data Analysis

Comments

Post a Comment