Analyzing Credit Card Limits

BDA503: Default Of Credit Card Clients

================================================================
Group Name: Paranormal Distribution
Instructor: Associate Prof. Özgür Özlük
Data Set: https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
================================================================

Load Required Modules

#if you dont use these libraries before, please install them first!
#install.packages('ggplot2')
#install.packages('RColorBrewer')
#install.packages('gridExtra')
#install.packages('corrplot')
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(corrplot)

Load and Investigate Data

ccdata <- read.csv("default of credit card clients.csv")
#Before tidying data set
dim(ccdata)
[1] 30000    25
#Before tidying data set
str(ccdata)
'data.frame':   30000 obs. of  25 variables:
 $ ID         : int  1 2 3 4 5 6 7 8 9 10 ...
 $ LIMIT_BAL  : int  20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
 $ SEX        : int  2 2 2 2 1 1 1 2 2 1 ...
 $ EDUCATION  : int  2 2 2 2 2 1 1 2 3 3 ...
 $ MARRIAGE   : int  1 2 2 1 1 2 2 2 1 2 ...
 $ AGE        : int  24 26 34 37 57 37 29 23 28 35 ...
 $ PAY_0      : int  2 -1 0 0 -1 0 0 0 0 -2 ...
 $ PAY_2      : int  2 2 0 0 0 0 0 -1 0 -2 ...
 $ PAY_3      : int  -1 0 0 0 -1 0 0 -1 2 -2 ...
 $ PAY_4      : int  -1 0 0 0 0 0 0 0 0 -2 ...
 $ PAY_5      : int  -2 0 0 0 0 0 0 0 0 -1 ...
 $ PAY_6      : int  -2 2 0 0 0 0 0 -1 0 -1 ...
 $ BILL_AMT1  : int  3913 2682 29239 46990 8617 64400 367965 11876 11285 0 ...
 $ BILL_AMT2  : int  3102 1725 14027 48233 5670 57069 412023 380 14096 0 ...
 $ BILL_AMT3  : int  689 2682 13559 49291 35835 57608 445007 601 12108 0 ...
 $ BILL_AMT4  : int  0 3272 14331 28314 20940 19394 542653 221 12211 0 ...
 $ BILL_AMT5  : int  0 3455 14948 28959 19146 19619 483003 -159 11793 13007 ...
 $ BILL_AMT6  : int  0 3261 15549 29547 19131 20024 473944 567 3719 13912 ...
 $ PAY_AMT1   : int  0 0 1518 2000 2000 2500 55000 380 3329 0 ...
 $ PAY_AMT2   : int  689 1000 1500 2019 36681 1815 40000 601 0 0 ...
 $ PAY_AMT3   : int  0 1000 1000 1200 10000 657 38000 0 432 0 ...
 $ PAY_AMT4   : int  0 1000 1000 1100 9000 1000 20239 581 1000 13007 ...
 $ PAY_AMT5   : int  0 0 1000 1069 689 1000 13750 1687 1000 1122 ...
 $ PAY_AMT6   : int  0 2000 5000 1000 679 800 13770 1542 1000 0 ...
 $ DEF_PAYMENT: int  1 1 0 0 0 0 0 0 0 0 ...
#Before tidying data set
summary(ccdata)
       ID          LIMIT_BAL            SEX          EDUCATION    
 Min.   :    1   Min.   :  10000   Min.   :1.000   Min.   :0.000  
 1st Qu.: 7501   1st Qu.:  50000   1st Qu.:1.000   1st Qu.:1.000  
 Median :15000   Median : 140000   Median :2.000   Median :2.000  
 Mean   :15000   Mean   : 167484   Mean   :1.604   Mean   :1.853  
 3rd Qu.:22500   3rd Qu.: 240000   3rd Qu.:2.000   3rd Qu.:2.000  
 Max.   :30000   Max.   :1000000   Max.   :2.000   Max.   :6.000  
    MARRIAGE          AGE            PAY_0             PAY_2        
 Min.   :0.000   Min.   :21.00   Min.   :-2.0000   Min.   :-2.0000  
 1st Qu.:1.000   1st Qu.:28.00   1st Qu.:-1.0000   1st Qu.:-1.0000  
 Median :2.000   Median :34.00   Median : 0.0000   Median : 0.0000  
 Mean   :1.552   Mean   :35.49   Mean   :-0.0167   Mean   :-0.1338  
 3rd Qu.:2.000   3rd Qu.:41.00   3rd Qu.: 0.0000   3rd Qu.: 0.0000  
 Max.   :3.000   Max.   :79.00   Max.   : 8.0000   Max.   : 8.0000  
     PAY_3             PAY_4             PAY_5             PAY_6        
 Min.   :-2.0000   Min.   :-2.0000   Min.   :-2.0000   Min.   :-2.0000  
 1st Qu.:-1.0000   1st Qu.:-1.0000   1st Qu.:-1.0000   1st Qu.:-1.0000  
 Median : 0.0000   Median : 0.0000   Median : 0.0000   Median : 0.0000  
 Mean   :-0.1662   Mean   :-0.2207   Mean   :-0.2662   Mean   :-0.2911  
 3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000  
 Max.   : 8.0000   Max.   : 8.0000   Max.   : 8.0000   Max.   : 8.0000  
   BILL_AMT1         BILL_AMT2        BILL_AMT3         BILL_AMT4      
 Min.   :-165580   Min.   :-69777   Min.   :-157264   Min.   :-170000  
 1st Qu.:   3559   1st Qu.:  2985   1st Qu.:   2666   1st Qu.:   2327  
 Median :  22382   Median : 21200   Median :  20088   Median :  19052  
 Mean   :  51223   Mean   : 49179   Mean   :  47013   Mean   :  43263  
 3rd Qu.:  67091   3rd Qu.: 64006   3rd Qu.:  60165   3rd Qu.:  54506  
 Max.   : 964511   Max.   :983931   Max.   :1664089   Max.   : 891586  
   BILL_AMT5        BILL_AMT6          PAY_AMT1         PAY_AMT2      
 Min.   :-81334   Min.   :-339603   Min.   :     0   Min.   :      0  
 1st Qu.:  1763   1st Qu.:   1256   1st Qu.:  1000   1st Qu.:    833  
 Median : 18104   Median :  17071   Median :  2100   Median :   2009  
 Mean   : 40311   Mean   :  38872   Mean   :  5664   Mean   :   5921  
 3rd Qu.: 50190   3rd Qu.:  49198   3rd Qu.:  5006   3rd Qu.:   5000  
 Max.   :927171   Max.   : 961664   Max.   :873552   Max.   :1684259  
    PAY_AMT3         PAY_AMT4         PAY_AMT5           PAY_AMT6       
 Min.   :     0   Min.   :     0   Min.   :     0.0   Min.   :     0.0  
 1st Qu.:   390   1st Qu.:   296   1st Qu.:   252.5   1st Qu.:   117.8  
 Median :  1800   Median :  1500   Median :  1500.0   Median :  1500.0  
 Mean   :  5226   Mean   :  4826   Mean   :  4799.4   Mean   :  5215.5  
 3rd Qu.:  4505   3rd Qu.:  4013   3rd Qu.:  4031.5   3rd Qu.:  4000.0  
 Max.   :896040   Max.   :621000   Max.   :426529.0   Max.   :528666.0  
  DEF_PAYMENT    
 Min.   :0.0000  
 1st Qu.:0.0000  
 Median :0.0000  
 Mean   :0.2212  
 3rd Qu.:0.0000  
 Max.   :1.0000  

Reshaping Data 1: Creating New Variables

ccdata$workstate <- ""
ccdata$genderH <- ""
ccdata$educationH <- ""
ccdata$maritalH <- ""

for (i in 1:nrow(ccdata)) {
  if ((ccdata[i,7] + ccdata[i,8] +ccdata[i,9]+ccdata[i,10] +ccdata[i,11]+ccdata[i,12]) <= 0){
    ccdata[i,26] <- "YES"  
  }
  else {
    ccdata[i,26] <- "NO"         
  }
}

for (i in 1:nrow(ccdata)) {
  if (ccdata[i,3] == 1) {
    ccdata[i,27] <- "Male"  
  }
  else {
    ccdata[i,27] <- "Female"         
  }
}

for (i in 1:nrow(ccdata)) {
  if (ccdata[i,4] == 1) {
    ccdata[i,28] <- "Graduate"
  } else if (ccdata [i,4] == 2) {
    ccdata[i,28] <- "University" 
  } else if (ccdata [i,4] == 3) {
    ccdata[i,28] <- "High School" 
  } else {
    ccdata[i,28] <- "Unknown" 
  }
}

for (i in 1:nrow(ccdata)) {
  if(ccdata[i,5] == 1) {
    ccdata[i,29] <- "Married"
  } else if (ccdata[i,5] == 2) {
    ccdata[i,29] <- "Single"
  } else {
    ccdata[i,29] <- "Other"
  }
}

Cut a Variable

ccdata$AGE.bucket<-cut(ccdata$AGE,c(10,20,30,40,50,60,70))

Reshaping Data 2: Converting Quantitative Variables To New Factorial Variables

ccdata$workstate <-factor(ccdata$workstate)
ccdata$SEX <-factor(ccdata$SEX)
ccdata$EDUCATION <- factor(ccdata$EDUCATION)
ccdata$MARRIAGE <- factor(ccdata$MARRIAGE)
ccdata$AGEf <- factor(ccdata$AGE)
ccdata$DEF_PAYMENT<-factor(ccdata$DEF_PAYMENT)
ccdata$genderH <- factor(ccdata$genderH)
ccdata$educationH <- factor(ccdata$educationH)
ccdata$maritalH <- factor(ccdata$maritalH)

Reshaping Data 3: Removing Variables After Reshaping 1

ccdata = subset(ccdata, select = -c(PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6))

Investigate Data Again

#After tidying data set
dim(ccdata)
[1] 30000    25
#After tidying data set
str(ccdata)
'data.frame':   30000 obs. of  25 variables:
 $ ID         : int  1 2 3 4 5 6 7 8 9 10 ...
 $ LIMIT_BAL  : int  20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
 $ SEX        : Factor w/ 2 levels "1","2": 2 2 2 2 1 1 1 2 2 1 ...
 $ EDUCATION  : Factor w/ 7 levels "0","1","2","3",..: 3 3 3 3 3 2 2 3 4 4 ...
 $ MARRIAGE   : Factor w/ 4 levels "0","1","2","3": 2 3 3 2 2 3 3 3 2 3 ...
 $ AGE        : int  24 26 34 37 57 37 29 23 28 35 ...
 $ BILL_AMT1  : int  3913 2682 29239 46990 8617 64400 367965 11876 11285 0 ...
 $ BILL_AMT2  : int  3102 1725 14027 48233 5670 57069 412023 380 14096 0 ...
 $ BILL_AMT3  : int  689 2682 13559 49291 35835 57608 445007 601 12108 0 ...
 $ BILL_AMT4  : int  0 3272 14331 28314 20940 19394 542653 221 12211 0 ...
 $ BILL_AMT5  : int  0 3455 14948 28959 19146 19619 483003 -159 11793 13007 ...
 $ BILL_AMT6  : int  0 3261 15549 29547 19131 20024 473944 567 3719 13912 ...
 $ PAY_AMT1   : int  0 0 1518 2000 2000 2500 55000 380 3329 0 ...
 $ PAY_AMT2   : int  689 1000 1500 2019 36681 1815 40000 601 0 0 ...
 $ PAY_AMT3   : int  0 1000 1000 1200 10000 657 38000 0 432 0 ...
 $ PAY_AMT4   : int  0 1000 1000 1100 9000 1000 20239 581 1000 13007 ...
 $ PAY_AMT5   : int  0 0 1000 1069 689 1000 13750 1687 1000 1122 ...
 $ PAY_AMT6   : int  0 2000 5000 1000 679 800 13770 1542 1000 0 ...
 $ DEF_PAYMENT: Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
 $ workstate  : Factor w/ 2 levels "NO","YES": 2 1 2 2 2 2 2 2 1 2 ...
 $ genderH    : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 2 2 1 1 2 ...
 $ educationH : Factor w/ 4 levels "Graduate","High School",..: 3 3 3 3 3 1 1 3 2 2 ...
 $ maritalH   : Factor w/ 3 levels "Married","Other",..: 1 3 3 1 1 3 3 3 1 3 ...
 $ AGE.bucket : Factor w/ 6 levels "(10,20]","(20,30]",..: 2 2 3 3 5 3 2 2 2 3 ...
 $ AGEf       : Factor w/ 56 levels "21","22","23",..: 4 6 14 17 37 17 9 3 8 15 ...
#After tidying data set
summary(ccdata)
       ID          LIMIT_BAL       SEX       EDUCATION MARRIAGE 
 Min.   :    1   Min.   :  10000   1:11888   0:   14   0:   54  
 1st Qu.: 7501   1st Qu.:  50000   2:18112   1:10585   1:13659  
 Median :15000   Median : 140000             2:14030   2:15964  
 Mean   :15000   Mean   : 167484             3: 4917   3:  323  
 3rd Qu.:22500   3rd Qu.: 240000             4:  123            
 Max.   :30000   Max.   :1000000             5:  280            
                                             6:   51            
      AGE          BILL_AMT1         BILL_AMT2        BILL_AMT3      
 Min.   :21.00   Min.   :-165580   Min.   :-69777   Min.   :-157264  
 1st Qu.:28.00   1st Qu.:   3559   1st Qu.:  2985   1st Qu.:   2666  
 Median :34.00   Median :  22382   Median : 21200   Median :  20088  
 Mean   :35.49   Mean   :  51223   Mean   : 49179   Mean   :  47013  
 3rd Qu.:41.00   3rd Qu.:  67091   3rd Qu.: 64006   3rd Qu.:  60165  
 Max.   :79.00   Max.   : 964511   Max.   :983931   Max.   :1664089  
                                                                     
   BILL_AMT4         BILL_AMT5        BILL_AMT6          PAY_AMT1     
 Min.   :-170000   Min.   :-81334   Min.   :-339603   Min.   :     0  
 1st Qu.:   2327   1st Qu.:  1763   1st Qu.:   1256   1st Qu.:  1000  
 Median :  19052   Median : 18104   Median :  17071   Median :  2100  
 Mean   :  43263   Mean   : 40311   Mean   :  38872   Mean   :  5664  
 3rd Qu.:  54506   3rd Qu.: 50190   3rd Qu.:  49198   3rd Qu.:  5006  
 Max.   : 891586   Max.   :927171   Max.   : 961664   Max.   :873552  
                                                                      
    PAY_AMT2          PAY_AMT3         PAY_AMT4         PAY_AMT5       
 Min.   :      0   Min.   :     0   Min.   :     0   Min.   :     0.0  
 1st Qu.:    833   1st Qu.:   390   1st Qu.:   296   1st Qu.:   252.5  
 Median :   2009   Median :  1800   Median :  1500   Median :  1500.0  
 Mean   :   5921   Mean   :  5226   Mean   :  4826   Mean   :  4799.4  
 3rd Qu.:   5000   3rd Qu.:  4505   3rd Qu.:  4013   3rd Qu.:  4031.5  
 Max.   :1684259   Max.   :896040   Max.   :621000   Max.   :426529.0  
                                                                       
    PAY_AMT6        DEF_PAYMENT workstate     genderH     
 Min.   :     0.0   0:23364     NO : 7133   Female:18112  
 1st Qu.:   117.8   1: 6636     YES:22867   Male  :11888  
 Median :  1500.0                                         
 Mean   :  5215.5                                         
 3rd Qu.:  4000.0                                         
 Max.   :528666.0                                         
                                                          
       educationH       maritalH       AGE.bucket         AGEf      
 Graduate   :10585   Married:13659   (10,20]:    0   29     : 1605  
 High School: 4917   Other  :  377   (20,30]:11013   27     : 1477  
 University :14030   Single :15964   (30,40]:10713   28     : 1409  
 Unknown    :  468                   (40,50]: 6005   30     : 1395  
                                     (50,60]: 1997   26     : 1256  
                                     (60,70]:  257   31     : 1217  
                                     NA's   :   15   (Other):21641  

Balance Limit by Gender, Education, Work State

# Balance limits by gender and education
d1 <- ggplot(ccdata, aes(factor(genderH), (LIMIT_BAL/1000), fill=educationH)) + 
  geom_boxplot() +
  xlab("Gender") + 
  ylab("BLimit(x1000 NT$)") + 
  scale_fill_brewer(palette = "Accent")

# Balance limits by education and gender
d2 <- ggplot(ccdata, aes(factor(educationH), (LIMIT_BAL/1000), fill=genderH)) + 
  geom_boxplot() +
  xlab("Education") + 
  ylab("BLimit(x1000 NT$)") + 
  scale_fill_brewer(palette = "Paired")

# Balance limits by workstate and education
d3 <-ggplot(ccdata, aes(factor(educationH), (LIMIT_BAL/1000), fill=workstate)) + 
  geom_boxplot() +
  xlab("Education") + 
  ylab("BLimit(x1000 NT$)") 

grid.arrange(d1, d2, d3)

#Result: When we compare the balance limits with gender, education and work status, we saw that gender has no effects on balance limit determination process of bank while the education level is has a positive effect on this process. Also it can be seen that work status is a very important factor at balance limit determination.

Relation Between Marital Status & Balance Limits By Gender

ggplot(ccdata, aes(factor(maritalH), (LIMIT_BAL/1000), fill=genderH)) + 
  geom_boxplot() +
  xlab("Marital Status") + 
  ylab("Balance Limit ( x 1000 NT$)") + 
  coord_cartesian(ylim = c(0,350)) +
  scale_fill_brewer(palette = "Paired")

#Result: By this graph, we can see it again that, there is no change at females side such as balance limits depending on their marital status, however it changes a lot of things on males’ side starting with the expenditures which is the reason on increased balance limits. 

Histogram of Limit Balance & Default Payment

ggplot(aes(x = ccdata$LIMIT_BAL/1000), data = ccdata) +
geom_histogram(aes(fill = ccdata$DEF_PAYMENT)) +
xlab("Balance Limit x 1000") +
ylab("Count") +
scale_fill_discrete(name="Default Payment Next Month",
                      breaks=c(0, 1),
                      labels=c("No", "Yes")) +
xlim(c(0,750)) +
facet_wrap(~educationH)

#Result: Balance limits and count of defaulted clients are almost same for University and Graduate Level. Additionally, the ratio of defaulted clients at high school level seems almost the same as the university and graduate levels.

Relation Between Education & Default Payment

d1 <- ggplot(ccdata, aes(x=DEF_PAYMENT)) + 
  geom_histogram(stat="count",color='red',fill='orange') +
  xlab("Default Payment Status") + ylab("Customer Count") + 
  facet_wrap(~educationH)

d2 <- ggplot(ccdata, aes(x=DEF_PAYMENT),aes(y=stat_count(gender))) + 
  geom_bar(aes(fill=factor(ccdata$educationH))) +
  xlab("Default Payment Status")+ylab("Customer Count") + 
  facet_wrap(~genderH)+
  scale_fill_discrete(name="Education")

grid.arrange(d1, d2, ncol=1)

#Result: There is no effect of education level on default event occurrence.

Balance Limits By Age Groups & Education

ggplot(data = subset(ccdata,!is.na(AGE.bucket)), aes(factor(educationH), (LIMIT_BAL/1000), fill=AGE.bucket)) + 
  geom_boxplot() +
  xlab("Education") + 
  ylab("Balance Limit ( x 1000 NT$)") + 
  coord_cartesian(ylim = c(0,500)) +
  scale_fill_brewer(palette = "Accent")

#Result: Reflection of the education levels on determination of balance limits for clients is increasing by later ages when we compare the averages of each age buckets with each other grouped by their education levels.

Data Heading Conversation of Customer Bills

ccdata$SEP2005<-ccdata$BILL_AMT1
ccdata$AUG2005<-ccdata$BILL_AMT2
ccdata$JUL2005<-ccdata$BILL_AMT3
ccdata$JUN2005<-ccdata$BILL_AMT4
ccdata$MAY2005<-ccdata$BILL_AMT5
ccdata$APR2005<-ccdata$BILL_AMT6

Expdenditure By Months

apr <- ggplot(aes(x=AGE,y=APR2005/1000),data=ccdata) +
  xlab("Age") + 
  ylab("Amount of Bill in April") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+  
  geom_jitter(alpha=0.3, color="orange") + geom_smooth(stat='summary', fun.y=mean)

may <- ggplot(aes(x=AGE,y=MAY2005/1000),data=ccdata) +
  xlab("Age") + 
  ylab("Amount of Bill in May") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+  
  geom_jitter(alpha=0.3, color="blue") + geom_smooth(stat='summary', fun.y=mean)

jun <- ggplot(aes(x=AGE,y=JUN2005/1000),data=ccdata) +
  xlab("Age") + 
  ylab("Amount of Bill in June") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+  
  geom_jitter(alpha=0.3, color="green") + geom_smooth(stat='summary', fun.y=mean)

jul <- ggplot(aes(x=AGE,y=JUL2005/1000),data=ccdata) +
  xlab("Age") + 
  ylab("Amount of Bill in July") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+  
  geom_jitter(alpha=0.3, color="orange") + geom_smooth(stat='summary', fun.y=mean)

aug <- ggplot(aes(x=AGE,y=AUG2005/1000),data=ccdata) +
  xlab("Age") + 
  ylab("Amount of Bill in August") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+  
  geom_jitter(alpha=0.3, color="blue") + geom_smooth(stat='summary', fun.y=mean)

sep <- ggplot(aes(x=AGE,y=SEP2005/1000),data=ccdata) +
  xlab("Age") + 
  ylab("Amount of Bill in September") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+  
  geom_jitter(alpha=0.3, color="green") + geom_smooth(stat='summary', fun.y=mean)
  
grid.arrange(apr,may,jun,jul,aug,sep,ncol=3)

#Result: Distribution of expenditures on ages doesn’t show any differentiation between months and the average expenditure by age is almost same for each month.

Correlations Between Limit Balance, Bill Amounts & Payments

M <- cor(subset(ccdata, select = c(LIMIT_BAL,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6)))
corrplot(M, method="number")

#Result: When we reflect the correlations between limit balances, bill amounts and payments amounts; it presents us that there’s a low correlation between the limit balances and payments and bill amounts. Howevet it can be seen that bill amounts has high correlation between each other as expected since the bills a reflecting the cumulative amounts.

Personal Balance Limits Probabilities & Given Limits By Age

ggplot(aes(x=AGE,y=LIMIT_BAL/1000),data=subset(ccdata,!is.na(AGE.bucket)))+
  xlab("Age") + 
  ylab("Balance Limit (x1000 NT$)") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+
  scale_color_brewer(palette = "Pastel1")+
  geom_jitter(alpha=0.5, position = position_jitter(h=0), aes(color=AGE.bucket)) +
  geom_smooth(stat='summary', fun.y=mean) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.1), color = 'black', linetype=2) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.5), color = 'red', linetype=2) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.9), color = 'black', linetype=2)

#Result: When we plot the average balance limit per age bucket, we realized there are some limit levels given to clients such as 500,000 and 200,000 and these levels are belong to the probabilities of 95% and 50% respectively. So we can say that %95 of clients has balance limits equal or less than TL500k and %50 of clients has balance limits equal or less than TL200k.
# Additionally, when we investigate the blue line which represents the average limit balance of all clients, we think that we can interpret the spread between blue line and discrete red line as the bank prefers to give more balance limit than the average for the 50% probability.
# On the other hand, I can interpret the probabilities as the confidence intervals of exposures of bank to clients and I can say that bank implies that it can lose more than TL500k only by 5% if there is a client default occurs.
ggplot(aes(x=AGE,y=LIMIT_BAL/1000),data=subset(ccdata,!is.na(AGE.bucket)))+
  xlab("Age") + 
  ylab("Balance Limit (x1000 NT$)") +
  coord_cartesian(xlim = c(21,60),ylim = c(0,700))+
  scale_color_brewer(palette = "Pastel1")+
  geom_jitter(alpha=0.5, position = position_jitter(h=0), aes(color=AGE.bucket)) +
  geom_smooth(stat='summary', fun.y=mean) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.1), color = 'black', linetype=2) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.5), color = 'red', linetype=2) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.9), color = 'black', linetype=2) +
  geom_smooth(stat='summary', fun.y=quantile, fun.args = list(probs = 0.95), color = 'black', linetype=2) +
  facet_wrap(~DEF_PAYMENT)

#Result: Within the number of observations having a limit of 500,000 or less, it is not necessary to be more than 5% of the number of observations of the number of defaults when the default probability is related.
#Otherwise, 
#      a) A change in the default prediction model is required.
#      b) A change in the bank's limit allocation model is required.
# We can also say that when defining a limit, a limit of over 50% is defined, it is one of the reasons why people fall into default payment.