library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
# Read the HR data set straight from its folder instead of calling setwd(),
# so the session's working directory is left untouched.
data_dir <- "C:/Users/ahmetozmen/Desktop/Kaggle Data"
# stringsAsFactors = TRUE keeps `sales` and `salary` as factors on R >= 4.0
# too; the per-level counts shown by summary(hr_data) depend on that.
hr_data <- read.csv(
  file.path(data_dir, "HR_comma_sep.csv"),
  stringsAsFactors = TRUE
)
HR Data Analytics columns:
# Column names of hr_data, listed in sorted order.
sort(names(hr_data))
## [1] "average_montly_hours" "last_evaluation"
## [3] "left" "number_project"
## [5] "promotion_last_5years" "salary"
## [7] "sales" "satisfaction_level"
## [9] "time_spend_company" "Work_accident"
HR Data Analytics Summary:
# Per-column summary: quantiles and mean for numeric columns, level counts
# for factor columns.
summary(object = hr_data)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. :0.3600 Min. :2.000 Min. : 96.0
## 1st Qu.:0.4400 1st Qu.:0.5600 1st Qu.:3.000 1st Qu.:156.0
## Median :0.6400 Median :0.7200 Median :4.000 Median :200.0
## Mean :0.6128 Mean :0.7161 Mean :3.803 Mean :201.1
## 3rd Qu.:0.8200 3rd Qu.:0.8700 3rd Qu.:5.000 3rd Qu.:245.0
## Max. :1.0000 Max. :1.0000 Max. :7.000 Max. :310.0
##
## time_spend_company Work_accident left
## Min. : 2.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 3.000 Median :0.0000 Median :0.0000
## Mean : 3.498 Mean :0.1446 Mean :0.2381
## 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :10.000 Max. :1.0000 Max. :1.0000
##
## promotion_last_5years sales salary
## Min. :0.00000 sales :4140 high :1237
## 1st Qu.:0.00000 technical :2720 low :7316
## Median :0.00000 support :2229 medium:6446
## Mean :0.02127 IT :1227
## 3rd Qu.:0.00000 product_mng: 902
## Max. :1.00000 marketing : 858
## (Other) :2923
Satisfaction Histogram:
# Histogram of satisfaction_level. Written with ggplot() + geom_histogram()
# because qplot() is deprecated in current ggplot2 releases.
# The bin count is left at the default (30), so stat_bin() still prints its
# hint; pass `binwidth =` to geom_histogram() to tune it.
ggplot(hr_data, aes(x = satisfaction_level)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
average_montly_hours Histogram
# Histogram of average_montly_hours. Written with ggplot() + geom_histogram()
# because qplot() is deprecated in current ggplot2 releases.
# The bin count is left at the default (30), so stat_bin() still prints its
# hint; pass `binwidth =` to geom_histogram() to tune it.
ggplot(hr_data, aes(x = average_montly_hours)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
I wonder about the relationship between salary and average_montly_hours:
# Histogram of log10(average_montly_hours), one panel per salary level.
# Written with ggplot() + geom_histogram() because qplot() is deprecated in
# current ggplot2 releases. The bin count is left at the default (30).
ggplot(hr_data, aes(x = log10(average_montly_hours))) +
  geom_histogram() +
  facet_wrap(~salary)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
I wonder about the relationship between average_montly_hours and satisfaction_level:
# Scatter plot with one point per row of hr_data: satisfaction_level on the
# x axis, average_montly_hours on the y axis.
ggplot(
  data = hr_data,
  mapping = aes(x = satisfaction_level, y = average_montly_hours)
) +
  geom_point()
Satisfaction level by department
# Bar chart of mean satisfaction_level per value of `sales`.
# The means are computed up front with aggregate() rather than through
# stat_summary(fun.y = ...): `fun.y` is deprecated in newer ggplot2 releases,
# and its replacement `fun` is missing from older ones, so pre-aggregating
# works on any version.
satisfaction_by_sales <- aggregate(
  satisfaction_level ~ sales,
  data = hr_data,
  FUN = mean
)
ggplot(satisfaction_by_sales, aes(x = factor(sales), y = satisfaction_level)) +
  geom_bar(stat = "identity")
Satisfaction level by salary
# Bar chart of mean satisfaction_level per value of `salary`.
# The means are computed up front with aggregate() rather than through
# stat_summary(fun.y = ...): `fun.y` is deprecated in newer ggplot2 releases,
# and its replacement `fun` is missing from older ones, so pre-aggregating
# works on any version.
satisfaction_by_salary <- aggregate(
  satisfaction_level ~ salary,
  data = hr_data,
  FUN = mean
)
ggplot(
  satisfaction_by_salary,
  aes(x = factor(salary), y = satisfaction_level)
) +
  geom_bar(stat = "identity")