Data Loading
features_data_set <- read.csv2("Features data set.csv", header = TRUE, sep = ",")
sales_data_set <- read.csv2("sales data-set.csv", header = TRUE, sep = ",")
stores_data_set <- read.csv2("stores data-set.csv", header = TRUE, sep = ",")
str(features_data_set)
## 'data.frame': 8190 obs. of 12 variables:
## $ Store : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : Factor w/ 182 levels "01/02/2013","01/03/2013",..: 25 67 109 151 26 68 110 152 8 50 ...
## $ Temperature : Factor w/ 4178 levels "-2.06","-6.08",..: 1066 864 934 1308 1300 1998 1797 1601 2315 2579 ...
## $ Fuel_Price : Factor w/ 1011 levels "2.472","2.513",..: 16 10 3 12 43 58 88 95 87 121 ...
## $ MarkDown1 : Factor w/ 4023 levels "-16.93","-2781.45",..: NA NA NA NA NA NA NA NA NA NA ...
## $ MarkDown2 : Factor w/ 2715 levels "-0.01","-0.05",..: NA NA NA NA NA NA NA NA NA NA ...
## $ MarkDown3 : Factor w/ 2885 levels "-0.2","-0.73",..: NA NA NA NA NA NA NA NA NA NA ...
## $ MarkDown4 : Factor w/ 3405 levels "0.22","0.41",..: NA NA NA NA NA NA NA NA NA NA ...
## $ MarkDown5 : Factor w/ 4045 levels "-185.17","-37.02",..: NA NA NA NA NA NA NA NA NA NA ...
## $ CPI : Factor w/ 2505 levels "126.064","126.0766452",..: 1124 1143 1148 1150 1154 1157 1139 1115 1096 1084 ...
## $ Unemployment: Factor w/ 404 levels "10.064","10.115",..: 286 286 286 286 286 286 286 286 252 252 ...
## $ IsHoliday : logi FALSE TRUE FALSE FALSE FALSE FALSE ...
str(sales_data_set)
## 'data.frame': 421570 obs. of 5 variables:
## $ Store : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Dept : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : Factor w/ 143 levels "01/04/2011","01/06/2012",..: 20 53 86 119 21 54 87 120 6 39 ...
## $ Weekly_Sales: Factor w/ 359464 levels "-0.02","-0.04",..: 140272 237401 220814 101950 120302 114885 122532 148007 272542 225936 ...
## $ IsHoliday : logi FALSE TRUE FALSE FALSE FALSE FALSE ...
str(stores_data_set)
## 'data.frame': 45 obs. of 3 variables:
## $ Store: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Type : Factor w/ 3 levels "A","B","C": 1 1 2 1 2 1 2 1 2 2 ...
## $ Size : int 151315 202307 37392 205863 34875 202505 70713 155078 125833 126512 ...
Data type converting
features_data_set$Year <- substr(features_data_set$Date, 7, 10)
features_data_set$Month <- substr(features_data_set$Date, 4, 5)
features_data_set$Day <- substr(features_data_set$Date, 1, 2)
sales_data_set$Year <- substr(sales_data_set$Date, 7, 10)
sales_data_set$Month <- substr(sales_data_set$Date, 4, 5)
sales_data_set$Day <- substr(sales_data_set$Date, 1, 2)
sales_data_set$Weekly_Sales <- as.character(sales_data_set$Weekly_Sales)
sales_data_set$Weekly_Sales <- as.numeric(sales_data_set$Weekly_Sales,2)
How many Store
ggplot(stores_data_set, aes(Type, fill = Type ) ) +
geom_bar() +
xlab("Type of Store") + ylab("Count of Store")
Sales of Years
YearSales <- sales_data_set %>% group_by(Year) %>% summarise(YearSales = sum(Weekly_Sales))
ggplot(YearSales, aes(Year, YearSales)) +
geom_col()
Store Size Analyze
SalesStore <- left_join(sales_data_set, stores_data_set, by = "Store")
ggplot(SalesStore, aes(Type, Size) ) +
geom_point()
Store Size & Weekly Sales
plot(SalesStore$Size,SalesStore$Weekly_Sales, main = "Size vs Sales", xlab = "Store Size", ylab = "Weekly Sales")