# Structural overview of bigMart: dimensions plus each column's class and
# leading values.
bigMart %>%
  str()
## 'data.frame':    8523 obs. of  12 variables:
##  $ Item_Identifier          : Factor w/ 1559 levels "DRA12","DRA24",..: 157 9 663 1122 1298 759 697 739 441 991 ...
##  $ Item_Weight              : num  9.3 5.92 17.5 19.2 8.93 ...
##  $ Item_Fat_Content         : Factor w/ 5 levels "LF","low fat",..: 3 5 3 5 3 5 5 3 5 5 ...
##  $ Item_Visibility          : num  0.016 0.0193 0.0168 0 0 ...
##  $ Item_Type                : Factor w/ 16 levels "Baking Goods",..: 5 15 11 7 10 1 14 14 6 6 ...
##  $ Item_MRP                 : num  249.8 48.3 141.6 182.1 53.9 ...
##  $ Outlet_Identifier        : Factor w/ 10 levels "OUT010","OUT013",..: 10 4 10 1 2 4 2 6 8 3 ...
##  $ Outlet_Establishment_Year: int  1999 2009 1999 1998 1987 2009 1987 1985 2002 2007 ...
##  $ Outlet_Size              : Factor w/ 4 levels "","High","Medium",..: 3 3 3 1 2 3 2 3 1 1 ...
##  $ Outlet_Location_Type     : Factor w/ 3 levels "Tier 1","Tier 2",..: 1 3 1 3 3 3 3 3 2 2 ...
##  $ Outlet_Type              : Factor w/ 4 levels "Grocery Store",..: 2 3 2 1 2 3 2 4 2 2 ...
##  $ Item_Outlet_Sales        : num  3735 443 2097 732 995 ...
# Column-wise preview of bigMart (one line per variable with its type and
# first values).
bigMart %>%
  glimpse()
## Observations: 8,523
## Variables: 12
## $ Item_Identifier           <fctr> FDA15, DRC01, FDN15, FDX07, NCD19, ...
## $ Item_Weight               <dbl> 9.300, 5.920, 17.500, 19.200, 8.930,...
## $ Item_Fat_Content          <fctr> Low Fat, Regular, Low Fat, Regular,...
## $ Item_Visibility           <dbl> 0.016047301, 0.019278216, 0.01676007...
## $ Item_Type                 <fctr> Dairy, Soft Drinks, Meat, Fruits an...
## $ Item_MRP                  <dbl> 249.8092, 48.2692, 141.6180, 182.095...
## $ Outlet_Identifier         <fctr> OUT049, OUT018, OUT049, OUT010, OUT...
## $ Outlet_Establishment_Year <int> 1999, 2009, 1999, 1998, 1987, 2009, ...
## $ Outlet_Size               <fctr> Medium, Medium, Medium, , High, Med...
## $ Outlet_Location_Type      <fctr> Tier 1, Tier 3, Tier 1, Tier 3, Tie...
## $ Outlet_Type               <fctr> Supermarket Type1, Supermarket Type...
## $ Item_Outlet_Sales         <dbl> 3735.1380, 443.4228, 2097.2700, 732....
# Number of distinct Item_Identifier values across all rows.
# The result column is left unnamed on purpose so it keeps the
# auto-generated name `n_distinct(Item_Identifier)`.
summarise(bigMart, n_distinct(Item_Identifier))
##   n_distinct(Item_Identifier)
## 1                        1559
# Frequency table for Item_Fat_Content: rows per level (Count) and that
# level's share of all rows in percent (Perc, rounded to 2 decimals),
# ordered from most to least frequent.
# NOTE(review): the recorded output lists spelling variants of the same
# category ("LF", "low fat", "Low Fat" / "reg", "Regular") -- presumably
# these should be merged before modelling; confirm.
bigMart %>%
  group_by(Item_Fat_Content) %>%
  summarise(Count = n()) %>%
  mutate(Perc = round(Count / sum(Count) * 100, 2)) %>%
  arrange(desc(Count))
## # A tibble: 5 x 3
##   Item_Fat_Content Count  Perc
##             <fctr> <int> <dbl>
## 1          Low Fat  5089 59.71
## 2          Regular  2889 33.90
## 3               LF   316  3.71
## 4              reg   117  1.37
## 5          low fat   112  1.31
# Frequency table for Item_Type: rows per level (Count) and that level's
# share of all rows in percent (Perc, rounded to 2 decimals), ordered from
# most to least frequent.
bigMart %>%
  group_by(Item_Type) %>%
  summarise(Count = n()) %>%
  mutate(Perc = round(Count / sum(Count) * 100, 2)) %>%
  arrange(desc(Count))
## # A tibble: 16 x 3
##                Item_Type Count  Perc
##                   <fctr> <int> <dbl>
##  1 Fruits and Vegetables  1232 14.46
##  2           Snack Foods  1200 14.08
##  3             Household   910 10.68
##  4          Frozen Foods   856 10.04
##  5                 Dairy   682  8.00
##  6                Canned   649  7.61
##  7          Baking Goods   648  7.60
##  8    Health and Hygiene   520  6.10
##  9           Soft Drinks   445  5.22
## 10                  Meat   425  4.99
## 11                Breads   251  2.94
## 12           Hard Drinks   214  2.51
## 13                Others   169  1.98
## 14         Starchy Foods   148  1.74
## 15             Breakfast   110  1.29
## 16               Seafood    64  0.75
# Frequency table for Outlet_Identifier: rows per outlet (Count) and that
# outlet's share of all rows in percent (Perc, rounded to 2 decimals),
# ordered from most to least frequent.
bigMart %>%
  group_by(Outlet_Identifier) %>%
  summarise(Count = n()) %>%
  mutate(Perc = round(Count / sum(Count) * 100, 2)) %>%
  arrange(desc(Count))
## # A tibble: 10 x 3
##    Outlet_Identifier Count  Perc
##               <fctr> <int> <dbl>
##  1            OUT027   935 10.97
##  2            OUT013   932 10.94
##  3            OUT035   930 10.91
##  4            OUT046   930 10.91
##  5            OUT049   930 10.91
##  6            OUT045   929 10.90
##  7            OUT018   928 10.89
##  8            OUT017   926 10.86
##  9            OUT010   555  6.51
## 10            OUT019   528  6.20
# Frequency table for Outlet_Size: rows per level (Count) and that level's
# share of all rows in percent (Perc, rounded to 2 decimals), ordered from
# most to least frequent.
# NOTE(review): the recorded output contains an empty-string level with
# 2410 rows -- presumably missing values encoded as ""; confirm.
bigMart %>%
  group_by(Outlet_Size) %>%
  summarise(Count = n()) %>%
  mutate(Perc = round(Count / sum(Count) * 100, 2)) %>%
  arrange(desc(Count))
## # A tibble: 4 x 3
##   Outlet_Size Count  Perc
##        <fctr> <int> <dbl>
## 1      Medium  2793 32.77
## 2              2410 28.28
## 3       Small  2388 28.02
## 4        High   932 10.94
# Frequency table for Outlet_Location_Type: rows per tier (Count) and that
# tier's share of all rows in percent (Perc, rounded to 2 decimals),
# ordered from most to least frequent.
bigMart %>%
  group_by(Outlet_Location_Type) %>%
  summarise(Count = n()) %>%
  mutate(Perc = round(Count / sum(Count) * 100, 2)) %>%
  arrange(desc(Count))
## # A tibble: 3 x 3
##   Outlet_Location_Type Count  Perc
##                 <fctr> <int> <dbl>
## 1               Tier 3  3350 39.31
## 2               Tier 2  2785 32.68
## 3               Tier 1  2388 28.02
# Frequency table for Outlet_Type: rows per level (Count) and that level's
# share of all rows in percent (Perc, rounded to 2 decimals), ordered from
# most to least frequent.
bigMart %>%
  group_by(Outlet_Type) %>%
  summarise(Count = n()) %>%
  mutate(Perc = round(Count / sum(Count) * 100, 2)) %>%
  arrange(desc(Count))
## # A tibble: 4 x 3
##         Outlet_Type Count  Perc
##              <fctr> <int> <dbl>
## 1 Supermarket Type1  5577 65.43
## 2     Grocery Store  1083 12.71
## 3 Supermarket Type3   935 10.97
## 4 Supermarket Type2   928 10.89
# Count missing (is_NULL) and present (is_NOT_NULL) values of Item_Weight.
# is.na() already returns a logical vector, so it is summed directly; no
# `== 1` comparison is needed (and `!x == 1` would parse as `!(x == 1)`
# because `==` binds tighter than `!`).
bigMart %>%
  summarise(
    is_NULL     = sum(is.na(Item_Weight)),
    is_NOT_NULL = sum(!is.na(Item_Weight))
  )
##   is_NULL is_NOT_NULL
## 1    1463        7060
# Descriptive statistics for Item_Weight (max, min, mean, median, quartiles
# and inter-quartile range). Missing values are excluded from every
# statistic via na.rm = TRUE.
bigMart %>%
  summarise(
    Max    = max(Item_Weight, na.rm = TRUE),
    Min    = min(Item_Weight, na.rm = TRUE),
    Mean   = mean(Item_Weight, na.rm = TRUE),
    Median = median(Item_Weight, na.rm = TRUE),
    QUA1   = quantile(Item_Weight, 0.25, na.rm = TRUE),
    QUA3   = quantile(Item_Weight, 0.75, na.rm = TRUE),
    IQR    = IQR(Item_Weight, na.rm = TRUE)
  )
##     Max   Min     Mean Median    QUA1  QUA3     IQR
## 1 21.35 4.555 12.85765   12.6 8.77375 16.85 8.07625
# Count missing (is_NULL) and present (is_NOT_NULL) values of
# Item_Visibility.
# is.na() already returns a logical vector, so it is summed directly; no
# `== 1` comparison is needed (and `!x == 1` would parse as `!(x == 1)`
# because `==` binds tighter than `!`).
bigMart %>%
  summarise(
    is_NULL     = sum(is.na(Item_Visibility)),
    is_NOT_NULL = sum(!is.na(Item_Visibility))
  )
##   is_NULL is_NOT_NULL
## 1       0        8523
# Descriptive statistics for Item_Visibility (max, min, mean, median,
# quartiles and inter-quartile range). Missing values are excluded from
# every statistic via na.rm = TRUE.
# NOTE(review): the recorded output shows a minimum of exactly 0 --
# presumably a placeholder rather than a real visibility; confirm.
bigMart %>%
  summarise(
    Max    = max(Item_Visibility, na.rm = TRUE),
    Min    = min(Item_Visibility, na.rm = TRUE),
    Mean   = mean(Item_Visibility, na.rm = TRUE),
    Median = median(Item_Visibility, na.rm = TRUE),
    QUA1   = quantile(Item_Visibility, 0.25, na.rm = TRUE),
    QUA3   = quantile(Item_Visibility, 0.75, na.rm = TRUE),
    IQR    = IQR(Item_Visibility, na.rm = TRUE)
  )
##         Max Min       Mean     Median       QUA1       QUA3        IQR
## 1 0.3283909   0 0.06613203 0.05393093 0.02698948 0.09458529 0.06759582
# Count missing (is_NULL) and present (is_NOT_NULL) values of Item_MRP.
# is.na() already returns a logical vector, so it is summed directly; no
# `== 1` comparison is needed (and `!x == 1` would parse as `!(x == 1)`
# because `==` binds tighter than `!`).
bigMart %>%
  summarise(
    is_NULL     = sum(is.na(Item_MRP)),
    is_NOT_NULL = sum(!is.na(Item_MRP))
  )
##   is_NULL is_NOT_NULL
## 1       0        8523
# Descriptive statistics for Item_MRP (max, min, mean, median, quartiles
# and inter-quartile range). Missing values are excluded from every
# statistic via na.rm = TRUE.
bigMart %>%
  summarise(
    Max    = max(Item_MRP, na.rm = TRUE),
    Min    = min(Item_MRP, na.rm = TRUE),
    Mean   = mean(Item_MRP, na.rm = TRUE),
    Median = median(Item_MRP, na.rm = TRUE),
    QUA1   = quantile(Item_MRP, 0.25, na.rm = TRUE),
    QUA3   = quantile(Item_MRP, 0.75, na.rm = TRUE),
    IQR    = IQR(Item_MRP, na.rm = TRUE)
  )
##        Max   Min     Mean   Median    QUA1     QUA3     IQR
## 1 266.8884 31.29 140.9928 143.0128 93.8265 185.6437 91.8172
# Count missing (is_NULL) and present (is_NOT_NULL) values of
# Outlet_Establishment_Year.
# is.na() already returns a logical vector, so it is summed directly; no
# `== 1` comparison is needed (and `!x == 1` would parse as `!(x == 1)`
# because `==` binds tighter than `!`).
bigMart %>%
  summarise(
    is_NULL     = sum(is.na(Outlet_Establishment_Year)),
    is_NOT_NULL = sum(!is.na(Outlet_Establishment_Year))
  )
##   is_NULL is_NOT_NULL
## 1       0        8523
# Descriptive statistics for Outlet_Establishment_Year (max, min, mean,
# median, quartiles and inter-quartile range). Missing values are excluded
# from every statistic via na.rm = TRUE.
bigMart %>%
  summarise(
    Max    = max(Outlet_Establishment_Year, na.rm = TRUE),
    Min    = min(Outlet_Establishment_Year, na.rm = TRUE),
    Mean   = mean(Outlet_Establishment_Year, na.rm = TRUE),
    Median = median(Outlet_Establishment_Year, na.rm = TRUE),
    QUA1   = quantile(Outlet_Establishment_Year, 0.25, na.rm = TRUE),
    QUA3   = quantile(Outlet_Establishment_Year, 0.75, na.rm = TRUE),
    IQR    = IQR(Outlet_Establishment_Year, na.rm = TRUE)
  )
##    Max  Min     Mean Median QUA1 QUA3 IQR
## 1 2009 1985 1997.832   1999 1987 2004  17
# Count missing (is_NULL) and present (is_NOT_NULL) values of
# Item_Outlet_Sales.
# is.na() already returns a logical vector, so it is summed directly; no
# `== 1` comparison is needed (and `!x == 1` would parse as `!(x == 1)`
# because `==` binds tighter than `!`).
bigMart %>%
  summarise(
    is_NULL     = sum(is.na(Item_Outlet_Sales)),
    is_NOT_NULL = sum(!is.na(Item_Outlet_Sales))
  )
##   is_NULL is_NOT_NULL
## 1       0        8523
# Descriptive statistics for Item_Outlet_Sales (max, min, mean, median,
# quartiles and inter-quartile range). Missing values are excluded from
# every statistic via na.rm = TRUE.
bigMart %>%
  summarise(
    Max    = max(Item_Outlet_Sales, na.rm = TRUE),
    Min    = min(Item_Outlet_Sales, na.rm = TRUE),
    Mean   = mean(Item_Outlet_Sales, na.rm = TRUE),
    Median = median(Item_Outlet_Sales, na.rm = TRUE),
    QUA1   = quantile(Item_Outlet_Sales, 0.25, na.rm = TRUE),
    QUA3   = quantile(Item_Outlet_Sales, 0.75, na.rm = TRUE),
    IQR    = IQR(Item_Outlet_Sales, na.rm = TRUE)
  )
##        Max   Min     Mean   Median     QUA1     QUA3      IQR
## 1 13086.96 33.29 2181.289 1794.331 834.2474 3101.296 2267.049
# Per-column summary of bigMart: quantiles/mean (and NA counts) for numeric
# columns, level frequencies for factor columns.
bigMart %>%
  summary()
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  FDG33  :  10    Min.   : 4.555   LF     : 316     Min.   :0.00000  
##  FDW13  :  10    1st Qu.: 8.774   low fat: 112     1st Qu.:0.02699  
##  DRE49  :   9    Median :12.600   Low Fat:5089     Median :0.05393  
##  DRN47  :   9    Mean   :12.858   reg    : 117     Mean   :0.06613  
##  FDD38  :   9    3rd Qu.:16.850   Regular:2889     3rd Qu.:0.09459  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.32839  
##  (Other):8467    NA's   :1463                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :2410   Tier 1:2388         
##  1st Qu.:1987              High  : 932   Tier 2:2785         
##  Median :1999              Medium:2793   Tier 3:3350         
##  Mean   :1998              Small :2388                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
##