R_hw2_sefa

# Load the Forbes 2017 list from a comma-separated file whose first row
# holds the column names.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
df <- read.csv(
  file = "C:/Users/serba/Desktop/Forbes2017.csv",
  header = TRUE,
  sep = ","
)

# Count how many listed companies each country has.
# The data frame has no `country.status` column (see names(df)), so the
# original call tabulated NULL and printed "< table of extent 0 >".
# Tabulate the existing `Country` column instead.
table(df$Country)

# NOTE(review): re-knit the document to regenerate the printed table here.

# Column names of the Forbes list loaded into `df`
names(df)

##  [1] "X"            "Rank"         "Company"      "Country"     
##  [5] "Sales"        "Profits"      "Assets"       "Market.Value"
##  [9] "Sector"       "Industry"

# Compact summary of the structure of `df`: column types and first values
str(df)

## 'data.frame':    2000 obs. of  10 variables:
##  $ X           : logi  NA NA NA NA NA NA ...
##  $ Rank        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Company     : Factor w/ 1999 levels "3i Group","3M",..: 921 393 277 1021 1927 43 216 219 124 1824 ...
##  $ Country     : Factor w/ 61 levels "Argentina","Australia",..: 10 10 59 59 59 10 59 10 59 26 ...
##  $ Sales       : num  151.4 134.2 222.9 102.5 97.6 ...
##  $ Profits     : num  42 35 24.1 24.2 21.9 27.8 16.6 24.9 45.2 17.1 ...
##  $ Assets      : num  3473 3017 621 2513 1943 ...
##  $ Market.Value: num  230 200 410 307 274 ...
##  $ Sector      : Factor w/ 11 levels "","Consumer Discretionary",..: 5 5 5 5 5 5 5 5 8 2 ...
##  $ Industry    : Factor w/ 81 levels "","Advertising",..: 53 69 50 53 53 69 53 53 19 9 ...

library(dplyr)#glimpse function

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Overview of the rows and columns of `df` (glimpse() comes from dplyr)
glimpse(df)

## Observations: 2,000
## Variables: 10
## $ X            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Rank         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15...
## $ Company      <fctr> ICBC, China Construction Bank, Berkshire Hathawa...
## $ Country      <fctr> China, China, United States, United States, Unit...
## $ Sales        <dbl> 151.4, 134.2, 222.9, 102.5, 97.6, 115.7, 92.2, 11...
## $ Profits      <dbl> 42.0, 35.0, 24.1, 24.2, 21.9, 27.8, 16.6, 24.9, 4...
## $ Assets       <dbl> 3473.2, 3016.6, 620.9, 2513.0, 1943.4, 2816.0, 21...
## $ Market.Value <dbl> 229.8, 200.5, 409.9, 306.6, 274.4, 149.2, 231.9, ...
## $ Sector       <fctr> Financials, Financials, Financials, Financials, ...
## $ Industry     <fctr> Major Banks, Regional Banks, Investment Services...

# Number of rows and columns of `df`
dim(df)

## [1] 2000   10

# How much profit did the most profitable company make?
# na.rm = TRUE keeps the answer defined even if some Profits values are
# missing; with na.rm = FALSE a single NA would turn the result into NA.
max(df$Profits, na.rm = TRUE)

## [1] 45.2

# How many countries appear in the top-2000 list, and how many companies
# does each of them have? One row per country; `country_int` is the count.
q1 <- df %>%
  group_by(Country) %>%
  summarise(country_int = n())

q1

## # A tibble: 61 x 2
##      Country country_int
##       <fctr>       <int>
##  1 Argentina           3
##  2 Australia          39
##  3   Austria           8
##  4   Bahrain           2
##  5   Belgium           9
##  6   Bermuda           9
##  7    Brazil          20
##  8    Canada          58
##  9     Chile           7
## 10     China         200
## # ... with 51 more rows

library(ggplot2)

# Which sector is the most successful in which country?
# Both axes are categorical, so plain points overplot: every company in the
# same Sector/Country cell lands on the same spot and the plot only shows
# whether a combination exists. geom_count() sizes each point by the number
# of companies in that cell, so the dominant sectors per country are visible.
ggplot(data = df, aes(x = Sector, y = Country)) +
  geom_count()

# Which countries have 10 or more companies on the list?
# Count companies per country, keep the countries with at least 10, and
# sort them from the largest count to the smallest.
q2 <- df %>%
  group_by(Country) %>%
  summarise(country_int = n()) %>%
  filter(country_int >= 10) %>%
  arrange(desc(country_int))

q2

## # A tibble: 29 x 2
##           Country country_int
##            <fctr>       <int>
##  1  United States         564
##  2          Japan         229
##  3          China         200
##  4 United Kingdom          91
##  5    South Korea          64
##  6      Hong Kong          62
##  7         France          59
##  8         Canada          58
##  9          India          58
## 10        Germany          51
## # ... with 19 more rows

R_hw2_sefa_erbas

Sefa Erbas

21 Ekim 2017