import libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0 <U+221A> readr 1.1.1
## <U+221A> tibble 1.4.2 <U+221A> purrr 0.2.5
## <U+221A> tidyr 0.8.1 <U+221A> stringr 1.3.1
## <U+221A> ggplot2 3.0.0 <U+221A> forcats 0.3.0
## -- Conflicts ------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(xlsx)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
loading data
# Read the first worksheet of the price/volume workbook into `df`.
# The folder is held in a variable and joined with file.path(), so the
# session's working directory is left untouched (setwd() in a script is a
# machine-specific side effect that leaks into everything run afterwards).
data_dir <- "C:\\Users\\Baris\\Desktop\\Big Data Analytics MEF 2018-2019\\TERM1\\Data Anaytics Essential\\Grup Projesi"
df <- read.xlsx(
  file.path(data_dir, "data.xlsx"),
  sheetIndex = 1,
  stringsAsFactors = FALSE
)
A first glance at the data
# Show the first six rows of the loaded sheet.
head(df, n = 6L)
## Dates GARAN_Price GARAN_Volume SAHOL_Price SAHOL_Volume BIMAS_Price
## 1 2011-01-03 7.98 22313720 7.32 4640420 26.875
## 2 2011-01-04 8.12 33332820 7.34 12339260 27.000
## 3 2011-01-05 8.20 29799820 7.30 7083469 27.375
## 4 2011-01-06 8.26 27382040 7.34 7146213 27.375
## 5 2011-01-07 8.18 23034690 7.26 3084847 27.500
## 6 2011-01-10 8.02 24683450 7.08 5862300 27.375
## BIMAS_Volume ARCLK_Price ARCLK_Volume EREGL_Price EREGL_Volume
## 1 81456 8.00 1062897 2.404 22624541
## 2 503342 8.10 933412 2.450 45164707
## 3 570148 8.28 3108811 2.450 24573772
## 4 258348 8.46 3174903 2.441 33779776
## 5 118368 8.44 1187558 2.414 23426503
## 6 591186 8.32 1386157 2.331 63788625
## YKBNK._Price YKBNK._Volume TOASO_Price TOASO_Volume TUPRS_Price
## 1 3.209 16027703 8.18 1119643 39.9
## 2 3.286 35298982 8.48 1637494 40.2
## 3 3.390 25069235 8.70 1991127 41.2
## 4 3.441 18941501 8.96 1845225 42.2
## 5 3.364 21952893 9.08 977315 42.1
## 6 3.286 20796489 9.02 320658 43.2
## TUPRS_Volume KRDMD_Price KRDMD_Volume TTRAK_Price TTRAK_Volume
## 1 755837 0.601 112858594 25.55 344739
## 2 725259 0.594 81263963 25.00 182137
## 3 929037 0.594 35891333 24.90 209594
## 4 728990 0.617 204320334 25.00 97379
## 5 456566 0.617 120759363 25.40 116766
## 6 609486 0.632 83828965 25.40 190208
## ASELS_Price ASELS_Volume IPEKE_Price IPEKE_Volume KOZAA_Price
## 1 1.971 5199209 2.368 10271053 2.979
## 2 1.976 9042217 2.355 10367390 3.010
## 3 1.981 5564285 2.336 7226195 3.016
## 4 2.014 16752517 2.394 18930444 3.134
## 5 2.009 11604105 2.375 13859027 3.103
## 6 1.967 3528972 2.400 13948611 3.103
## KOZAA_Volume ASYAB_Price ASYAB_Volume ENKAI_Price ENKAI_Volume
## 1 7609051 2.93 14975976 2.578 6291601
## 2 12414311 2.92 24037787 2.640 12878784
## 3 7925246 2.97 25808561 2.649 13316793
## 4 28707685 3.05 41710590 2.675 12883846
## 5 11696744 3.07 34537393 2.658 4723698
## 6 10068592 3.03 23813586 2.658 5326939
## KOZAL_Price KOZAL_Volume DOHOL_Price DOHOL_Volume IHLAS_Price
## 1 21.20 604411 1.12 20028270 1.457
## 2 20.80 1000572 1.13 79435950 1.457
## 3 20.80 803300 1.16 102384000 1.457
## 4 21.00 653050 1.18 182860100 1.472
## 5 20.90 742266 1.16 68353390 1.441
## 6 21.05 838852 1.15 35311240 1.315
## IHLAS_Volume KCHOL_Price KCHOL_Volume EKGYO_Price EKGYO_Volume
## 1 33729965 7.257 2451606 2.09 76837802
## 2 48222977 7.257 2941748 2.11 47346806
## 3 32485216 7.162 3045013 2.26 186308298
## 4 28644235 7.295 4258826 2.24 76831324
## 5 21074279 7.276 1919420 2.30 60379016
## 6 29570067 7.010 3081952 2.36 103913312
## TCELL_Price TCELL_Volume MGROS_Price MGROS_Volume SISE_Price SISE_Volume
## 1 10.75 1710046 32.1 353724 1.477 5851710
## 2 10.80 5269387 32.3 819396 1.498 16480364
## 3 10.95 2721640 31.8 199052 1.529 21447829
## 4 10.85 2742226 31.3 90973 1.588 31506301
## 5 10.85 1424859 31.1 46314 1.582 6916070
## 6 10.85 2222840 30.9 36216 1.593 6618226
## ISCTR_Price ISCTR_Volume VAKBN_Price VAKBN_Volume AKBNK_Price
## 1 5.66 34995630 3.934 27587022 8.68
## 2 5.74 37813780 3.973 28040042 8.74
## 3 5.80 32680300 4.069 38113868 8.62
## 4 5.82 41029510 4.127 37874576 8.64
## 5 5.68 52768820 4.050 35083251 8.52
## 6 5.60 25207960 3.973 45755402 8.34
## AKBNK_Volume PETKM_Price PETKM_Volume THYAO_Price THYAO_Volume
## 1 8345711 1.467 41499794 4.029 16095423
## 2 11769070 1.545 95221995 4.014 8639566
## 3 14211970 1.570 75822797 3.942 14944558
## 4 6590648 1.545 54379199 3.942 8276507
## 5 12918580 1.539 34941192 3.928 8554191
## 6 10513890 1.533 25254273 3.739 24322693
## TTKOM_Price TTKOM_Volume HALKB_Price HALKB_Volume
## 1 6.64 1227603 13.70 5451992
## 2 6.76 3310982 13.95 5372906
## 3 6.84 2444320 14.20 5825062
## 4 6.84 2006922 14.30 4252567
## 5 6.84 1058166 13.90 3117867
## 6 6.96 1562132 13.40 6181114
A summary of the data: the mean of every column
# Mean of every column of df (NA values dropped), returned as one row.
summarise_all(df, mean, na.rm = TRUE)
## Dates GARAN_Price GARAN_Volume SAHOL_Price SAHOL_Volume BIMAS_Price
## 1 2014-12-10 8.131021 73295049 8.985418 5804879 49.11168
## BIMAS_Volume ARCLK_Price ARCLK_Volume EREGL_Price EREGL_Volume
## 1 693089 14.30943 1665356 4.455403 16760945
## YKBNK._Price YKBNK._Volume TOASO_Price TOASO_Volume TUPRS_Price
## 1 2.636728 39530127 16.77452 1079718 65.45903
## TUPRS_Volume KRDMD_Price KRDMD_Volume TTRAK_Price TTRAK_Volume
## 1 1099433 1.582587 45147502 60.93569 117982.8
## ASELS_Price ASELS_Volume IPEKE_Price IPEKE_Volume KOZAA_Price
## 1 9.881125 4966923 3.131659 9482701 3.261183
## KOZAA_Volume ASYAB_Price ASYAB_Volume ENKAI_Price ENKAI_Volume
## 1 11547915 1.346148 7656948 3.7287 4392621
## KOZAL_Price KOZAL_Volume DOHOL_Price DOHOL_Volume IHLAS_Price
## 1 26.87641 1446803 0.8022033 25600294 0.6079129
## IHLAS_Volume KCHOL_Price KCHOL_Volume EKGYO_Price EKGYO_Volume
## 1 23340599 11.21648 4358175 2.634348 36976994
## TCELL_Price TCELL_Volume MGROS_Price MGROS_Volume SISE_Price SISE_Volume
## 1 11.34677 6376174 20.02143 997320.3 2.781455 6709219
## ISCTR_Price ISCTR_Volume VAKBN_Price VAKBN_Volume AKBNK_Price
## 1 5.429762 36284398 4.486693 32434648 7.87786
## AKBNK_Volume PETKM_Price PETKM_Volume THYAO_Price THYAO_Volume
## 1 21596945 2.875464 22455034 7.295898 41960069
## TTKOM_Price TTKOM_Volume HALKB_Price HALKB_Volume
## 1 6.525088 5768731 12.33722 21602624
# Split the column names of df into the volume columns and the price columns.
namesTable <- names(df)
volumeNames <- grep("Volume$", namesTable, value = TRUE)
priceNames <- grep("Price$", namesTable, value = TRUE)

# One-row table of column means, restricted to the volume columns.
df_volume <- select(
  summarise_at(df, names(df), mean, na.rm = TRUE),
  matches("Volume")
)
df_volume
## GARAN_Volume SAHOL_Volume BIMAS_Volume ARCLK_Volume EREGL_Volume
## 1 73295049 5804879 693089 1665356 16760945
## YKBNK._Volume TOASO_Volume TUPRS_Volume KRDMD_Volume TTRAK_Volume
## 1 39530127 1079718 1099433 45147502 117982.8
## ASELS_Volume IPEKE_Volume KOZAA_Volume ASYAB_Volume ENKAI_Volume
## 1 4966923 9482701 11547915 7656948 4392621
## KOZAL_Volume DOHOL_Volume IHLAS_Volume KCHOL_Volume EKGYO_Volume
## 1 1446803 25600294 23340599 4358175 36976994
## TCELL_Volume MGROS_Volume SISE_Volume ISCTR_Volume VAKBN_Volume
## 1 6376174 997320.3 6709219 36284398 32434648
## AKBNK_Volume PETKM_Volume THYAO_Volume TTKOM_Volume HALKB_Volume
## 1 21596945 22455034 41960069 5768731 21602624
# Turn the one-row table of volume means into a long two-column table
# (volumeNames, value): transpose, attach the names, melt, drop the helper
# `variable` column that melt() adds.
volumes <- df_volume %>%
  t() %>%
  as.data.frame()
volumeTable <- data.frame(volumeNames, volumes) %>%
  melt()
## Using volumeNames as id variables
volumeTable[["variable"]] <- NULL
volumeTable
## volumeNames value
## 1 GARAN_Volume 73295048.7
## 2 SAHOL_Volume 5804879.1
## 3 BIMAS_Volume 693089.0
## 4 ARCLK_Volume 1665355.8
## 5 EREGL_Volume 16760945.0
## 6 YKBNK._Volume 39530127.1
## 7 TOASO_Volume 1079717.7
## 8 TUPRS_Volume 1099432.9
## 9 KRDMD_Volume 45147501.8
## 10 TTRAK_Volume 117982.8
## 11 ASELS_Volume 4966922.7
## 12 IPEKE_Volume 9482700.7
## 13 KOZAA_Volume 11547915.2
## 14 ASYAB_Volume 7656947.9
## 15 ENKAI_Volume 4392620.9
## 16 KOZAL_Volume 1446803.1
## 17 DOHOL_Volume 25600294.4
## 18 IHLAS_Volume 23340598.7
## 19 KCHOL_Volume 4358174.8
## 20 EKGYO_Volume 36976993.5
## 21 TCELL_Volume 6376173.9
## 22 MGROS_Volume 997320.3
## 23 SISE_Volume 6709218.6
## 24 ISCTR_Volume 36284397.8
## 25 VAKBN_Volume 32434648.3
## 26 AKBNK_Volume 21596944.7
## 27 PETKM_Volume 22455034.0
## 28 THYAO_Volume 41960069.4
## 29 TTKOM_Volume 5768731.1
## 30 HALKB_Volume 21602624.1
# Column means of the price columns only. They are stored under their own
# name so the volume means held in df_volume are not overwritten with prices.
df_price <- df %>%
  summarise_at(names(df), mean, na.rm = TRUE) %>%
  select(matches("Price"))

# Reshape to a long two-column table (priceNames, value); melt() adds a helper
# `variable` column that is dropped again.
prices <- as.data.frame(t(df_price))
pricesTable <- melt(data.frame(priceNames, prices))
## Using priceNames as id variables
pricesTable$variable <- NULL
pricesTable
## priceNames value
## 1 GARAN_Price 8.1310214
## 2 SAHOL_Price 8.9854183
## 3 BIMAS_Price 49.1116829
## 4 ARCLK_Price 14.3094309
## 5 EREGL_Price 4.4554027
## 6 YKBNK._Price 2.6367281
## 7 TOASO_Price 16.7745185
## 8 TUPRS_Price 65.4590321
## 9 KRDMD_Price 1.5825871
## 10 TTRAK_Price 60.9356858
## 11 ASELS_Price 9.8811245
## 12 IPEKE_Price 3.1316590
## 13 KOZAA_Price 3.2611834
## 14 ASYAB_Price 1.3461479
## 15 ENKAI_Price 3.7286999
## 16 KOZAL_Price 26.8764105
## 17 DOHOL_Price 0.8022033
## 18 IHLAS_Price 0.6079129
## 19 KCHOL_Price 11.2164805
## 20 EKGYO_Price 2.6343482
## 21 TCELL_Price 11.3467656
## 22 MGROS_Price 20.0214300
## 23 SISE_Price 2.7814548
## 24 ISCTR_Price 5.4297617
## 25 VAKBN_Price 4.4866926
## 26 AKBNK_Price 7.8778599
## 27 PETKM_Price 2.8754635
## 28 THYAO_Price 7.2958979
## 29 TTKOM_Price 6.5250875
## 30 HALKB_Price 12.3372179
After collecting prices and volumes as two categories, we can look at these data for patterns.
Highest values in the volume table (top five).
# Five stocks with the highest mean volume.
# top_n() selects them by rank instead of a hand-tuned cut-off (ties on the
# fifth value would all be kept). reorder() sorts the bars by value, because
# arrange() alone does not change the order ggplot draws a discrete axis in.
v1 <- volumeTable %>%
  top_n(5, value) %>%
  ggplot(aes(x = reorder(volumeNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "volumeNames")
v1
Lowest values in the volume table (bottom five).
# Five stocks with the lowest mean volume.
# A negative n makes top_n() pick the smallest values by rank instead of a
# hand-tuned cut-off. reorder() sorts the bars by value, because arrange()
# alone does not change the order ggplot draws a discrete axis in.
v2 <- volumeTable %>%
  top_n(-5, value) %>%
  ggplot(aes(x = reorder(volumeNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "volumeNames")
v2

# Highest- and lowest-volume charts side by side.
grid.arrange(v1, v2, ncol = 2)
First is GARAN. The average weighted changes at GARAN Comp.
# GARAN: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = GARAN_Price * GARAN_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Second is KRDMD The average weighted changes at KRDMD Comp.
# KRDMD: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# Removed: the no-op group_by(Dates), the unused fill mapping and the stray
# trailing comma inside aes().
df %>%
  mutate(weightedAvg = KRDMD_Price * KRDMD_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Third is THYAO average weighted changes at THYAO Comp.
# THYAO: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# Removed: the no-op group_by(Dates), the unused fill mapping and the stray
# trailing comma inside aes().
df %>%
  mutate(weightedAvg = THYAO_Price * THYAO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Fourth is YKBNK. The average weighted changes at YKBNK Comp.
# YKBNK: price times volume for every row, drawn against the date.
# The columns really are spelled `YKBNK._Price` / `YKBNK._Volume` in df.
# Removed: the no-op group_by(Dates), the unused fill mapping and the stray
# trailing comma inside aes().
df %>%
  mutate(weightedAvg = YKBNK._Price * YKBNK._Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Fifth is EKGYO average weighted changes at EKGYO Comp.
# EKGYO: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# Removed: the no-op group_by(Dates), the unused fill mapping and the stray
# trailing comma inside aes().
df %>%
  mutate(weightedAvg = EKGYO_Price * EKGYO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Last is TTRAK. The average weighted changes at TTRAK. Comp.
# TTRAK: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = TTRAK_Price * TTRAK_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Second Last is BIMAS The average weighted changes at BIMAS Comp.
# BIMAS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = BIMAS_Price * BIMAS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Third Last is MGROS The average weighted changes at MGROS Comp.
# MGROS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = MGROS_Price * MGROS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Fourth last is TOASO. The average weighted changes at TOASO Comp.
# TOASO: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = TOASO_Price * TOASO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Fifth Last is TUPRS The average weighted changes at TUPRS Comp.
# TUPRS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = TUPRS_Price * TUPRS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")
Highest 5 prices in stock market.
# Five stocks with the highest mean price.
# top_n() selects them by rank instead of a hand-tuned cut-off (ties on the
# fifth value would all be kept). reorder() sorts the bars by value, because
# arrange() alone does not change the order ggplot draws a discrete axis in.
p1 <- pricesTable %>%
  top_n(5, value) %>%
  ggplot(aes(x = reorder(priceNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "priceNames")
p1
Lowest 5 prices in stock market.
# Five stocks with the lowest mean price.
# A negative n makes top_n() pick the smallest values by rank; the previous
# cut-off of 2.635 only worked because it fell between the fifth and sixth
# lowest means. reorder() sorts the bars by value.
p2 <- pricesTable %>%
  top_n(-5, value) %>%
  ggplot(aes(x = reorder(priceNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "priceNames")
p2

# Highest- and lowest-price charts side by side.
grid.arrange(p1, p2, ncol = 2)
First is TUPRS The average weighted changes at TUPRS Comp.
# TUPRS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = TUPRS_Price * TUPRS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Second is TTRAK The average weighted changes at TTRAK Comp.
# TTRAK: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = TTRAK_Price * TTRAK_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Third is BIMAS The average weighted changes at BIMAS Comp.
# BIMAS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = BIMAS_Price * BIMAS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Fourth is KOZAL. The average weighted changes at KOZAL Comp.
# KOZAL: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = KOZAL_Price * KOZAL_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Fifth is MGROS The average weighted changes at MGROS Comp.
# MGROS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = MGROS_Price * MGROS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Last is IHLAS The average weighted changes at IHLAS Comp.
# IHLAS: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = IHLAS_Price * IHLAS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Second last is DOHOL The average weighted changes at DOHOL Comp.
# DOHOL: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = DOHOL_Price * DOHOL_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Third last is ASYAB The average weighted changes at ASYAB Comp.
# ASYAB: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = ASYAB_Price * ASYAB_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Fourth last is KRDMD The average weighted changes at KRDMD Comp.
# KRDMD: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = KRDMD_Price * KRDMD_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
Fifth last is EKGYO The average weighted changes at EKGYO Comp.
# EKGYO: price times volume for every row, drawn against the date.
# Despite its name, weightedAvg is the plain product of the two columns.
# The product is row-wise, so no grouping is needed, and geom_line() takes its
# colour from the constant below, so no fill/colour mapping is required.
df %>%
  mutate(weightedAvg = EKGYO_Price * EKGYO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")
First we need company names vector for table
# Ticker symbols, listed in the same order as the price/volume column pairs
# of df.
companyNames <- c(
  "GARAN", "SAHOL", "BIMAS", "ARCLK", "EREGL", "YKBNK",
  "TOASO", "TUPRS", "KRDMD", "TTRAK", "ASELS", "IPEKE",
  "KOZAA", "ASYAB", "ENKAI", "KOZAL", "DOHOL", "IHLAS",
  "KCHOL", "EKGYO", "TCELL", "MGROS", "SISE", "ISCTR",
  "VAKBN", "AKBNK", "PETKM", "THYAO", "TTKOM", "HALKB"
)
length(companyNames)
## [1] 30
# Correlation, across stocks, between mean volume and mean price.
cor(x = volumeTable[["value"]], y = pricesTable[["value"]])
## [1] -0.4261102
# Pearson correlation between each stock's own price and volume series.
# The column pairs are derived from the names of df, so a price can never be
# matched with another ticker's volume. The hand-written list this replaces
# paired TTRAK_Price with TTKOM_Volume, so any TTRAK figure printed from that
# version is stale until the report is re-run.
price_cols <- grep("_Price$", names(df), value = TRUE)
volume_cols <- sub("_Price$", "_Volume", price_cols)

# companyNames must line up one-to-one with the price columns. The prefix test
# also accepts the `YKBNK._Price` spelling used in the sheet.
stopifnot(
  length(price_cols) == length(companyNames),
  all(startsWith(price_cols, companyNames)),
  all(volume_cols %in% names(df))
)

corVec <- vapply(
  seq_along(price_cols),
  function(i) cor(df[[price_cols[i]]], df[[volume_cols[i]]]),
  numeric(1)
)

# Long two-column table (companyNames, value); melt() adds a helper `variable`
# column that is dropped again.
corrTable <- melt(data.frame(companyNames, corVec))
## Using companyNames as id variables
corrTable$variable <- NULL
corrTable
## companyNames value
## 1 GARAN -0.27659034
## 2 SAHOL 0.03629147
## 3 BIMAS 0.18819825
## 4 ARCLK 0.07378494
## 5 EREGL -0.12434734
## 6 YKBNK -0.29028613
## 7 TOASO -0.16690372
## 8 TUPRS 0.24543391
## 9 KRDMD 0.63467485
## 10 TTRAK -0.09599396
## 11 ASELS 0.50660385
## 12 IPEKE 0.28035056
## 13 KOZAA 0.28387359
## 14 ASYAB 0.42324271
## 15 ENKAI -0.11085345
## 16 KOZAL 0.30916404
## 17 DOHOL 0.34891968
## 18 IHLAS 0.36177059
## 19 KCHOL 0.08552598
## 20 EKGYO -0.12992177
## 21 TCELL 0.27189760
## 22 MGROS -0.11783338
## 23 SISE -0.15178686
## 24 ISCTR -0.37971077
## 25 VAKBN -0.29846621
## 26 AKBNK -0.15195815
## 27 PETKM 0.30089594
## 28 THYAO 0.34317685
## 29 TTKOM -0.41449599
## 30 HALKB -0.39876833
The corVec variable holds the correlation between price and volume for each stock. The values are both negative and positive. The larger the absolute value of a correlation, the stronger the linear relationship between the two variables, and the more useful that relationship is for prediction. Examining these correlations therefore helps identify which stocks are the best candidates for regression.
# Bar chart of the price/volume correlation per stock, highest first.
# The ordering is done with reorder() inside aes(): sorting the rows with
# arrange() does not change the order in which ggplot draws a discrete axis,
# so the chart was not actually ordered before.
theme_set(theme_bw())
corrTable %>%
  ggplot(aes(x = reorder(companyNames, -value), y = value)) +
  geom_bar(stat = "identity", width = .5, fill = "tomato3") +
  labs(
    title = "Ordered Bar Chart",
    subtitle = "Correlation Values",
    caption = "Correlation graph",
    x = "companyNames"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
# Stocks ranked by the strength (absolute value) of their correlation.
arrange(corrTable, desc(abs(value)))
## companyNames value
## 1 KRDMD 0.63467485
## 2 ASELS 0.50660385
## 3 ASYAB 0.42324271
## 4 TTKOM -0.41449599
## 5 HALKB -0.39876833
## 6 ISCTR -0.37971077
## 7 IHLAS 0.36177059
## 8 DOHOL 0.34891968
## 9 THYAO 0.34317685
## 10 KOZAL 0.30916404
## 11 PETKM 0.30089594
## 12 VAKBN -0.29846621
## 13 YKBNK -0.29028613
## 14 KOZAA 0.28387359
## 15 IPEKE 0.28035056
## 16 GARAN -0.27659034
## 17 TCELL 0.27189760
## 18 TUPRS 0.24543391
## 19 BIMAS 0.18819825
## 20 TOASO -0.16690372
## 21 AKBNK -0.15195815
## 22 SISE -0.15178686
## 23 EKGYO -0.12992177
## 24 EREGL -0.12434734
## 25 MGROS -0.11783338
## 26 ENKAI -0.11085345
## 27 TTRAK -0.09599396
## 28 KCHOL 0.08552598
## 29 ARCLK 0.07378494
## 30 SAHOL 0.03629147
So let’s see the first five stocks which have the highest correlations
KRDMD is the first one:
# KRDMD volume against price, with a fitted straight line.
# Columns are referenced by bare name: `df$...` inside aes() bypasses the
# plot's data argument and puts "df$" into the axis labels.
ggplot(df, aes(x = KRDMD_Price, y = KRDMD_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")
ASELS is the second one:
# ASELS volume against price, with a fitted straight line.
# Columns are referenced by bare name: `df$...` inside aes() bypasses the
# plot's data argument and puts "df$" into the axis labels.
ggplot(df, aes(x = ASELS_Price, y = ASELS_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")
ASYAB is the third one:
# ASYAB volume against price, with a fitted straight line.
# Columns are referenced by bare name: `df$...` inside aes() bypasses the
# plot's data argument and puts "df$" into the axis labels.
ggplot(df, aes(x = ASYAB_Price, y = ASYAB_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")
TTKOM is the fourth one:
# TTKOM volume against price, with a fitted straight line.
# Columns are referenced by bare name: `df$...` inside aes() bypasses the
# plot's data argument and puts "df$" into the axis labels.
ggplot(df, aes(x = TTKOM_Price, y = TTKOM_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")
HALKB is the fifth one:
# HALKB volume against price, with a fitted straight line.
# Columns are referenced by bare name: `df$...` inside aes() bypasses the
# plot's data argument and puts "df$" into the axis labels.
ggplot(df, aes(x = HALKB_Price, y = HALKB_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")
Linear models of price on volume for these five stocks
# Simple linear regressions of price on volume for the five stocks with the
# strongest price/volume correlation.
# The formulas use bare column names resolved through `data = df`. Writing
# `df$col` in a formula makes the data argument irrelevant and stops
# predict(..., newdata = ) from working, because the terms are bound to the
# original vectors.
linearMod1 <- lm(KRDMD_Price ~ KRDMD_Volume, data = df)
linearMod2 <- lm(ASELS_Price ~ ASELS_Volume, data = df)
linearMod3 <- lm(ASYAB_Price ~ ASYAB_Volume, data = df)
linearMod4 <- lm(TTKOM_Price ~ TTKOM_Volume, data = df)
linearMod5 <- lm(HALKB_Price ~ HALKB_Volume, data = df)
# Coefficients, residual quantiles and R-squared of the KRDMD fit.
summary(object = linearMod1)
##
## Call:
## lm(formula = df$KRDMD_Price ~ df$KRDMD_Volume, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0284 -0.4086 -0.1078 0.2586 3.1083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.495e-01 2.598e-02 32.70 <2e-16 ***
## df$KRDMD_Volume 1.624e-08 4.363e-10 37.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7679 on 2054 degrees of freedom
## Multiple R-squared: 0.4028, Adjusted R-squared: 0.4025
## F-statistic: 1385 on 1 and 2054 DF, p-value: < 2.2e-16
# Coefficients, residual quantiles and R-squared of the ASELS fit.
summary(object = linearMod2)
##
## Call:
## lm(formula = df$ASELS_Price ~ df$ASELS_Volume, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.981 -5.126 -2.862 1.913 23.194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.391e+00 2.000e-01 36.95 <2e-16 ***
## df$ASELS_Volume 5.014e-07 1.883e-08 26.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.017 on 2054 degrees of freedom
## Multiple R-squared: 0.2566, Adjusted R-squared: 0.2563
## F-statistic: 709.2 on 1 and 2054 DF, p-value: < 2.2e-16
# Coefficients, residual quantiles and R-squared of the ASYAB fit.
summary(object = linearMod3)
##
## Call:
## lm(formula = df$ASYAB_Price ~ df$ASYAB_Volume, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9209 -0.3646 -0.3646 0.4634 1.7158
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.183e+00 1.525e-02 77.56 <2e-16 ***
## df$ASYAB_Volume 2.132e-08 1.007e-09 21.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5966 on 2054 degrees of freedom
## Multiple R-squared: 0.1791, Adjusted R-squared: 0.1787
## F-statistic: 448.2 on 1 and 2054 DF, p-value: < 2.2e-16
# Coefficients, residual quantiles and R-squared of the TTKOM fit.
summary(object = linearMod4)
##
## Call:
## lm(formula = df$TTKOM_Price ~ df$TTKOM_Volume, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1650 -0.5676 0.0048 0.5430 4.7753
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.662e+00 2.032e-02 327.83 <2e-16 ***
## df$TTKOM_Volume -2.370e-08 1.148e-09 -20.64 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8711 on 2054 degrees of freedom
## Multiple R-squared: 0.1718, Adjusted R-squared: 0.1714
## F-statistic: 426.1 on 1 and 2054 DF, p-value: < 2.2e-16
# Coefficients, residual quantiles and R-squared of the HALKB fit.
summary(object = linearMod5)
##
## Call:
## lm(formula = df$HALKB_Price ~ df$HALKB_Volume, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2797 -1.9519 -0.4472 2.0084 9.9181
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.375e+01 9.431e-02 145.81 <2e-16 ***
## df$HALKB_Volume -6.544e-08 3.321e-09 -19.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.776 on 2054 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1586
## F-statistic: 388.4 on 1 and 2054 DF, p-value: < 2.2e-16
Merging the volumes and prices
# Product of each stock's mean price and mean volume, one value per stock.
# The two tables are multiplied position by position, so they must have one
# row per company; the guard stops R from silently recycling a shorter vector.
stopifnot(
  nrow(pricesTable) == nrow(volumeTable),
  nrow(pricesTable) == length(companyNames)
)
weightedPrices <- pricesTable$value * volumeTable$value

# Long two-column table (companyNames, value); melt() adds a helper `variable`
# column that is dropped again.
generalTable <- melt(data.frame(companyNames, weightedPrices))
## Using companyNames as id variables
generalTable$variable <- NULL

# Bars sorted from largest to smallest. reorder() does the sorting because
# arrange() on the rows does not change the order ggplot draws a discrete
# axis in.
generalTable %>%
  ggplot(aes(
    x = reorder(companyNames, -value), y = value,
    fill = companyNames, color = companyNames
  )) +
  geom_bar(stat = "identity") +
  labs(x = "companyNames") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))