Import libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0     <U+221A> readr   1.1.1
## <U+221A> tibble  1.4.2     <U+221A> purrr   0.2.5
## <U+221A> tidyr   0.8.1     <U+221A> stringr 1.3.1
## <U+221A> ggplot2 3.0.0     <U+221A> forcats 0.3.0
## -- Conflicts ------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(xlsx)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Loading the data

# Read the first sheet of the workbook into `df`.
# The file path is built with file.path() instead of calling setwd(), so the
# session's working directory is left untouched.
# NOTE(review): data_dir is machine-specific; adjust it when running elsewhere.
data_dir <- "C:\\Users\\Baris\\Desktop\\Big Data Analytics MEF 2018-2019\\TERM1\\Data Anaytics Essential\\Grup Projesi"
df <- read.xlsx(file.path(data_dir, "data.xlsx"), sheetIndex = 1,
                stringsAsFactors = FALSE)

A first glance at the data

# Print the first rows of the loaded data frame to inspect its columns.
head(df)
##        Dates GARAN_Price GARAN_Volume SAHOL_Price SAHOL_Volume BIMAS_Price
## 1 2011-01-03        7.98     22313720        7.32      4640420      26.875
## 2 2011-01-04        8.12     33332820        7.34     12339260      27.000
## 3 2011-01-05        8.20     29799820        7.30      7083469      27.375
## 4 2011-01-06        8.26     27382040        7.34      7146213      27.375
## 5 2011-01-07        8.18     23034690        7.26      3084847      27.500
## 6 2011-01-10        8.02     24683450        7.08      5862300      27.375
##   BIMAS_Volume ARCLK_Price ARCLK_Volume EREGL_Price EREGL_Volume
## 1        81456        8.00      1062897       2.404     22624541
## 2       503342        8.10       933412       2.450     45164707
## 3       570148        8.28      3108811       2.450     24573772
## 4       258348        8.46      3174903       2.441     33779776
## 5       118368        8.44      1187558       2.414     23426503
## 6       591186        8.32      1386157       2.331     63788625
##   YKBNK._Price YKBNK._Volume TOASO_Price TOASO_Volume TUPRS_Price
## 1        3.209      16027703        8.18      1119643        39.9
## 2        3.286      35298982        8.48      1637494        40.2
## 3        3.390      25069235        8.70      1991127        41.2
## 4        3.441      18941501        8.96      1845225        42.2
## 5        3.364      21952893        9.08       977315        42.1
## 6        3.286      20796489        9.02       320658        43.2
##   TUPRS_Volume KRDMD_Price KRDMD_Volume TTRAK_Price TTRAK_Volume
## 1       755837       0.601    112858594       25.55       344739
## 2       725259       0.594     81263963       25.00       182137
## 3       929037       0.594     35891333       24.90       209594
## 4       728990       0.617    204320334       25.00        97379
## 5       456566       0.617    120759363       25.40       116766
## 6       609486       0.632     83828965       25.40       190208
##   ASELS_Price ASELS_Volume IPEKE_Price IPEKE_Volume KOZAA_Price
## 1       1.971      5199209       2.368     10271053       2.979
## 2       1.976      9042217       2.355     10367390       3.010
## 3       1.981      5564285       2.336      7226195       3.016
## 4       2.014     16752517       2.394     18930444       3.134
## 5       2.009     11604105       2.375     13859027       3.103
## 6       1.967      3528972       2.400     13948611       3.103
##   KOZAA_Volume ASYAB_Price ASYAB_Volume ENKAI_Price ENKAI_Volume
## 1      7609051        2.93     14975976       2.578      6291601
## 2     12414311        2.92     24037787       2.640     12878784
## 3      7925246        2.97     25808561       2.649     13316793
## 4     28707685        3.05     41710590       2.675     12883846
## 5     11696744        3.07     34537393       2.658      4723698
## 6     10068592        3.03     23813586       2.658      5326939
##   KOZAL_Price KOZAL_Volume DOHOL_Price DOHOL_Volume IHLAS_Price
## 1       21.20       604411        1.12     20028270       1.457
## 2       20.80      1000572        1.13     79435950       1.457
## 3       20.80       803300        1.16    102384000       1.457
## 4       21.00       653050        1.18    182860100       1.472
## 5       20.90       742266        1.16     68353390       1.441
## 6       21.05       838852        1.15     35311240       1.315
##   IHLAS_Volume KCHOL_Price KCHOL_Volume EKGYO_Price EKGYO_Volume
## 1     33729965       7.257      2451606        2.09     76837802
## 2     48222977       7.257      2941748        2.11     47346806
## 3     32485216       7.162      3045013        2.26    186308298
## 4     28644235       7.295      4258826        2.24     76831324
## 5     21074279       7.276      1919420        2.30     60379016
## 6     29570067       7.010      3081952        2.36    103913312
##   TCELL_Price TCELL_Volume MGROS_Price MGROS_Volume SISE_Price SISE_Volume
## 1       10.75      1710046        32.1       353724      1.477     5851710
## 2       10.80      5269387        32.3       819396      1.498    16480364
## 3       10.95      2721640        31.8       199052      1.529    21447829
## 4       10.85      2742226        31.3        90973      1.588    31506301
## 5       10.85      1424859        31.1        46314      1.582     6916070
## 6       10.85      2222840        30.9        36216      1.593     6618226
##   ISCTR_Price ISCTR_Volume VAKBN_Price VAKBN_Volume AKBNK_Price
## 1        5.66     34995630       3.934     27587022        8.68
## 2        5.74     37813780       3.973     28040042        8.74
## 3        5.80     32680300       4.069     38113868        8.62
## 4        5.82     41029510       4.127     37874576        8.64
## 5        5.68     52768820       4.050     35083251        8.52
## 6        5.60     25207960       3.973     45755402        8.34
##   AKBNK_Volume PETKM_Price PETKM_Volume THYAO_Price THYAO_Volume
## 1      8345711       1.467     41499794       4.029     16095423
## 2     11769070       1.545     95221995       4.014      8639566
## 3     14211970       1.570     75822797       3.942     14944558
## 4      6590648       1.545     54379199       3.942      8276507
## 5     12918580       1.539     34941192       3.928      8554191
## 6     10513890       1.533     25254273       3.739     24322693
##   TTKOM_Price TTKOM_Volume HALKB_Price HALKB_Volume
## 1        6.64      1227603       13.70      5451992
## 2        6.76      3310982       13.95      5372906
## 3        6.84      2444320       14.20      5825062
## 4        6.84      2006922       14.30      4252567
## 5        6.84      1058166       13.90      3117867
## 6        6.96      1562132       13.40      6181114

A summary of the data (mean of every column)

# Mean of every column of df, ignoring missing values.
summarise_all(df, mean, na.rm = TRUE)
##        Dates GARAN_Price GARAN_Volume SAHOL_Price SAHOL_Volume BIMAS_Price
## 1 2014-12-10    8.131021     73295049    8.985418      5804879    49.11168
##   BIMAS_Volume ARCLK_Price ARCLK_Volume EREGL_Price EREGL_Volume
## 1       693089    14.30943      1665356    4.455403     16760945
##   YKBNK._Price YKBNK._Volume TOASO_Price TOASO_Volume TUPRS_Price
## 1     2.636728      39530127    16.77452      1079718    65.45903
##   TUPRS_Volume KRDMD_Price KRDMD_Volume TTRAK_Price TTRAK_Volume
## 1      1099433    1.582587     45147502    60.93569     117982.8
##   ASELS_Price ASELS_Volume IPEKE_Price IPEKE_Volume KOZAA_Price
## 1    9.881125      4966923    3.131659      9482701    3.261183
##   KOZAA_Volume ASYAB_Price ASYAB_Volume ENKAI_Price ENKAI_Volume
## 1     11547915    1.346148      7656948      3.7287      4392621
##   KOZAL_Price KOZAL_Volume DOHOL_Price DOHOL_Volume IHLAS_Price
## 1    26.87641      1446803   0.8022033     25600294   0.6079129
##   IHLAS_Volume KCHOL_Price KCHOL_Volume EKGYO_Price EKGYO_Volume
## 1     23340599    11.21648      4358175    2.634348     36976994
##   TCELL_Price TCELL_Volume MGROS_Price MGROS_Volume SISE_Price SISE_Volume
## 1    11.34677      6376174    20.02143     997320.3   2.781455     6709219
##   ISCTR_Price ISCTR_Volume VAKBN_Price VAKBN_Volume AKBNK_Price
## 1    5.429762     36284398    4.486693     32434648     7.87786
##   AKBNK_Volume PETKM_Price PETKM_Volume THYAO_Price THYAO_Volume
## 1     21596945    2.875464     22455034    7.295898     41960069
##   TTKOM_Price TTKOM_Volume HALKB_Price HALKB_Volume
## 1    6.525088      5768731    12.33722     21602624

Data Wrangling

# Split the column names of df into the per-stock Volume and Price columns.
namesTable <- names(df)
volumeNames <- namesTable[grepl("Volume$", namesTable)]
priceNames <- namesTable[grepl("Price$", namesTable)]

# Mean of every Volume column (missing values ignored).
# Summarising exactly the columns listed in volumeNames keeps the means in the
# same order as the names they are paired with further down, and avoids
# averaging columns that would be discarded anyway.
df_volume <- df %>%
  summarise_at(volumeNames, mean, na.rm = TRUE)

df_volume
##   GARAN_Volume SAHOL_Volume BIMAS_Volume ARCLK_Volume EREGL_Volume
## 1     73295049      5804879       693089      1665356     16760945
##   YKBNK._Volume TOASO_Volume TUPRS_Volume KRDMD_Volume TTRAK_Volume
## 1      39530127      1079718      1099433     45147502     117982.8
##   ASELS_Volume IPEKE_Volume KOZAA_Volume ASYAB_Volume ENKAI_Volume
## 1      4966923      9482701     11547915      7656948      4392621
##   KOZAL_Volume DOHOL_Volume IHLAS_Volume KCHOL_Volume EKGYO_Volume
## 1      1446803     25600294     23340599      4358175     36976994
##   TCELL_Volume MGROS_Volume SISE_Volume ISCTR_Volume VAKBN_Volume
## 1      6376174     997320.3     6709219     36284398     32434648
##   AKBNK_Volume PETKM_Volume THYAO_Volume TTKOM_Volume HALKB_Volume
## 1     21596945     22455034     41960069      5768731     21602624
# Transpose the one-row table of means so that each Volume column is a row.
volumes <- df_volume %>%
  t() %>%
  as.data.frame()
# Attach the column names and melt into a long (name, value) table.
volumeTable <- data.frame(volumeNames, volumes) %>%
  melt()
## Using volumeNames as id variables
# melt() adds a `variable` column that is not needed here.
volumeTable[["variable"]] <- NULL
volumeTable
##      volumeNames      value
## 1   GARAN_Volume 73295048.7
## 2   SAHOL_Volume  5804879.1
## 3   BIMAS_Volume   693089.0
## 4   ARCLK_Volume  1665355.8
## 5   EREGL_Volume 16760945.0
## 6  YKBNK._Volume 39530127.1
## 7   TOASO_Volume  1079717.7
## 8   TUPRS_Volume  1099432.9
## 9   KRDMD_Volume 45147501.8
## 10  TTRAK_Volume   117982.8
## 11  ASELS_Volume  4966922.7
## 12  IPEKE_Volume  9482700.7
## 13  KOZAA_Volume 11547915.2
## 14  ASYAB_Volume  7656947.9
## 15  ENKAI_Volume  4392620.9
## 16  KOZAL_Volume  1446803.1
## 17  DOHOL_Volume 25600294.4
## 18  IHLAS_Volume 23340598.7
## 19  KCHOL_Volume  4358174.8
## 20  EKGYO_Volume 36976993.5
## 21  TCELL_Volume  6376173.9
## 22  MGROS_Volume   997320.3
## 23   SISE_Volume  6709218.6
## 24  ISCTR_Volume 36284397.8
## 25  VAKBN_Volume 32434648.3
## 26  AKBNK_Volume 21596944.7
## 27  PETKM_Volume 22455034.0
## 28  THYAO_Volume 41960069.4
## 29  TTKOM_Volume  5768731.1
## 30  HALKB_Volume 21602624.1
# Mean of every Price column (missing values ignored).
# The result gets its own name so that df_volume keeps holding the volume
# means, and the columns are taken from priceNames so that the means stay in
# the same order as the names they are paired with below.
df_price <- df %>%
  summarise_at(priceNames, mean, na.rm = TRUE)

# Transpose to one row per Price column, attach the names and melt into a
# long (name, value) table.
prices <- as.data.frame(t(df_price))
pricesTable <- melt(data.frame(priceNames, prices))
## Using priceNames as id variables
# melt() adds a `variable` column that is not needed here.
pricesTable$variable <- NULL
pricesTable
##      priceNames      value
## 1   GARAN_Price  8.1310214
## 2   SAHOL_Price  8.9854183
## 3   BIMAS_Price 49.1116829
## 4   ARCLK_Price 14.3094309
## 5   EREGL_Price  4.4554027
## 6  YKBNK._Price  2.6367281
## 7   TOASO_Price 16.7745185
## 8   TUPRS_Price 65.4590321
## 9   KRDMD_Price  1.5825871
## 10  TTRAK_Price 60.9356858
## 11  ASELS_Price  9.8811245
## 12  IPEKE_Price  3.1316590
## 13  KOZAA_Price  3.2611834
## 14  ASYAB_Price  1.3461479
## 15  ENKAI_Price  3.7286999
## 16  KOZAL_Price 26.8764105
## 17  DOHOL_Price  0.8022033
## 18  IHLAS_Price  0.6079129
## 19  KCHOL_Price 11.2164805
## 20  EKGYO_Price  2.6343482
## 21  TCELL_Price 11.3467656
## 22  MGROS_Price 20.0214300
## 23   SISE_Price  2.7814548
## 24  ISCTR_Price  5.4297617
## 25  VAKBN_Price  4.4866926
## 26  AKBNK_Price  7.8778599
## 27  PETKM_Price  2.8754635
## 28  THYAO_Price  7.2958979
## 29  TTKOM_Price  6.5250875
## 30  HALKB_Price 12.3372179

After collecting prices and volumes into two categories, we can look at these data for patterns.

Visualization

Volume Values

Highest values in the volume table (top five).

# Bar chart of the five stocks with the highest mean volume.
# top_n() selects the five largest means directly instead of relying on a
# hand-tuned cutoff (ties at the boundary may return more than five rows).
# arrange() does not affect the order of a discrete ggplot axis, so the bars
# are sorted by value with reorder(); labs() keeps the original axis title.
v1 <- volumeTable %>%
  top_n(5, value) %>%
  ggplot(aes(x = reorder(volumeNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "volumeNames")
v1

Lowest values in the volume table (bottom five).

# Bar chart of the five stocks with the lowest mean volume.
# top_n() with a negative n selects the five smallest means directly instead
# of relying on a hand-tuned cutoff (ties at the boundary may return more).
# arrange() does not affect the order of a discrete ggplot axis, so the bars
# are sorted by value with reorder(); labs() keeps the original axis title.
v2 <- volumeTable %>%
  top_n(-5, value) %>%
  ggplot(aes(x = reorder(volumeNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "volumeNames")
v2

# Show the highest- and lowest-volume charts side by side.
grid.arrange(v1, v2, ncol = 2)

Highest First 5 Volume Graphics by Date

First is GARAN. The average weighted changes at GARAN Comp.

# Line plot of GARAN price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = GARAN_Price * GARAN_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Second is KRDMD The average weighted changes at KRDMD Comp.

# Line plot of KRDMD price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill mapping is needed either.
df %>%
  mutate(weightedAvg = KRDMD_Price * KRDMD_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Third is THYAO average weighted changes at THYAO Comp.

# Line plot of THYAO price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill mapping is needed either.
df %>%
  mutate(weightedAvg = THYAO_Price * THYAO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Fourth is YKBNK. The average weighted changes at YKBNK Comp.

# Line plot of YKBNK price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill mapping is needed either.
df %>%
  mutate(weightedAvg = YKBNK._Price * YKBNK._Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Fifth is EKGYO average weighted changes at EKGYO Comp.

# Line plot of EKGYO price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill mapping is needed either.
df %>%
  mutate(weightedAvg = EKGYO_Price * EKGYO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Lowest Last 5 Volume Graphics by Date

Last is TTRAK. The average weighted changes at TTRAK Comp.

# Line plot of TTRAK price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = TTRAK_Price * TTRAK_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Second Last is BIMAS The average weighted changes at BIMAS Comp.

# Line plot of BIMAS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = BIMAS_Price * BIMAS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Third Last is MGROS The average weighted changes at MGROS Comp.

# Line plot of MGROS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = MGROS_Price * MGROS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Fourth last is TOASO. The average weighted changes at TOASO Comp.

# Line plot of TOASO price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = TOASO_Price * TOASO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Fifth Last is TUPRS The average weighted changes at TUPRS Comp.

# Line plot of TUPRS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = TUPRS_Price * TUPRS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "darkblue")

Price Values

Highest 5 prices in stock market.

# Bar chart of the five stocks with the highest mean price.
# top_n() selects the five largest means directly instead of relying on a
# hand-tuned cutoff (ties at the boundary may return more than five rows).
# arrange() does not affect the order of a discrete ggplot axis, so the bars
# are sorted by value with reorder(); labs() keeps the original axis title.
p1 <- pricesTable %>%
  top_n(5, value) %>%
  ggplot(aes(x = reorder(priceNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "priceNames")
p1

Lowest 5 prices in stock market.

# Bar chart of the five stocks with the lowest mean price.
# top_n() with a negative n selects the five smallest means directly instead
# of relying on a hand-tuned cutoff (ties at the boundary may return more).
# arrange() does not affect the order of a discrete ggplot axis, so the bars
# are sorted by value with reorder(); labs() keeps the original axis title.
p2 <- pricesTable %>%
  top_n(-5, value) %>%
  ggplot(aes(x = reorder(priceNames, value), y = value, fill = value)) +
  geom_bar(stat = "identity") +
  labs(x = "priceNames")
p2

# Show the highest- and lowest-price charts side by side.
grid.arrange(p1, p2, ncol = 2)

Highest First 5 Price Graphics by Date

First is TUPRS The average weighted changes at TUPRS Comp.

# Line plot of TUPRS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = TUPRS_Price * TUPRS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Second is TTRAK The average weighted changes at TTRAK Comp.

# Line plot of TTRAK price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = TTRAK_Price * TTRAK_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Third is BIMAS The average weighted changes at BIMAS Comp.

# Line plot of BIMAS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = BIMAS_Price * BIMAS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Fourth is KOZAL. The average weighted changes at KOZAL Comp.

# Line plot of KOZAL price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = KOZAL_Price * KOZAL_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Fifth is MGROS The average weighted changes at MGROS Comp.

# Line plot of MGROS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = MGROS_Price * MGROS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Lowest Last 5 Price Graphics by Date

Last is IHLAS The average weighted changes at IHLAS Comp.

# Line plot of IHLAS price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = IHLAS_Price * IHLAS_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Second last is DOHOL The average weighted changes at DOHOL Comp.

# Line plot of DOHOL price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = DOHOL_Price * DOHOL_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Third last is ASYAB The average weighted changes at ASYAB Comp.

# Line plot of ASYAB price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = ASYAB_Price * ASYAB_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Fourth last is KRDMD The average weighted changes at KRDMD Comp.

# Line plot of KRDMD price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = KRDMD_Price * KRDMD_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Fifth last is EKGYO The average weighted changes at EKGYO Comp.

# Line plot of EKGYO price multiplied by volume, by date.
# The product is computed element-wise, so no group_by(Dates) is needed, and
# geom_line() uses a fixed colour, so no fill/colour mapping is needed either.
df %>%
  mutate(weightedAvg = EKGYO_Price * EKGYO_Volume) %>%
  ggplot(aes(x = Dates, y = weightedAvg)) +
  geom_line(color = "chocolate1")

Correlation of Volume and Price

First we need company names vector for table

# Ticker labels used for the per-stock tables below, one entry per stock.
companyNames <- c(
  "GARAN", "SAHOL", "BIMAS", "ARCLK", "EREGL", "YKBNK", "TOASO", "TUPRS",
  "KRDMD", "TTRAK", "ASELS", "IPEKE", "KOZAA", "ASYAB", "ENKAI", "KOZAL",
  "DOHOL", "IHLAS", "KCHOL", "EKGYO", "TCELL", "MGROS", "SISE", "ISCTR",
  "VAKBN", "AKBNK", "PETKM", "THYAO", "TTKOM", "HALKB"
)
# Number of labels defined above.
length(companyNames)
## [1] 30
# Correlation between the per-stock mean volumes and mean prices.
# NOTE(review): assumes both tables list the stocks in the same row order.
cor(x = volumeTable[["value"]], y = pricesTable[["value"]])
## [1] -0.4261102
# Correlation between price and volume for every stock.
# Each Volume column name is derived from its Price column name, so a price
# series can never be paired with another stock's volume (the hand-written
# version correlated TTRAK_Price with TTKOM_Volume).
pairedVolumeNames <- sub("Price$", "Volume", priceNames)
# Guard the positional pairing with companyNames: the ticker prefix of every
# Price column (ignoring the stray "." in "YKBNK._Price") must match, and
# every derived Volume column must exist in df.
stopifnot(
  identical(sub("\\.?_Price$", "", priceNames), companyNames),
  all(pairedVolumeNames %in% names(df))
)
corVec <- vapply(
  seq_along(priceNames),
  function(i) cor(df[[priceNames[i]]], df[[pairedVolumeNames[i]]]),
  numeric(1)
)
# Long (company, correlation) table.
corrTable <- melt(data.frame(companyNames, corVec))
## Using companyNames as id variables
# melt() adds a `variable` column that is not needed here.
corrTable$variable <- NULL
corrTable
##    companyNames       value
## 1         GARAN -0.27659034
## 2         SAHOL  0.03629147
## 3         BIMAS  0.18819825
## 4         ARCLK  0.07378494
## 5         EREGL -0.12434734
## 6         YKBNK -0.29028613
## 7         TOASO -0.16690372
## 8         TUPRS  0.24543391
## 9         KRDMD  0.63467485
## 10        TTRAK -0.09599396
## 11        ASELS  0.50660385
## 12        IPEKE  0.28035056
## 13        KOZAA  0.28387359
## 14        ASYAB  0.42324271
## 15        ENKAI -0.11085345
## 16        KOZAL  0.30916404
## 17        DOHOL  0.34891968
## 18        IHLAS  0.36177059
## 19        KCHOL  0.08552598
## 20        EKGYO -0.12992177
## 21        TCELL  0.27189760
## 22        MGROS -0.11783338
## 23         SISE -0.15178686
## 24        ISCTR -0.37971077
## 25        VAKBN -0.29846621
## 26        AKBNK -0.15195815
## 27        PETKM  0.30089594
## 28        THYAO  0.34317685
## 29        TTKOM -0.41449599
## 30        HALKB -0.39876833

The corVec variable holds the correlation between price and volume for each stock. Some values are negative and some are positive. The larger the absolute value of a correlation, the stronger the linear relationship between the two variables, and the more meaningful and predictable it is for future actions. Thus, investigating these correlation values helps to find which stocks are the best candidates for regression and prediction.

# Use the black-and-white theme for this and all following plots.
theme_set(theme_bw())

# Bar chart of the price/volume correlation per stock, highest first.
# arrange() does not affect the order of a discrete ggplot axis, so the bars
# are sorted explicitly with reorder(); labs() keeps the original axis title.
corrTable %>%
  ggplot(aes(x = reorder(companyNames, -value), y = value)) +
  geom_bar(stat = "identity", width = .5, fill = "tomato3") +
  labs(title = "Ordered Bar Chart",
       subtitle = "Correlation Values",
       caption = "Correlation graph",
       x = "companyNames") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# List the stocks from the strongest to the weakest correlation, regardless
# of sign.
arrange(corrTable, desc(abs(value)))
##    companyNames       value
## 1         KRDMD  0.63467485
## 2         ASELS  0.50660385
## 3         ASYAB  0.42324271
## 4         TTKOM -0.41449599
## 5         HALKB -0.39876833
## 6         ISCTR -0.37971077
## 7         IHLAS  0.36177059
## 8         DOHOL  0.34891968
## 9         THYAO  0.34317685
## 10        KOZAL  0.30916404
## 11        PETKM  0.30089594
## 12        VAKBN -0.29846621
## 13        YKBNK -0.29028613
## 14        KOZAA  0.28387359
## 15        IPEKE  0.28035056
## 16        GARAN -0.27659034
## 17        TCELL  0.27189760
## 18        TUPRS  0.24543391
## 19        BIMAS  0.18819825
## 20        TOASO -0.16690372
## 21        AKBNK -0.15195815
## 22         SISE -0.15178686
## 23        EKGYO -0.12992177
## 24        EREGL -0.12434734
## 25        MGROS -0.11783338
## 26        ENKAI -0.11085345
## 27        TTRAK -0.09599396
## 28        KCHOL  0.08552598
## 29        ARCLK  0.07378494
## 30        SAHOL  0.03629147

So let’s look at the five stocks with the highest absolute correlations.

KRDMD is the first one:

# Scatter of KRDMD volume against price with a fitted linear trend line.
# Columns are referenced by bare name so aes() looks them up in `df`;
# writing df$... inside aes() bypasses the data argument.
ggplot(df, aes(x = KRDMD_Price, y = KRDMD_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")

ASELS is the second one:

# Scatter of ASELS volume against price with a fitted linear trend line.
# Columns are referenced by bare name so aes() looks them up in `df`;
# writing df$... inside aes() bypasses the data argument.
ggplot(df, aes(x = ASELS_Price, y = ASELS_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")

ASYAB is the third one:

# Scatter of ASYAB volume against price with a fitted linear trend line.
# Columns are referenced by bare name so aes() looks them up in `df`;
# writing df$... inside aes() bypasses the data argument.
ggplot(df, aes(x = ASYAB_Price, y = ASYAB_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")

TTKOM is the fourth one:

# Scatter of TTKOM volume against price with a fitted linear trend line.
# Columns are referenced by bare name so aes() looks them up in `df`;
# writing df$... inside aes() bypasses the data argument.
ggplot(df, aes(x = TTKOM_Price, y = TTKOM_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")

HALKB is the fifth one:

# Scatter of HALKB volume against price with a fitted linear trend line.
# Columns are referenced by bare name so aes() looks them up in `df`;
# writing df$... inside aes() bypasses the data argument.
ggplot(df, aes(x = HALKB_Price, y = HALKB_Volume)) +
  geom_count(color = "darkred") +
  geom_smooth(method = "lm", se = FALSE, color = "blue")

Linear models of price on volume for these five stocks

# Simple linear regressions of price on volume, one per stock.
# The formulas use bare column names resolved through `data = df`; with
# df$... in the formula the `data` argument is ignored and predict() cannot
# be applied to new data.
linearMod1 <- lm(KRDMD_Price ~ KRDMD_Volume, data = df)
linearMod2 <- lm(ASELS_Price ~ ASELS_Volume, data = df)
linearMod3 <- lm(ASYAB_Price ~ ASYAB_Volume, data = df)
linearMod4 <- lm(TTKOM_Price ~ TTKOM_Volume, data = df)
linearMod5 <- lm(HALKB_Price ~ HALKB_Volume, data = df)
# Coefficients, residual summary and fit statistics for the KRDMD model.
summary(linearMod1)
## 
## Call:
## lm(formula = df$KRDMD_Price ~ df$KRDMD_Volume, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0284 -0.4086 -0.1078  0.2586  3.1083 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     8.495e-01  2.598e-02   32.70   <2e-16 ***
## df$KRDMD_Volume 1.624e-08  4.363e-10   37.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7679 on 2054 degrees of freedom
## Multiple R-squared:  0.4028, Adjusted R-squared:  0.4025 
## F-statistic:  1385 on 1 and 2054 DF,  p-value: < 2.2e-16
# Coefficients, residual summary and fit statistics for the ASELS model.
summary(linearMod2)
## 
## Call:
## lm(formula = df$ASELS_Price ~ df$ASELS_Volume, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.981  -5.126  -2.862   1.913  23.194 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7.391e+00  2.000e-01   36.95   <2e-16 ***
## df$ASELS_Volume 5.014e-07  1.883e-08   26.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.017 on 2054 degrees of freedom
## Multiple R-squared:  0.2566, Adjusted R-squared:  0.2563 
## F-statistic: 709.2 on 1 and 2054 DF,  p-value: < 2.2e-16
# Coefficients, residual summary and fit statistics for the ASYAB model.
summary(linearMod3)
## 
## Call:
## lm(formula = df$ASYAB_Price ~ df$ASYAB_Volume, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9209 -0.3646 -0.3646  0.4634  1.7158 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.183e+00  1.525e-02   77.56   <2e-16 ***
## df$ASYAB_Volume 2.132e-08  1.007e-09   21.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5966 on 2054 degrees of freedom
## Multiple R-squared:  0.1791, Adjusted R-squared:  0.1787 
## F-statistic: 448.2 on 1 and 2054 DF,  p-value: < 2.2e-16
# Coefficients, residual summary and fit statistics for the TTKOM model.
summary(linearMod4)
## 
## Call:
## lm(formula = df$TTKOM_Price ~ df$TTKOM_Volume, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1650 -0.5676  0.0048  0.5430  4.7753 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.662e+00  2.032e-02  327.83   <2e-16 ***
## df$TTKOM_Volume -2.370e-08  1.148e-09  -20.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8711 on 2054 degrees of freedom
## Multiple R-squared:  0.1718, Adjusted R-squared:  0.1714 
## F-statistic: 426.1 on 1 and 2054 DF,  p-value: < 2.2e-16
# Coefficients, residual summary and fit statistics for the HALKB model.
summary(linearMod5)
## 
## Call:
## lm(formula = df$HALKB_Price ~ df$HALKB_Volume, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2797 -1.9519 -0.4472  2.0084  9.9181 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.375e+01  9.431e-02  145.81   <2e-16 ***
## df$HALKB_Volume -6.544e-08  3.321e-09  -19.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.776 on 2054 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1586 
## F-statistic: 388.4 on 1 and 2054 DF,  p-value: < 2.2e-16

General Values of Weighted Averages

Merging the volumes and prices

# Product of each stock's mean price and mean volume.
# The three inputs are paired by position, so check that they have the same
# number of entries before multiplying.
# NOTE(review): assumes pricesTable, volumeTable and companyNames list the
# stocks in the same order.
stopifnot(
  nrow(pricesTable) == length(companyNames),
  nrow(volumeTable) == length(companyNames)
)
weightedPrices <- pricesTable$value * volumeTable$value
generalTable <- melt(data.frame(companyNames, weightedPrices))
## Using companyNames as id variables
# melt() adds a `variable` column that is not needed here.
generalTable$variable <- NULL
# Bar chart of the products, highest first. arrange() does not affect the
# order of a discrete ggplot axis, so the bars are sorted with reorder();
# labs() keeps the original axis title.
generalTable %>%
  ggplot(aes(x = reorder(companyNames, -value), y = value,
             fill = companyNames, color = companyNames)) +
  geom_bar(stat = "identity") +
  labs(x = "companyNames") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))