-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathWorking_With_Market_Data_Example_Julia.jl
129 lines (97 loc) · 4.17 KB
/
Working_With_Market_Data_Example_Julia.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# MIT license (c) 2019 by Andrew Lyasoff
# Julia 1.1.0.
# The code illustrates some basic operations with large chunks of market data
# (extracted from public sources), such as
# creating histograms from the returns, and other similar operations.
using Dates, JuliaDB
using Statistics: quantile
# The historical quotes are downloaded in .csv format from nasdaq.com and are placed
# in the directory "HistoricalQuotes". Before those files can be used here they
# must be modified (slightly) in Excel, LibreOffice Calc, or a text editor (e.g., Emacs):
# there should be no empty lines and no line that contains a time-stamp (all lines except
# the first one must be identically formatted -- this may involve removing the second
# line in the spreadsheet). The data for all stocks can be stored in a single variable
# (note that HistoricalQuotes is the name of the sub-directory that contains all .csv files).
# Load the .csv files in the "HistoricalQuotes" directory into a single table. The
# `filenamecol = :ticker` option adds a :ticker column derived from each file's name
# (presumably the bare ticker symbol, e.g. "AAPL" -- the lookups below rely on that), and
# the table is indexed by the pair (:ticker, :date).
stocksdata = loadndsparse("HistoricalQuotes"; filenamecol = :ticker, indexcols = [:ticker, :date])
# The same database can now be saved in a special binary format that makes reloading
# at a later time very fast.
# NOTE: this writes "stocksdata.jdb" relative to the current working directory.
save(stocksdata, "stocksdata.jdb")
# Reload the saved table; @time prints how long the reload takes.
@time reloaded_stocksdata = load("stocksdata.jdb")
# Test the saved database for consistency: the reloaded table should compare equal to
# the one built from the .csv files.
stocksdata == reloaded_stocksdata
# Look up the data associated only with Apple (symbol AAPL): fix the first index
# (:ticker) and keep every value of the second index (:date).
stocksdata["AAPL",:]
# Number of rows stored for AAPL.
length(stocksdata["AAPL",:])
# Similarly, look up the data linked to Microsoft.
stocksdata["MSFT",:]
length(stocksdata["MSFT",:])
# Look up the price of Google on a specific date in the past: first the whole record
# for that (ticker, date) pair, then only its `close` field.
stocksdata["GOOGL", Date(2009,9,9)]
stocksdata["GOOGL", Date(2009,9,9)].close
# Extract the closing prices of Amazon *only*: keep just the :close value column of the
# table and then restrict it to the AMZN rows.
selectvalues(stocksdata,:close)["AMZN",:]
# Type of the keys that index the table (presumably a (ticker, date) tuple type).
keytype(stocksdata)
# List all dates for which the closing prices for Apple are available.
# NOTE(review): this and the next assignment pick columns by position (1 = date,
# 2 = close) in the AAPL slice -- confirm the order against the .csv header.
AAPLdate=columns(stocksdata["AAPL",:])[1]
# Extract the closing prices of Apple on those days.
AAPLclose=columns(stocksdata["AAPL",:])[2]
# Spot check: the AAPL closing price on one particular date.
stocksdata["AAPL", Date(2008,8,11)].close
# Produce some plots.
using Plots
# Select the PyPlot backend and draw the AAPL closing prices against their dates.
pyplot()
plot(AAPLdate, AAPLclose; label = "AAPL closing price")
# Daily simple returns of AAPL, (p[t] - p[t-1]) / p[t-1], shown as a scatter plot
# against the date of the later price in each pair.
begin
    llc = length(AAPLclose)
    AAPLreturns = diff(AAPLclose) ./ AAPLclose[1:(llc - 1)]
    scatter(AAPLdate[2:llc], AAPLreturns; label = "AAPL daily returns", markersize = 2)
end
# Now build the histogram from the returns using a custom-made 'histogram' function
# (called 'hstgram' to avoid confusion with the standard 'histogram').
# It takes as input a single 1-dimensional array of data. The number of bins in
# the histogram is determined automatically by using the Freedman-Diaconis rule.
# The function returns two arrays: the mid-points of the bins and the (unnormalized)
# heights of the bars.
using StatsBase
"""
    hstgram(data_sample::AbstractVector{<:Real})

Build an equal-width histogram of `data_sample`.

The number of bins is chosen with the Freedman-Diaconis rule: the target bin width is
`2 * IQR * n^(-1/3)`, where `IQR` is the interquartile range of the sample and `n` its
length, and the number of bins is the range of the data divided by that width, rounded
down (but never below 1). The bins then span exactly `[minimum, maximum]`.

Every bin is half-open, `[left, right)`, except the last one, which also contains its
right edge, so the largest sample is counted and the counts always sum to `n`.

# Arguments
- `data_sample`: non-empty vector of finite real numbers. It is not modified.

# Returns
A tuple `(mid_bins, hist_val)`:
- `mid_bins`: mid-points of the bins, in increasing order;
- `hist_val`: number of samples in each bin (unnormalized).

Special cases:
- if all samples are equal (including a single sample), one bin centred at that value
  holding all `n` samples is returned;
- if the interquartile range is zero but the samples are not all equal, the
  Freedman-Diaconis width is undefined and Sturges' rule (`ceil(log2(n)) + 1` bins) is
  used instead.

# Throws
- `ArgumentError` if `data_sample` is empty or contains `NaN`/`Inf`.

# Examples
```julia-repl
julia> hstgram(collect(0.0:19.0))
([4.75, 14.25], [10, 10])
```
"""
function hstgram(data_sample::AbstractVector{<:Real})
    isempty(data_sample) && throw(ArgumentError("data_sample must not be empty"))
    all(isfinite, data_sample) ||
        throw(ArgumentError("data_sample must contain only finite values"))

    data_sorted = sort(data_sample)
    lo, hi = first(data_sorted), last(data_sorted)
    nmb = length(data_sorted)
    span = hi - lo

    # Degenerate sample: there is no width to divide into bins.
    iszero(span) && return [lo + span / 2], [nmb]

    # The data is already sorted, so tell `quantile` not to sort it again.
    q25 = quantile(data_sorted, 0.25; sorted=true)
    q75 = quantile(data_sorted, 0.75; sorted=true)
    iqr = q75 - q25

    num_bins = if iqr > 0
        floor(Int, span / (2 * iqr * nmb^(-1.0 / 3)))  # Freedman-Diaconis
    else
        ceil(Int, log2(nmb)) + 1                       # Sturges (fallback)
    end
    num_bins = max(num_bins, 1)
    bin_size = span / num_bins

    bin_end_points = [lo + (i - 1) * bin_size for i in 1:(num_bins + 1)]

    # cumulative[k] = number of samples strictly below the k-th edge; binary search on
    # the sorted data replaces a full scan per edge.
    offset = firstindex(data_sorted)
    cumulative = [searchsortedfirst(data_sorted, u) - offset for u in bin_end_points]
    # Close the last bin on the right so the maximum is counted, and so that rounding in
    # the last edge cannot drop samples.
    cumulative[end] = nmb

    hist_val = diff(cumulative)
    mid_bins = [lo - bin_size / 2 + i * bin_size for i in 1:num_bins]
    return mid_bins, hist_val
end
# Rescale the bar heights so that the total area of the histogram is 1: every count is
# divided by (total count) * (bin width). The bin width is the spacing U[2] - U[1]
# between consecutive bin mid-points.
begin
    U, V = hstgram(AAPLreturns)
    VV = V ./ (sum(V) * (U[2] - U[1]))
end
# Draw the area-normalized histogram as vertical sticks placed at the bin mid-points.
begin
    plot(U, VV; line = (:sticks, 0.75), label = "")
    xlabel!("returns")
    ylabel!("normalized frequency")
    title!("Histogram from the 10y daily returns from AAPL.")
end
# Sanity check: bin width times the sum of the heights should be 1.
(U[2] - U[1]) * sum(VV)
# The same area-normalized histogram drawn as a step line. The x-coordinates are the
# bin mid-points shifted right by half a bin width.
begin
    plot(U .+ 0.5 * (U[2] - U[1]), VV; label = "", line = (:steppre, 1), linewidth = 0.05)
    xlabel!("samples")
    ylabel!("normalized frequency")
end
# Turn the counts into relative frequencies (the fraction of samples that fall in each
# bin) and draw them as a step line, again at the mid-points shifted by half a bin width.
begin
    VVV = V ./ sum(V)
    plot(U .+ 0.5 * (U[2] - U[1]), VVV; label = "", line = (:steppre, 1), linewidth = 0.05)
    xlabel!("samples")
    ylabel!("probability")
end
# Sanity check: the relative frequencies should add up to 1.
sum(VVV)