This repository has been archived by the owner on Sep 13, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprice_parsing.py
67 lines (59 loc) · 2.48 KB
/
price_parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import util
import Quandl
# For the moment, we're using Yahoo Finance as our source.
# This prefix is concatenated with the ticker symbol in getStockPrices() to
# build the dataset code passed to Quandl.get() (e.g. "YAHOO/" + ticker).
STOCK_DATA_SOURCE = "YAHOO/"
def getStockPrices(ticker, frequency="monthly", update=False):
    """
    Gets the closing prices for a given stock ticker at a given frequency.

    The downloaded data is cached through ``util`` under the name
    ``<ticker>_<frequency>``; the cache is used unless ``update`` is true or no
    cached entry exists.

    :param ticker: Name of the company's ticker
    :param frequency: Frequency of returned time series. See Quandl.get()'s collapse param.
    :param update: Always updates instead of using cache if true
    :return: The "Close" column of the price data (``prices.get("Close")``), or -1 if
             downloading or caching the data fails
    """
    name = ticker + "_" + frequency  # Name of data in cache

    # If there is no cached version of the pickle, or update flag is on,
    # download price data and cache it
    if update or not util.pickleExists(name):
        try:
            # NOTE(review): the API token is hard-coded here; consider moving it
            # to configuration / an environment variable instead of source control.
            prices = Quandl.get(
                STOCK_DATA_SOURCE + ticker,
                collapse=frequency,
                authtoken="xx_T2u2fsQ_MjyZjTb6E",
            )
            util.savePickle(prices, name)
        except Exception:
            # Any download/caching failure is reported to callers as -1.
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            return -1
    # Otherwise, use most recent cache entry
    else:
        prices = util.getMostRecentPickle(name)

    # Return closing prices
    return prices.get("Close")
def getDateRange(prices, before, after):
    """
    Restricts a time series to the window [before, after].

    Thin wrapper around ``prices.truncate``: entries with an index earlier than
    ``before`` or later than ``after`` are dropped; entries exactly at either
    bound are kept.

    :param prices: Pandas object (indexed by date) representing a time series
    :param before: Datetime-like lower limit; earlier entries are removed
    :param after: Datetime-like upper limit; later entries are removed
    :return: The truncated Pandas object
    """
    window = prices.truncate(before=before, after=after)
    return window
def splitByDate(prices, date):
    """
    Splits a price series into two parts at the given date.

    Both parts are produced with ``prices.truncate``, whose bounds are
    inclusive, so an entry whose index equals ``date`` appears in both parts.

    :param prices: price data to split
    :param date: place to split
    :return: tuple of (prices up to and including date, prices from date onwards)
    """
    up_to_date = prices.truncate(after=date)
    from_date = prices.truncate(before=date)
    return up_to_date, from_date
def preprocessStocks(priceData):
    """
    Processes priceData into a format usable by sklearn.

    :param priceData: Pandas Series of prices whose index labels are date-like
                      (each label must provide ``toordinal()``), e.g. the
                      closing-price series returned by getStockPrices()
    :return: List of ordinal dates, list of float prices (in the series' order)
    """
    timestamps = []
    prices = []
    # Series.iteritems() was removed in pandas 2.0; items() yields the same
    # (index label, value) pairs.
    for date, price in priceData.items():
        timestamps.append(date.toordinal())  # Convert date to ordinal
        prices.append(float(price))  # Convert price to float
    # Return in form X, y
    return timestamps, prices