-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathGuided Project - Storing Storm Data.py
70 lines (61 loc) · 2.01 KB
/
Guided Project - Storing Storm Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import csv
from datetime import datetime
import io
import psycopg2
from urllib import request
conn = psycopg2.connect(dbname='postgres', user='postgres')
cur = conn.cursor()
# Autocommit instead of commiting every transaction.
conn.autocommit = True
# Create database and users.
cur.execute('CREATE DATABASE ihw')
cur.execute("CREATE USER production WITH PASSWORD 'abc123'")
cur.execute("CREATE USER analyst WITH PASSWORD 'def456'")
# Reconnect to ihw database.
conn = psycopg2.connect(dbname='ihw', user='postgres')
conn.autocommit = True
cur = conn.cursor()
# Create the table.
cur.execute(
"""
CREATE TABLE hurricanes (
fid INTEGER PRIMARY KEY,
recorded_at TIMESTAMP,
btid INTEGER,
name VARCHAR(10),
lat DECIMAL(4, 1),
long DECIMAL(4, 1),
wind_kts SMALLINT,
pressure INTEGER,
category VARCHAR(2),
basin VARCHAR(16),
shape_length DECIMAL(8, 6)
)
"""
)
# Manage privileges.
cur.execute("REVOKE ALL ON hurricanes FROM production")
cur.execute("REVOKE ALL ON hurricanes FROM analyst")
cur.execute("GRANT SELECT, INSERT, UPDATE ON hurricanes TO production")
cur.execute("GRANT SELECT ON hurricanes TO analyst")
conn.close()
# Reconnect with production user.
conn = psycopg2.connect(dbname='ihw', user='production', password='abc123')
cur = conn.cursor()
conn.autocommit = True
# Insert the data.
response = request.urlopen('https://dq-content.s3.amazonaws.com/251/storm_data.csv')
reader = csv.reader(io.TextIOWrapper(response))
# Skip the header.
_ = next(reader)
rows = []
for line in reader:
recorded_at = datetime(int(line[1]), int(line[2]), int(line[3]), hour=int(line[4][:2]), minute=int(line[4][2:-1]))
new_line = [line[0], recorded_at] + line[5:]
rows.append(
cur.mogrify(
"(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
new_line
).decode('utf-8')
)
cur.execute('INSERT INTO hurricanes VALUES ' + ",".join(rows))