-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmock_data_sample.py
122 lines (107 loc) · 3.48 KB
/
mock_data_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from secrets import choice
from faker import Faker
from faker import *
from faker.providers import BaseProvider
import csv
import datetime
from random import *
from random import choice
# Shared Faker instance used by the whole script. MyProvider is registered on
# it further down, and datagenerate() reads every generated field from it.
fake = Faker()
class MyProvider(BaseProvider):
    """Faker provider supplying categorical values for mock crime records.

    After the provider is added to a ``Faker`` instance, its public methods
    are available on that instance as ``types()``, ``description()`` and
    ``location_description()``. Each method picks one entry from a fixed
    tuple via ``BaseProvider.random_element``, so the choice is driven by the
    Faker instance's own (seedable) random generator.
    """

    # A class attribute holds a single value, so only one assignment is kept:
    # the last of the three former ones, which is the value that was actually
    # in effect.
    __provider__ = "location_description"

    # Underscore-prefixed so Faker does not try to expose them as generators.
    _PRIMARY_TYPES = (
        "ASSAULT",
        "BATTERY",
        "BURGLARY",
        "CRIMINAL DAMAGE",
        "MOTOR VEHICLE THEFT",
        "NARCOTICS",
        "OFFENSE INVOLVING CHILDREN",
        "OTHER OFFENSE",
    )
    _DESCRIPTIONS = (
        "SIMPLE",
        "AGGRAVATED",  # spelling corrected from "AGGREVATED"
        "RECKLESS CONDUCT",
        "UNLAWFUL",
    )
    _LOCATION_DESCRIPTIONS = (
        "RESIDENCE",
        "OFFICE",
        "STREET",
        "PARKING LOT",
        "STORE",
        "PARKING",
    )

    def types(self):
        """Return one randomly chosen primary crime type."""
        return self.random_element(self._PRIMARY_TYPES)

    def description(self):
        """Return one randomly chosen crime description qualifier."""
        return self.random_element(self._DESCRIPTIONS)

    def location_description(self):
        """Return one randomly chosen location category."""
        return self.random_element(self._LOCATION_DESCRIPTIONS)
# Register the custom provider so its public methods become callable on `fake`.
fake.add_provider(MyProvider)

# Timestamp captured once at start-up; datagenerate() writes the same value
# into the "Updated_On" column of every row.
now = datetime.datetime.now()
# Month-first date with a 12-hour clock: %p (AM/PM) only makes sense next to
# %I, and month-first matches the order %x yields for the "Date" column under
# the default C locale.
now_date_time = now.strftime("%m/%d/%Y %I:%M:%S %p")
def datagenerate(records, headers):
    """Append ``records`` rows of mock crime data to Crimes_2001_to_Present.csv.

    Every row is built from the module-level ``fake`` Faker instance
    (including the custom ``types``, ``description`` and
    ``location_description`` generators) and written with ``csv.DictWriter``.
    The header row is written only when the target file is new or empty, so
    repeated runs keep appending data rows to the same file.

    Args:
        records: Number of rows to append. The ``ID`` column runs from 0 to
            ``records - 1`` on every call.
        headers: Column names handed to ``csv.DictWriter`` as ``fieldnames``.
            It must contain every key of the generated row, otherwise
            ``DictWriter.writerow`` raises ``ValueError``.
    """
    # newline="" lets the csv module control line endings itself, which avoids
    # blank lines between rows on platforms that translate "\n".
    with open(
        "Crimes_2001_to_Present.csv", "a", newline="", encoding="utf-8"
    ) as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)

        # In append mode the stream starts at end-of-file, so offset 0 means
        # the file is new or empty and still needs its header row.
        if csv_file.tell() == 0:
            writer.writeheader()

        for i in range(records):
            latitude = str(fake.latitude())
            longitude = str(fake.longitude())

            # 12-hour clock with the matching AM/PM marker taken from the
            # timestamp itself; a randomly picked marker could contradict a
            # 24-hour time (e.g. "23:10:05 AM").
            occurred_on = fake.date_time_this_year().strftime(
                "%x %I:%M:%S %p"
            )

            # "Year" is sampled independently of "Date".
            year = fake.date_between_dates(
                datetime.date(2000, 1, 1), datetime.date(2022, 1, 1)
            ).year

            writer.writerow(
                {
                    "ID": i,
                    "Case_Number": fake.bothify(
                        text="H?######", letters="PQRST"
                    ),
                    "Date": occurred_on,
                    "Block": fake.street_address(),
                    "IUCR": fake.pyint(),
                    "Primary_Type": fake.types(),
                    "Description": fake.description(),
                    "Location_Description": fake.location_description(),
                    "Arrest": fake.boolean(chance_of_getting_true=25),
                    "Domestic": fake.boolean(chance_of_getting_true=25),
                    "Beat": fake.pyint(),
                    "District": fake.pyint(1, 25),
                    "Ward": fake.pyint(1, 50),
                    "Community_Area": fake.pyint(1, 75),
                    "FBI_Code": fake.pyint(1, 30),
                    "XCoordinate": fake.pyint(1000001, 9999999),
                    "YCoordinate": fake.pyint(1000001, 9999999),
                    "Year": year,
                    "Updated_On": now_date_time,
                    "Latitude": latitude,
                    # Previously this column repeated the latitude value.
                    "Longitude": longitude,
                    "Location": f"({latitude}, {longitude})",
                }
            )
# Script driver: number of rows to append on this run.
records = 100_000

# CSV column order, grouped by theme for readability. The concatenation yields
# one flat list that is passed to datagenerate() as the DictWriter fieldnames.
headers = (
    ["ID", "Case_Number", "Date", "Block"]
    + ["IUCR", "Primary_Type", "Description", "Location_Description"]
    + ["Arrest", "Domestic"]
    + ["Beat", "District", "Ward", "Community_Area", "FBI_Code"]
    + ["XCoordinate", "YCoordinate"]
    + ["Year", "Updated_On"]
    + ["Latitude", "Longitude", "Location"]
)

datagenerate(records, headers)
print("CSV generation complete!")