-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatagenerator.py
59 lines (44 loc) · 1.36 KB
/
datagenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
import os
import json
import duckdb
import random
import time
from datetime import datetime, timedelta
from memory_profiler import profile
def generateDate(start, end):
    """Pick a random day between two calendar dates, both ends included.

    Args:
        start: Lower bound as a ``YYYY-MM-DD`` string.
        end: Upper bound as a ``YYYY-MM-DD`` string.

    Returns:
        A ``datetime`` at midnight of the chosen day.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``, or if
            ``end`` is earlier than ``start`` (the random range is empty).
    """
    date_format = '%Y-%m-%d'
    first_day = datetime.strptime(start, date_format)
    last_day = datetime.strptime(end, date_format)
    # Whole days separating the bounds; randint includes both endpoints.
    span_in_days = (last_day - first_day).days
    day_offset = random.randint(0, span_in_days)
    return first_day + timedelta(days=day_offset)
def generateTimeSeries(start='2023/01/01', end='2023/12/31', freq='s'):
    """Return a fixed-frequency ``DatetimeIndex`` running from ``start`` to ``end``.

    With the defaults this is one timestamp per second from 2023-01-01
    through midnight of 2023-12-31, i.e. roughly 31.4 million entries, so
    pass a narrower range or a coarser ``freq`` when a small series is enough.

    Args:
        start: First timestamp of the series (anything ``pd.date_range``
            accepts).
        end: Last timestamp of the series (inclusive when it falls on the
            frequency grid).
        freq: Pandas frequency alias giving the step between timestamps.

    Returns:
        The ``pandas.DatetimeIndex`` produced by ``pd.date_range``.
    """
    return pd.date_range(start=start, end=end, freq=freq)
def generateSamples(timestamps=None, sensor_id='1'):
    """Build one synthetic sensor reading per timestamp.

    Args:
        timestamps: Iterable of timestamps to generate readings for. When
            ``None`` (the default) the full series from
            ``generateTimeSeries()`` is used.
        sensor_id: Value stored under the ``'sensorID'`` key of every
            reading.

    Returns:
        A list of dicts with the keys ``'timestamp'`` (the timestamp
        rendered with ``str``), ``'sensorID'`` and ``'temp'`` (a random
        integer between 0 and 1000, both inclusive).
    """
    # Compare against None explicitly: a pandas index has no single truth value.
    if timestamps is None:
        timestamps = generateTimeSeries()
    # Iterate the series directly rather than copying it into a list first;
    # the result list is already large enough on its own.
    return [
        {
            'timestamp': str(moment),
            'sensorID': sensor_id,
            'temp': random.randint(0, 1000),
        }
        for moment in timestamps
    ]
def init():
    """Open the ``sensordb.duckdb`` database and (re)create ``FACT_SENSOR``.

    The table is created with ``CREATE OR REPLACE``, so any existing
    ``FACT_SENSOR`` table in that database file is replaced by an empty one.

    Returns:
        The open DuckDB connection.
    """
    create_table_sql = 'CREATE OR REPLACE TABLE FACT_SENSOR (timestamp TIMESTAMP, sensorID STRING, temp INTEGER)'
    connection = duckdb.connect('sensordb.duckdb')
    connection.execute(create_table_sql)
    return connection
def append(db, df):
    """Insert every row of ``df`` into the ``FACT_SENSOR`` table.

    The frame is exposed to DuckDB as a temporary view named ``sample_df``
    for the duration of the insert and unregistered afterwards, so the
    connection does not keep the frame referenced once this returns.

    Args:
        db: Open DuckDB connection that already contains ``FACT_SENSOR``.
        df: DataFrame to insert. ``SELECT *`` is used, so its columns must
            line up with the table's columns in order.
    """
    view_name = 'sample_df'
    db.register(view_name, df)
    try:
        db.sql('INSERT INTO FACT_SENSOR SELECT * FROM sample_df')
    finally:
        # Drop the view even if the insert fails so the frame can be freed.
        db.unregister(view_name)
if __name__ == '__main__':
    # Script entry point: rebuild the FACT_SENSOR table, fill it with one
    # batch of generated readings, then export it as Parquet.
    db = init()
    try:
        # NOTE: only a single batch is generated; a multi-batch loop was
        # sketched here previously but is not active.
        samples = generateSamples()
        df = pd.DataFrame(samples)
        append(db, df)
        # Export the readings to `sensor_readings`, partitioned by calendar
        # date derived from the timestamp column.
        db.sql("""
            COPY (SELECT timestamp::DATE date, timestamp, temp FROM FACT_SENSOR)
            TO
            sensor_readings
            (FORMAT PARQUET, PARTITION_BY (date), OVERWRITE_OR_IGNORE)
        """)
    finally:
        # Always release the database file handle, even if a step above fails.
        db.close()