import pandas as pd
import numpy as np
import os.path as op
# Image dimensions in pixels, keyed by window length (5 / 20 / 60;
# presumably trading days, judging by the "20d" file names -- confirm).
IMAGE_WIDTH = {5: 15, 20: 60, 60: 180}
IMAGE_HEIGHT = {5: 32, 20: 64, 60: 96}

# Calendar year interpolated into the data file names below.
year = 2017

# Map the raw uint8 image file read-only instead of loading it into memory.
images = np.memmap(
    op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_images.dat"),
    dtype=np.uint8,
    mode='r',
)
# View the flat byte buffer as a stack of (height, width) images; the
# leading -1 lets NumPy infer the image count from the file size.
images = images.reshape((-1, IMAGE_HEIGHT[20], IMAGE_WIDTH[20]))
print(images.shape)
# Bare expression: shows the first image's pixel matrix as notebook cell output.
images[0]
(67858, 64, 60)
memmap([[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0],
...,
[ 0, 255, 0, ..., 0, 0, 0],
[ 0, 255, 0, ..., 0, 0, 0],
[ 0, 255, 0, ..., 0, 255, 0]], dtype=uint8)
# Load the label table stored alongside the image file for the same year.
label_df = pd.read_feather(
    op.join(
        "./monthly_20d",
        f"20d_month_has_vb_[20]_ma_{year}_labels_w_delay.feather",
    )
)
# Bare expression: shows the first rows as notebook cell output.
label_df.head()
| | Date | StockID | MarketCap | Ret_5d | Ret_20d | Ret_60d | Ret_month | EWMA_vol |
|---|---|---|---|---|---|---|---|---|
| 0 | 2017-01-31 | 10001 | 133078.0 | 4.370390e-07 | -0.000002 | -0.005954 | -0.000002 | 0.000450 |
| 1 | 2017-02-28 | 10001 | 133078.0 | 3.951997e-03 | 0.002795 | 0.009953 | 0.009953 | 0.000180 |
| 2 | 2017-03-31 | 10001 | 133604.0 | -7.874612e-03 | -0.015749 | 0.021723 | -0.015749 | 0.000064 |
| 3 | 2017-04-28 | 10001 | 131500.0 | 9.999880e-03 | 0.016001 | 0.038072 | 0.016001 | 0.000030 |
| 4 | 2017-05-31 | 10001 | 133604.0 | 4.370390e-07 | 0.021722 | NaN | 0.023703 | 0.000015 |
from matplotlib import pyplot as plt
# Preview the first five images, one figure per image.
for i in range(5):
    # Print the index so each figure can be matched to its position in `images`.
    print(i)
    plt.imshow(images[i], cmap='gray')
    # Render inside the loop so every image gets its own figure.
    plt.show()
0
1
2
3
4