diff --git a/Project Final/Project.ipynb b/Project Final/Project.ipynb new file mode 100644 index 0000000..3924940 --- /dev/null +++ b/Project Final/Project.ipynb @@ -0,0 +1,532 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Project Notebook\n", + "This is the complete notebook: it takes in the data from NOAA, processes it into frames for the PredNet architecture, and produces a resulting prediction." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Getting a list of files in the processed data folder\n", + "filenames = os.listdir('D:/Nico/Desktop/processed_data')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "header_wanted = [\n", + " 'HOURLYVISIBILITY',\n", + " 'HOURLYDRYBULBTEMPC',\n", + " 'HOURLYWETBULBTEMPC',\n", + " 'HOURLYDewPointTempC',\n", + " 'HOURLYRelativeHumidity',\n", + " 'HOURLYWindSpeed',\n", + " 'HOURLYWindGustSpeed',\n", + " 'HOURLYStationPressure',\n", + " 'HOURLYPressureTendency',\n", + " 'HOURLYPressureChange',\n", + " 'HOURLYSeaLevelPressure',\n", + " 'HOURLYPrecip',\n", + " 'HOURLYAltimeterSetting']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "usecols = ['DATE','STATION'] + header_wanted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Loading all files into a single pandas DataFrame\n", + "tqdm.pandas()\n", + "df = pd.concat([pd.read_csv('D:/Nico/Desktop/processed_data/{}'.format(x), usecols=usecols, low_memory=False) for x in tqdm(filenames)])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point all the data has been loaded into a single DataFrame and any data changes have been made. The next step is to break the data up by WBAN station and place each station's records in a 2D array at the appropriate grid cell. 
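\n", + "\n", + "The LON_SCALED and LAT_SCALED columns read in below are assumed to already hold integer grid coordinates. As a rough sketch (not the notebook's actual preprocessing), raw station coordinates could be mapped onto the 20x40 grid like this, where lon, lat, width, and height are hypothetical names:\n", + "\n", + "```python\n", + "# hypothetical: map raw lon/lat Series onto integer cells of a width x height grid\n", + "lon_scaled = ((lon - lon.min()) / (lon.max() - lon.min()) * (width - 1)).round().astype(int)\n", + "lat_scaled = ((lat - lat.min()) / (lat.max() - lat.min()) * (height - 1)).round().astype(int)\n", + "```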
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stations = pd.read_csv(\"../Playground/stations_unique.csv\", usecols = ['STATION_ID', 'LON_SCALED', 'LAT_SCALED'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "height = 20\n", + "width = 40" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask = [([0] * width) for i in range(height)]\n", + "\n", + "wban_loc = dict(zip(stations.STATION_ID,zip(stations.LON_SCALED,stations.LAT_SCALED)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grid = [([pd.DataFrame()] * width) for i in range(height)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for key, value in tqdm(wban_loc.items()):\n", + " mask[value[1]][value[0]] = 1\n", + " grid[value[1]][value[0]] = df.loc[df.STATION == key]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#TODO Handle different sized data some stacks too short\n", + "def create_frames(data,height, width, depth):\n", + " days = []\n", + " frames = []\n", + " for i in tqdm(range(depth)):\n", + " frame = np.zeros((height,width,12))\n", + " for y in range(height):\n", + " for x in range(width):\n", + " if(not data[y][x].empty):\n", + " frame[y][x] = data[y][x].iloc[[i],1:13].values.flatten()\n", + " if((i+1)%24 != 0):\n", + " frames.append(frame)\n", + " else:\n", + " frames.append(frame)\n", + " days.append(frames)\n", + " frames = []\n", + " return days" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def average_grid_fill(mask,data, height, width):\n", + " \n", + " for i in range(height):\n", + " for j in range(width):\n", + " if(mask[i][j] != 1):\n", + " neighbors = get_neighbors(j,i,data)\n", + " data[i][j] = np.mean(neighbors)\n", + " \n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_neighbors(x,y,g):\n", + " neighbors = []\n", + " for i in [y-1,y,y+1]:\n", + " for j in [x-1,x,x+1]:\n", + " if(i >= 0 and j >= 0):\n", + " if(i != y or j != x ):\n", + " try:\n", + " neighbors.append(g[i][j])\n", + " except:\n", + " pass\n", + " return neighbors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def store_sequence(frames):\n", + " import hickle as hkl\n", + " source_list = []\n", + " \n", + " for days in range(len(frames)):\n", + " for day in range(len(frames[days])):\n", + " source_list += '{}'.format(days)\n", + " \n", + " hkl.dump(frames, './data/train/x_train.hkl')\n", + " hkl.dump(source_list, './data/train/x_sources.hkl')\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Splits is a dictionary holding train, test, val\n", + "the values for train, test, and val are lists of tuples holding category and folder name\n", + "in the end each image gets a source 
associated with it.\n", + "There is only one data and one source hickle dump for each of train, test, and val." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "frames = create_frames(grid, height, width, 504)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Average-fill the empty grid cells in every frame\n", + "for x in tqdm(range(len(frames))):\n", + " for y in range(len(frames[0])):\n", + " frames[x][y] = average_grid_fill(mask, frames[x][y], height, width)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "store_sequence(frames)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np_frames = np.array(frames)\n", + "np_frames.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "store_sequence(np_frames)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point the data has been processed into discrete frames, and it is time to run it through the PredNet architecture for training." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "np.random.seed(123)\n", + "from six.moves import cPickle\n", + "\n", + "from keras import backend as K\n", + "from keras.models import Model\n", + "from keras.layers import Input, Dense, Flatten\n", + "from keras.layers import LSTM\n", + "from keras.layers import TimeDistributed\n", + "from keras.callbacks import LearningRateScheduler, ModelCheckpoint\n", + "from keras.optimizers import Adam\n", + "\n", + "from prednet import PredNet\n", + "from data_utils import SequenceGenerator" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "WEIGHTS_DIR = './weights/'\n", + "DATA_DIR = './data/'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "save_model = True # if weights will be saved\n", + "weights_file = os.path.join(WEIGHTS_DIR, 'prednet_weather_weights.hdf5') # where weights will be saved\n", + "json_file = os.path.join(WEIGHTS_DIR, 'prednet_weather_model.json')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Data files\n", + "train_file = os.path.join(DATA_DIR, 'train/', 'x_train.hkl')\n", + "train_sources = os.path.join(DATA_DIR, 'train/', 'x_sources.hkl')\n", + "#val_file = os.path.join(DATA_DIR, 'X_val.hkl')\n", + "#val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Training parameters\n", + "nb_epoch = 1\n", + "batch_size = 4\n", + "samples_per_epoch = 500\n", + "N_seq_val = 100 # number of sequences to use for validation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Model parameters\n", + "n_channels, im_height, im_width = (12, 20, 40)\n", + "input_shape = (n_channels, im_height, im_width) if K.image_data_format() == 'channels_first' else (im_height, im_width, n_channels)\n", + "stack_sizes 
= (n_channels, 48, 96)\n", + "R_stack_sizes = stack_sizes\n", + "A_filt_sizes = (3, 3)\n", + "Ahat_filt_sizes = (3, 3, 3)\n", + "R_filt_sizes = (3, 3, 3)\n", + "layer_loss_weights = np.array([1., 0., 0.]) # weighting for each layer in final loss; \"L_0\" model: [1, 0, 0, 0], \"L_all\": [1, 0.1, 0.1, 0.1]\n", + "layer_loss_weights = np.expand_dims(layer_loss_weights, 1)\n", + "nt = 24 # number of timesteps used for sequences in training\n", + "time_loss_weights = 1./ (nt - 1) * np.ones((nt,1)) # equally weight all timesteps except the first\n", + "time_loss_weights[0] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "prednet = PredNet(stack_sizes, R_stack_sizes,\n", + " A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,\n", + " output_mode='error', return_sequences=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "inputs = Input(shape=(nt,) + input_shape)\n", + "errors = prednet(inputs) # errors will be (batch_size, nt, nb_layers)\n", + "errors_by_time = TimeDistributed(Dense(1, trainable=False), weights=[layer_loss_weights, np.zeros(1)], trainable=False)(errors) # calculate weighted error by layer\n", + "errors_by_time = Flatten()(errors_by_time) # will be (batch_size, nt)\n", + "final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False)(errors_by_time) # weight errors by time\n", + "model = Model(inputs=inputs, outputs=final_errors)\n", + "model.compile(loss='mean_absolute_error', optimizer='adam')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "input_1 (InputLayer) (None, 24, 20, 40, 12) 0 \n", + "_________________________________________________________________\n", + "pred_net_1 (PredNet) (None, 24, 3) 1645548 \n", + "_________________________________________________________________\n", + "time_distributed_1 (TimeDist (None, 24, 1) 4 \n", + "_________________________________________________________________\n", + "flatten_1 (Flatten) (None, 24) 0 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 1) 25 \n", + "=================================================================\n", + "Total params: 1,645,577\n", + "Trainable params: 1,645,548\n", + "Non-trainable params: 29\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "truth = []\n", + "for i in range(20):\n", + " truth.append(np.random.randint(255,size=(1)))\n", + "output = np.array(truth)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "train_generator = SequenceGenerator(train_file, train_sources, nt, batch_size=batch_size, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr_schedule = lambda epoch: 0.001 if epoch < 75 else 0.0001 # start with lr of 0.001 and then drop to 0.0001 after 75 epochs\n", + "callbacks = [LearningRateScheduler(lr_schedule)]\n", + "#history = model.fit(np_frames, output ,batch_size, 
nb_epoch, callbacks=callbacks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/1\n" + ] + } + ], + "source": [ + "history = model.fit_generator(train_generator, samples_per_epoch / batch_size, nb_epoch, callbacks=callbacks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Project Final/__pycache__/data_utils.cpython-36.pyc b/Project Final/__pycache__/data_utils.cpython-36.pyc new file mode 100644 index 0000000..a4084dd Binary files /dev/null and b/Project Final/__pycache__/data_utils.cpython-36.pyc differ diff --git a/Project Final/__pycache__/keras_utils.cpython-36.pyc b/Project Final/__pycache__/keras_utils.cpython-36.pyc new file mode 100644 index 0000000..3cc921a Binary files /dev/null and b/Project Final/__pycache__/keras_utils.cpython-36.pyc differ diff --git a/Project Final/__pycache__/prednet.cpython-36.pyc b/Project Final/__pycache__/prednet.cpython-36.pyc new file mode 100644 index 0000000..40e185f Binary files /dev/null and b/Project Final/__pycache__/prednet.cpython-36.pyc differ diff --git a/Project Final/data/train/x_sources.hkl b/Project Final/data/train/x_sources.hkl new file mode 100644 index 0000000..f0be695 Binary files /dev/null and b/Project Final/data/train/x_sources.hkl differ diff --git a/Project Final/data/train/x_train.hkl b/Project Final/data/train/x_train.hkl new file mode 100644 index 0000000..2806410 Binary files /dev/null and b/Project Final/data/train/x_train.hkl differ diff --git a/Project Final/data_utils.py b/Project Final/data_utils.py new file mode 100644 index 0000000..863ddbd --- /dev/null +++ b/Project Final/data_utils.py @@ -0,0 +1,66 @@ +import hickle as hkl +import numpy as np +from keras import backend as K +from keras.preprocessing.image import Iterator + +# Data generator that creates sequences for input into PredNet. 
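+# Usage sketch (hypothetical, but it mirrors how the notebook above drives this class): +#   gen = SequenceGenerator('./data/train/x_train.hkl', './data/train/x_sources.hkl', +#                           nt=24, batch_size=4, shuffle=True) +#   batch_x, batch_y = gen.next()  # batch_x: (4, 24, 20, 40, 12); batch_y is zeros in 'error' mode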
+class SequenceGenerator(Iterator): + def __init__(self, data_file, source_file, nt, + batch_size=8, shuffle=False, seed=None, + output_mode='error', sequence_start_mode='all', N_seq=None, + data_format=K.image_data_format()): + self.X = hkl.load(data_file) # X will be like (n_images, nb_cols, nb_rows, nb_channels) + self.sources = hkl.load(source_file) # source for each image so when creating sequences can assure that consecutive frames are from same video + self.nt = nt + self.batch_size = batch_size + self.data_format = data_format + assert sequence_start_mode in {'all', 'unique'}, 'sequence_start_mode must be in {all, unique}' + self.sequence_start_mode = sequence_start_mode + assert output_mode in {'error', 'prediction'}, 'output_mode must be in {error, prediction}' + self.output_mode = output_mode + + if self.data_format == 'channels_first': + self.X = np.transpose(self.X, (0, 3, 1, 2)) + self.im_shape = self.X[0].shape + + if self.sequence_start_mode == 'all': # allow for any possible sequence, starting from any frame + self.possible_starts = np.array([i for i in range(self.X.shape[0] - self.nt) if self.sources[i] == self.sources[i + self.nt - 1]]) + elif self.sequence_start_mode == 'unique': #create sequences where each unique frame is in at most one sequence + curr_location = 0 + possible_starts = [] + while curr_location < self.X.shape[0] - self.nt + 1: + if self.sources[curr_location] == self.sources[curr_location + self.nt - 1]: + possible_starts.append(curr_location) + curr_location += self.nt + else: + curr_location += 1 + self.possible_starts = possible_starts + + if shuffle: + self.possible_starts = np.random.permutation(self.possible_starts) + if N_seq is not None and len(self.possible_starts) > N_seq: # select a subset of sequences if want to + self.possible_starts = self.possible_starts[:N_seq] + self.N_sequences = len(self.possible_starts) + super(SequenceGenerator, self).__init__(len(self.possible_starts), batch_size, shuffle, seed) + + def next(self): + with self.lock: + index_array, current_index, current_batch_size = next(self.index_generator) + batch_x = np.zeros((current_batch_size, self.nt) + self.im_shape, np.float32) + for i, idx in enumerate(index_array): + idx = self.possible_starts[idx] + batch_x[i] = self.preprocess(self.X[idx:idx+self.nt]) + if self.output_mode == 'error': # model outputs errors, so y should be zeros + batch_y = np.zeros(current_batch_size, np.float32) + elif self.output_mode == 'prediction': # output actual pixels + batch_y = batch_x + return batch_x, batch_y + + def preprocess(self, X): + return X.astype(np.float32) / 255 + + def create_all(self): + X_all = np.zeros((self.N_sequences, self.nt) + self.im_shape, np.float32) + for i, idx in enumerate(self.possible_starts): + X_all[i] = self.preprocess(self.X[idx:idx+self.nt]) + return X_all diff --git a/Project Final/keras_utils.py b/Project Final/keras_utils.py new file mode 100644 index 0000000..ededcc7 --- /dev/null +++ b/Project Final/keras_utils.py @@ -0,0 +1,58 @@ +import os +import numpy as np + +from keras import backend as K +from keras.legacy.interfaces import generate_legacy_interface, recurrent_args_preprocessor +from keras.models import model_from_json + +legacy_prednet_support = generate_legacy_interface( + allowed_positional_args=['stack_sizes', 'R_stack_sizes', + 'A_filt_sizes', 'Ahat_filt_sizes', 'R_filt_sizes'], + conversions=[('dim_ordering', 'data_format'), + ('consume_less', 'implementation')], + value_conversions={'dim_ordering': {'tf': 'channels_last', + 'th': 
'channels_first', + 'default': None}, + 'consume_less': {'cpu': 0, + 'mem': 1, + 'gpu': 2}}, + preprocessor=recurrent_args_preprocessor) + +# Convert old Keras (1.2) json models and weights to Keras 2.0 +def convert_model_to_keras2(old_json_file, old_weights_file, new_json_file, new_weights_file): + from prednet import PredNet + # If using tensorflow, it doesn't allow you to load the old weights, + # so switch the backend to theano and reload before loading them. + if K.backend() != 'theano': + from importlib import reload + os.environ['KERAS_BACKEND'] = 'theano' + reload(K) + + f = open(old_json_file, 'r') + json_string = f.read() + f.close() + model = model_from_json(json_string, custom_objects = {'PredNet': PredNet}) + model.load_weights(old_weights_file) + + weights = model.layers[1].get_weights() + if weights[0].shape[0] == model.layers[1].stack_sizes[1]: + for i, w in enumerate(weights): + if w.ndim == 4: + weights[i] = np.transpose(w, (2, 3, 1, 0)) + model.set_weights(weights) + + model.save_weights(new_weights_file) + json_string = model.to_json() + with open(new_json_file, "w") as f: + f.write(json_string) + + +if __name__ == '__main__': + old_dir = './model_data/' + new_dir = './model_data_keras2/' + if not os.path.exists(new_dir): + os.mkdir(new_dir) + for w_tag in ['', '-Lall', '-extrapfinetuned']: + m_tag = '' if w_tag == '-Lall' else w_tag + convert_model_to_keras2(old_dir + 'prednet_kitti_model' + m_tag + '.json', + old_dir + 'prednet_kitti_weights' + w_tag + '.hdf5', + new_dir + 'prednet_kitti_model' + m_tag + '.json', + new_dir + 'prednet_kitti_weights' + w_tag + '.hdf5') diff --git a/Project Final/prednet.py b/Project Final/prednet.py new file mode 100644 index 0000000..cc7b012 --- /dev/null +++ b/Project Final/prednet.py @@ -0,0 +1,311 @@ +import numpy as np + +from keras import backend as K +from keras import activations +from keras.layers import Recurrent +from keras.layers import Conv2D, UpSampling2D, MaxPooling2D +from keras.engine import InputSpec +from keras_utils import legacy_prednet_support + +class PredNet(Recurrent): + '''PredNet architecture - Lotter 2016. + Stacked convolutional LSTM inspired by predictive coding principles. + + # Arguments + stack_sizes: number of channels in targets (A) and predictions (Ahat) in each layer of the architecture. + Length is the number of layers in the architecture. + First element is the number of channels in the input. + Ex. (3, 16, 32) would correspond to a 3 layer architecture that takes in RGB images and has 16 and 32 + channels in the second and third layers, respectively. + R_stack_sizes: number of channels in the representation (R) modules. + Length must equal length of stack_sizes, but the number of channels per layer can be different. + A_filt_sizes: filter sizes for the target (A) modules. + Has length of len(stack_sizes) - 1. + Ex. (3, 3) would mean that targets for layers 2 and 3 are computed by a 3x3 convolution of the errors (E) + from the layer below (followed by max-pooling) + Ahat_filt_sizes: filter sizes for the prediction (Ahat) modules. + Has length equal to length of stack_sizes. + Ex. (3, 3, 3) would mean that the predictions for each layer are computed by a 3x3 convolution of the + representation (R) modules at each layer. + R_filt_sizes: filter sizes for the representation (R) modules. + Has length equal to length of stack_sizes. + Corresponds to the filter sizes for all convolutions in the LSTM. + pixel_max: the maximum pixel value. + Used to clip the pixel-layer prediction. + error_activation: activation function for the error (E) units. 
+ A_activation: activation function for the target (A) and prediction (A_hat) units. + LSTM_activation: activation function for the cell and hidden states of the LSTM. + LSTM_inner_activation: activation function for the gates in the LSTM. + output_mode: either 'error', 'prediction', 'all' or layer specification (ex. R2, see below). + Controls what is outputted by the PredNet. + If 'error', the mean response of the error (E) units of each layer will be outputted. + That is, the output shape will be (batch_size, nb_layers). + If 'prediction', the frame prediction will be outputted. + If 'all', the output will be the frame prediction concatenated with the mean layer errors. + The frame prediction is flattened before concatenation. + Nomenclature of 'all' is kept for backwards compatibility, but should not be confused with returning all of the layers of the model. + For returning the features of a particular layer, output_mode should be of the form unit_type + layer_number. + For instance, to return the features of the LSTM "representational" units in the lowest layer, output_mode should be specified as 'R0'. + The possible unit types are 'R', 'Ahat', 'A', and 'E' corresponding to the 'representation', 'prediction', 'target', and 'error' units respectively. + extrap_start_time: time step for which model will start extrapolating. + Starting at this time step, the prediction from the previous time step will be treated as the "actual" + data_format: 'channels_first' or 'channels_last'. + It defaults to the `image_data_format` value found in your + Keras config file at `~/.keras/keras.json`. + + # References + - [Deep predictive coding networks for video prediction and unsupervised learning](https://arxiv.org/abs/1605.08104) + - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) + - [Convolutional LSTM network: a machine learning approach for precipitation nowcasting](http://arxiv.org/abs/1506.04214) + - [Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects](http://www.nature.com/neuro/journal/v2/n1/pdf/nn0199_79.pdf) + ''' + @legacy_prednet_support + def __init__(self, stack_sizes, R_stack_sizes, + A_filt_sizes, Ahat_filt_sizes, R_filt_sizes, + pixel_max=1., error_activation='relu', A_activation='relu', + LSTM_activation='tanh', LSTM_inner_activation='hard_sigmoid', + output_mode='error', extrap_start_time=None, + data_format=K.image_data_format(), **kwargs): + self.stack_sizes = stack_sizes + self.nb_layers = len(stack_sizes) + assert len(R_stack_sizes) == self.nb_layers, 'len(R_stack_sizes) must equal len(stack_sizes)' + self.R_stack_sizes = R_stack_sizes + assert len(A_filt_sizes) == (self.nb_layers - 1), 'len(A_filt_sizes) must equal len(stack_sizes) - 1' + self.A_filt_sizes = A_filt_sizes + assert len(Ahat_filt_sizes) == self.nb_layers, 'len(Ahat_filt_sizes) must equal len(stack_sizes)' + self.Ahat_filt_sizes = Ahat_filt_sizes + assert len(R_filt_sizes) == (self.nb_layers), 'len(R_filt_sizes) must equal len(stack_sizes)' + self.R_filt_sizes = R_filt_sizes + + self.pixel_max = pixel_max + self.error_activation = activations.get(error_activation) + self.A_activation = activations.get(A_activation) + self.LSTM_activation = activations.get(LSTM_activation) + self.LSTM_inner_activation = activations.get(LSTM_inner_activation) + + default_output_modes = ['prediction', 'error', 'all'] + layer_output_modes = [layer + str(n) for n in range(self.nb_layers) for layer in ['R', 'E', 'A', 'Ahat']] + assert 
output_mode in default_output_modes + layer_output_modes, 'Invalid output_mode: ' + str(output_mode) + self.output_mode = output_mode + if self.output_mode in layer_output_modes: + self.output_layer_type = self.output_mode[:-1] + self.output_layer_num = int(self.output_mode[-1]) + else: + self.output_layer_type = None + self.output_layer_num = None + self.extrap_start_time = extrap_start_time + + assert data_format in {'channels_last', 'channels_first'}, 'data_format must be in {channels_last, channels_first}' + self.data_format = data_format + self.channel_axis = -3 if data_format == 'channels_first' else -1 + self.row_axis = -2 if data_format == 'channels_first' else -3 + self.column_axis = -1 if data_format == 'channels_first' else -2 + super(PredNet, self).__init__(**kwargs) + self.input_spec = [InputSpec(ndim=5)] + + def compute_output_shape(self, input_shape): + if self.output_mode == 'prediction': + out_shape = input_shape[2:] + elif self.output_mode == 'error': + out_shape = (self.nb_layers,) + elif self.output_mode == 'all': + out_shape = (np.prod(input_shape[2:]) + self.nb_layers,) + else: + stack_str = 'R_stack_sizes' if self.output_layer_type == 'R' else 'stack_sizes' + stack_mult = 2 if self.output_layer_type == 'E' else 1 + out_stack_size = stack_mult * getattr(self, stack_str)[self.output_layer_num] + out_nb_row = input_shape[self.row_axis] / 2**self.output_layer_num + out_nb_col = input_shape[self.column_axis] / 2**self.output_layer_num + if self.data_format == 'channels_first': + out_shape = (out_stack_size, out_nb_row, out_nb_col) + else: + out_shape = (out_nb_row, out_nb_col, out_stack_size) + + if self.return_sequences: + return (input_shape[0], input_shape[1]) + out_shape + else: + return (input_shape[0],) + out_shape + + def get_initial_state(self, x): + input_shape = self.input_spec[0].shape + init_nb_row = input_shape[self.row_axis] + init_nb_col = input_shape[self.column_axis] + + base_initial_state = K.zeros_like(x) # (samples, timesteps) + image_shape + non_channel_axis = -1 if self.data_format == 'channels_first' else -2 + for _ in range(2): + base_initial_state = K.sum(base_initial_state, axis=non_channel_axis) + base_initial_state = K.sum(base_initial_state, axis=1) # (samples, nb_channels) + + initial_states = [] + states_to_pass = ['r', 'c', 'e'] + nlayers_to_pass = {u: self.nb_layers for u in states_to_pass} + if self.extrap_start_time is not None: + states_to_pass.append('ahat') # pass prediction in states so can use as actual for t+1 when extrapolating + nlayers_to_pass['ahat'] = 1 + for u in states_to_pass: + for l in range(nlayers_to_pass[u]): + ds_factor = 2 ** l + nb_row = init_nb_row // ds_factor + nb_col = init_nb_col // ds_factor + if u in ['r', 'c']: + stack_size = self.R_stack_sizes[l] + elif u == 'e': + stack_size = 2 * self.stack_sizes[l] + elif u == 'ahat': + stack_size = self.stack_sizes[l] + output_size = stack_size * nb_row * nb_col # flattened size + + reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size) + initial_state = K.dot(base_initial_state, reducer) # (samples, output_size) + if self.data_format == 'channels_first': + output_shp = (-1, stack_size, nb_row, nb_col) + else: + output_shp = (-1, nb_row, nb_col, stack_size) + initial_state = K.reshape(initial_state, output_shp) + initial_states += [initial_state] + + if K._BACKEND == 'theano': + from theano import tensor as T + # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension. 
+ # In our case, this is a problem when training on grayscale images, and the below line fixes it. + initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states] + + if self.extrap_start_time is not None: + initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')] # the last state will correspond to the current timestep + return initial_states + + def build(self, input_shape): + self.input_spec = [InputSpec(shape=input_shape)] + self.conv_layers = {c: [] for c in ['i', 'f', 'c', 'o', 'a', 'ahat']} + + for l in range(self.nb_layers): + for c in ['i', 'f', 'c', 'o']: + act = self.LSTM_activation if c == 'c' else self.LSTM_inner_activation + self.conv_layers[c].append(Conv2D(self.R_stack_sizes[l], self.R_filt_sizes[l], padding='same', activation=act, data_format=self.data_format)) + + act = 'relu' if l == 0 else self.A_activation + self.conv_layers['ahat'].append(Conv2D(self.stack_sizes[l], self.Ahat_filt_sizes[l], padding='same', activation=act, data_format=self.data_format)) + + if l < self.nb_layers - 1: + self.conv_layers['a'].append(Conv2D(self.stack_sizes[l+1], self.A_filt_sizes[l], padding='same', activation=self.A_activation, data_format=self.data_format)) + + self.upsample = UpSampling2D(data_format=self.data_format) + self.pool = MaxPooling2D(data_format=self.data_format) + + self.trainable_weights = [] + nb_row, nb_col = (input_shape[-2], input_shape[-1]) if self.data_format == 'channels_first' else (input_shape[-3], input_shape[-2]) + for c in sorted(self.conv_layers.keys()): + for l in range(len(self.conv_layers[c])): + ds_factor = 2 ** l + if c == 'ahat': + nb_channels = self.R_stack_sizes[l] + elif c == 'a': + nb_channels = 2 * self.R_stack_sizes[l] + else: + nb_channels = self.stack_sizes[l] * 2 + self.R_stack_sizes[l] + if l < self.nb_layers - 1: + nb_channels += self.R_stack_sizes[l+1] + in_shape = (input_shape[0], nb_channels, nb_row // ds_factor, nb_col // ds_factor) + if self.data_format == 'channels_last': in_shape = (in_shape[0], in_shape[2], in_shape[3], in_shape[1]) + with K.name_scope('layer_' + c + '_' + str(l)): + self.conv_layers[c][l].build(in_shape) + self.trainable_weights += self.conv_layers[c][l].trainable_weights + + self.states = [None] * self.nb_layers*3 + + if self.extrap_start_time is not None: + self.t_extrap = K.variable(self.extrap_start_time, int if K.backend() != 'tensorflow' else 'int32') + self.states += [None] * 2 # [previous frame prediction, timestep] + + def step(self, a, states): + r_tm1 = states[:self.nb_layers] + c_tm1 = states[self.nb_layers:2*self.nb_layers] + e_tm1 = states[2*self.nb_layers:3*self.nb_layers] + + if self.extrap_start_time is not None: + t = states[-1] + a = K.switch(t >= self.t_extrap, states[-2], a) # if past self.extrap_start_time, the previous prediction will be treated as the actual + + c = [] + r = [] + e = [] + + # Update R units starting from the top + for l in reversed(range(self.nb_layers)): + inputs = [r_tm1[l], e_tm1[l]] + if l < self.nb_layers - 1: + inputs.append(r_up) + + inputs = K.concatenate(inputs, axis=self.channel_axis) + i = self.conv_layers['i'][l].call(inputs) + f = self.conv_layers['f'][l].call(inputs) + o = self.conv_layers['o'][l].call(inputs) + _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs) + _r = o * self.LSTM_activation(_c) + c.insert(0, _c) + r.insert(0, _r) + + if l > 0: + r_up = self.upsample.call(_r) + + # Update feedforward path starting from the bottom + for l in range(self.nb_layers): + ahat = 
self.conv_layers['ahat'][l].call(r[l]) + if l == 0: + ahat = K.minimum(ahat, self.pixel_max) + frame_prediction = ahat + + # compute errors + e_up = self.error_activation(ahat - a) + e_down = self.error_activation(a - ahat) + + e.append(K.concatenate((e_up, e_down), axis=self.channel_axis)) + + if self.output_layer_num == l: + if self.output_layer_type == 'A': + output = a + elif self.output_layer_type == 'Ahat': + output = ahat + elif self.output_layer_type == 'R': + output = r[l] + elif self.output_layer_type == 'E': + output = e[l] + + if l < self.nb_layers - 1: + a = self.conv_layers['a'][l].call(e[l]) + a = self.pool.call(a) # target for next layer + + if self.output_layer_type is None: + if self.output_mode == 'prediction': + output = frame_prediction + else: + for l in range(self.nb_layers): + layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True) + all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1) + if self.output_mode == 'error': + output = all_error + else: + output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1) + + states = r + c + e + if self.extrap_start_time is not None: + states += [frame_prediction, t + 1] + return output, states + + def get_config(self): + config = {'stack_sizes': self.stack_sizes, + 'R_stack_sizes': self.R_stack_sizes, + 'A_filt_sizes': self.A_filt_sizes, + 'Ahat_filt_sizes': self.Ahat_filt_sizes, + 'R_filt_sizes': self.R_filt_sizes, + 'pixel_max': self.pixel_max, + 'error_activation': self.error_activation.__name__, + 'A_activation': self.A_activation.__name__, + 'LSTM_activation': self.LSTM_activation.__name__, + 'LSTM_inner_activation': self.LSTM_inner_activation.__name__, + 'data_format': self.data_format, + 'extrap_start_time': self.extrap_start_time, + 'output_mode': self.output_mode} + base_config = super(PredNet, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/preprocessing/.ipynb_checkpoints/preprocess_data-checkpoint.ipynb b/preprocessing/.ipynb_checkpoints/preprocess_data-checkpoint.ipynb new file mode 100644 index 0000000..239840f --- /dev/null +++ b/preprocessing/.ipynb_checkpoints/preprocess_data-checkpoint.ipynb @@ -0,0 +1,1788 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preprocess the raw data from NOAA\n", + "This notebook is set up to take in the CSVs from NOAA and remove the unnecessary data. It will also separate out each station for later positioning."
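, + "\n", + "The cleaning step below strips every non-digit character from the numeric columns with a regex. A rough illustration of what that does to raw NOAA values (note the caveat: r'\\D' also removes decimal points and minus signs, so the cleaned values are treated as integer-like):\n", + "\n", + "```python\n", + "import pandas as pd\n", + "pd.Series(['12s', '-3.5', '0.01T']).replace(regex=True, to_replace=r'\\D', value=r'')\n", + "# -> ['12', '35', '001']\n", + "```"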
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Getting a list of files in raw data folder\n", + "filenames = os.listdir('D:/Nico/Desktop/full_grid')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "header_wanted = [\n", + " 'HOURLYVISIBILITY',\n", + " 'HOURLYDRYBULBTEMPC',\n", + " 'HOURLYWETBULBTEMPC',\n", + " 'HOURLYDewPointTempC',\n", + " 'HOURLYRelativeHumidity',\n", + " 'HOURLYWindSpeed',\n", + " 'HOURLYWindGustSpeed',\n", + " 'HOURLYStationPressure',\n", + " 'HOURLYPressureTendency',\n", + " 'HOURLYPressureChange',\n", + " 'HOURLYSeaLevelPressure',\n", + " 'HOURLYPrecip',\n", + " 'HOURLYAltimeterSetting']" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "usecols = ['DATE','STATION'] + header_wanted" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████| 82/82 [03:04<00:00, 2.25s/it]\n" + ] + } + ], + "source": [ + "#Loading all files into a pandas Dataframe\n", + "tqdm.pandas()\n", + "df = pd.concat([pd.read_csv('D:/Nico/Desktop/full_grid/{}'.format(x), usecols=usecols, low_memory=False) for x in tqdm(filenames)])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#Getting the station names\n", + "wban = df['STATION'].unique()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def remove_letters(headers,dataframes):\n", + " for i in tqdm(headers):\n", + " dataframes[i].replace(regex=True,inplace=True,to_replace=r'\\D',value=r'')\n", + " dataframes[i] = dataframes[i].apply(pd.to_numeric)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████| 13/13 [1:16:01<00:00, 350.87s/it]\n" + ] + } + ], + "source": [ + "remove_letters(header_wanted,df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 394/394 [10:28<00:00, 1.60s/it]\n" + ] + } + ], + "source": [ + "by_station_list = []\n", + "\n", + "for i in tqdm(wban):\n", + " by_station_list.append(df.loc[df.STATION == i])\n", + "del df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 50881 entries, 0 to 50880\n", + "Data columns (total 15 columns):\n", + "STATION 50881 non-null object\n", + "DATE 50881 non-null object\n", + "HOURLYVISIBILITY 50315 non-null float64\n", + "HOURLYDRYBULBTEMPC 43475 non-null float64\n", + "HOURLYWETBULBTEMPC 41842 non-null float64\n", + "HOURLYDewPointTempC 43475 non-null float64\n", + "HOURLYRelativeHumidity 43475 non-null float64\n", + "HOURLYWindSpeed 50380 non-null float64\n", + 
"HOURLYWindGustSpeed 3039 non-null float64\n", + "HOURLYStationPressure 48624 non-null float64\n", + "HOURLYPressureTendency 0 non-null float64\n", + "HOURLYPressureChange 0 non-null float64\n", + "HOURLYSeaLevelPressure 0 non-null float64\n", + "HOURLYPrecip 0 non-null float64\n", + "HOURLYAltimeterSetting 48624 non-null float64\n", + "dtypes: float64(13), object(2)\n", + "memory usage: 6.2+ MB\n" + ] + } + ], + "source": [ + "by_station_list[0].info()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 394/394 [00:05<00:00, 68.16it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(by_station_list))):\n", + " by_station_list[i] = by_station_list[i].set_index(pd.DatetimeIndex(by_station_list[i]['DATE']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "DatetimeIndex: 50881 entries, 2016-08-01 00:15:00 to 2018-07-31 23:59:00\n", + "Data columns (total 15 columns):\n", + "STATION 50881 non-null object\n", + "DATE 50881 non-null object\n", + "HOURLYVISIBILITY 50315 non-null float64\n", + "HOURLYDRYBULBTEMPC 43475 non-null float64\n", + "HOURLYWETBULBTEMPC 41842 non-null float64\n", + "HOURLYDewPointTempC 43475 non-null float64\n", + "HOURLYRelativeHumidity 43475 non-null float64\n", + "HOURLYWindSpeed 50380 non-null float64\n", + "HOURLYWindGustSpeed 3039 non-null float64\n", + "HOURLYStationPressure 48624 non-null float64\n", + "HOURLYPressureTendency 0 non-null float64\n", + "HOURLYPressureChange 0 non-null float64\n", + "HOURLYSeaLevelPressure 0 non-null float64\n", + "HOURLYPrecip 0 non-null float64\n", + "HOURLYAltimeterSetting 48624 non-null float64\n", + "dtypes: float64(13), object(2)\n", + "memory usage: 6.2+ MB\n" + ] + } + ], + "source": [ + "by_station_list[0].info()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 394/394 [00:04<00:00, 80.90it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(by_station_list))):\n", + " by_station_list[i] = by_station_list[i].resample('60T').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "wban_list = []\n", + "for x in wban:\n", + " wban_list.append(x[:4] + x[5:])" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'WBAN00445'" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wban_list[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
"[HTML render of by_station_list[0] — 17520 rows × 13 columns of hourly-resampled values; the same data appears in the text/plain output below]\n"
" + ], + "text/plain": [ + " HOURLYVISIBILITY HOURLYDRYBULBTEMPC HOURLYWETBULBTEMPC \\\n", + "DATE \n", + "2016-08-01 00:00:00 1000.000000 254.000000 25.600000 \n", + "2016-08-01 01:00:00 1000.000000 246.333333 24.600000 \n", + "2016-08-01 02:00:00 1000.000000 NaN NaN \n", + "2016-08-01 03:00:00 1000.000000 NaN NaN \n", + "2016-08-01 04:00:00 1000.000000 NaN NaN \n", + "2016-08-01 05:00:00 1000.000000 NaN NaN \n", + "2016-08-01 06:00:00 1000.000000 NaN NaN \n", + "2016-08-01 07:00:00 1000.000000 253.000000 24.600000 \n", + "2016-08-01 08:00:00 1000.000000 274.000000 24.933333 \n", + "2016-08-01 09:00:00 1000.000000 296.333333 25.066667 \n", + "2016-08-01 10:00:00 1000.000000 310.000000 25.366667 \n", + "2016-08-01 11:00:00 1000.000000 327.333333 25.233333 \n", + "2016-08-01 12:00:00 1000.000000 333.000000 25.000000 \n", + "2016-08-01 13:00:00 1000.000000 338.666667 25.033333 \n", + "2016-08-01 14:00:00 1000.000000 336.333333 25.333333 \n", + "2016-08-01 15:00:00 1000.000000 329.666667 25.066667 \n", + "2016-08-01 16:00:00 1000.000000 307.000000 24.566667 \n", + "2016-08-01 17:00:00 1000.000000 290.666667 24.200000 \n", + "2016-08-01 18:00:00 1000.000000 278.000000 23.966667 \n", + "2016-08-01 19:00:00 1000.000000 264.333333 23.633333 \n", + "2016-08-01 20:00:00 1000.000000 256.333333 23.933333 \n", + "2016-08-01 21:00:00 1000.000000 247.333333 24.600000 \n", + "2016-08-01 22:00:00 1000.000000 244.000000 24.400000 \n", + "2016-08-01 23:00:00 1000.000000 NaN NaN \n", + "2016-08-02 00:00:00 1000.000000 NaN NaN \n", + "2016-08-02 01:00:00 1000.000000 NaN NaN \n", + "2016-08-02 02:00:00 1000.000000 NaN NaN \n", + "2016-08-02 03:00:00 1000.000000 NaN NaN \n", + "2016-08-02 04:00:00 800.000000 NaN NaN \n", + "2016-08-02 05:00:00 700.000000 NaN NaN \n", + "... ... ... ... 
\n", + "2018-07-30 18:00:00 1000.000000 266.000000 22.000000 \n", + "2018-07-30 19:00:00 1000.000000 245.000000 21.400000 \n", + "2018-07-30 20:00:00 1000.000000 245.000000 21.900000 \n", + "2018-07-30 21:00:00 1000.000000 247.333333 22.266667 \n", + "2018-07-30 22:00:00 1000.000000 246.666667 22.266667 \n", + "2018-07-30 23:00:00 1000.000000 242.333333 22.133333 \n", + "2018-07-31 00:00:00 1000.000000 236.666667 21.933333 \n", + "2018-07-31 01:00:00 1000.000000 231.333333 21.833333 \n", + "2018-07-31 02:00:00 900.000000 227.000000 21.700000 \n", + "2018-07-31 03:00:00 700.000000 225.666667 21.566667 \n", + "2018-07-31 04:00:00 900.000000 224.666667 21.500000 \n", + "2018-07-31 05:00:00 633.333333 221.000000 21.233333 \n", + "2018-07-31 06:00:00 800.000000 229.333333 21.900000 \n", + "2018-07-31 07:00:00 1000.000000 248.333333 22.700000 \n", + "2018-07-31 08:00:00 1000.000000 272.666667 23.800000 \n", + "2018-07-31 09:00:00 1000.000000 297.666667 24.300000 \n", + "2018-07-31 10:00:00 900.000000 306.000000 24.233333 \n", + "2018-07-31 11:00:00 1000.000000 315.666667 24.100000 \n", + "2018-07-31 12:00:00 800.000000 313.666667 24.366667 \n", + "2018-07-31 13:00:00 900.000000 285.333333 24.100000 \n", + "2018-07-31 14:00:00 NaN NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN NaN \n", + "2018-07-31 23:00:00 1000.000000 NaN NaN \n", + "\n", + " HOURLYDewPointTempC HOURLYRelativeHumidity \\\n", + "DATE \n", + "2016-08-01 00:00:00 254.000000 100.000000 \n", + "2016-08-01 01:00:00 246.333333 100.000000 \n", + "2016-08-01 02:00:00 NaN NaN \n", + "2016-08-01 03:00:00 NaN NaN \n", + "2016-08-01 04:00:00 NaN NaN \n", + "2016-08-01 05:00:00 NaN NaN \n", + "2016-08-01 06:00:00 NaN NaN \n", + "2016-08-01 07:00:00 242.666667 94.333333 \n", + "2016-08-01 08:00:00 238.333333 81.000000 \n", + "2016-08-01 09:00:00 231.333333 68.000000 \n", + "2016-08-01 10:00:00 229.666667 62.333333 \n", + "2016-08-01 11:00:00 221.333333 54.000000 \n", + "2016-08-01 12:00:00 214.000000 50.000000 \n", + "2016-08-01 13:00:00 213.333333 48.000000 \n", + "2016-08-01 14:00:00 219.000000 50.333333 \n", + "2016-08-01 15:00:00 215.333333 51.333333 \n", + "2016-08-01 16:00:00 218.666667 59.333333 \n", + "2016-08-01 17:00:00 219.666667 65.666667 \n", + "2016-08-01 18:00:00 222.000000 71.666667 \n", + "2016-08-01 19:00:00 223.666667 78.666667 \n", + "2016-08-01 20:00:00 232.333333 86.666667 \n", + "2016-08-01 21:00:00 247.333333 100.000000 \n", + "2016-08-01 22:00:00 244.000000 100.000000 \n", + "2016-08-01 23:00:00 NaN NaN \n", + "2016-08-02 00:00:00 NaN NaN \n", + "2016-08-02 01:00:00 NaN NaN \n", + "2016-08-02 02:00:00 NaN NaN \n", + "2016-08-02 03:00:00 NaN NaN \n", + "2016-08-02 04:00:00 NaN NaN \n", + "2016-08-02 05:00:00 NaN NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 196.000000 65.333333 \n", + "2018-07-30 19:00:00 198.666667 75.333333 \n", + "2018-07-30 20:00:00 205.666667 78.333333 \n", + "2018-07-30 21:00:00 210.333333 79.666667 \n", + "2018-07-30 22:00:00 209.666667 79.666667 \n", + "2018-07-30 23:00:00 210.333333 82.333333 \n", + "2018-07-31 00:00:00 211.666667 85.666667 \n", + "2018-07-31 01:00:00 211.333333 88.333333 \n", + "2018-07-31 02:00:00 211.666667 91.000000 \n", + "2018-07-31 03:00:00 211.000000 91.666667 \n", + "2018-07-31 04:00:00 210.666667 92.000000 \n", + "2018-07-31 05:00:00 208.333333 92.666667 \n", + "2018-07-31 06:00:00 214.000000 91.000000 \n", + "2018-07-31 07:00:00 217.666667 83.000000 \n", + "2018-07-31 08:00:00 223.666667 74.666667 \n", + "2018-07-31 09:00:00 219.000000 62.666667 \n", + "2018-07-31 10:00:00 212.666667 57.666667 \n", + "2018-07-31 11:00:00 206.000000 52.333333 \n", + "2018-07-31 12:00:00 211.666667 55.000000 \n", + "2018-07-31 13:00:00 221.666667 68.666667 \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN \n", + "2018-07-31 23:00:00 NaN NaN \n", + "\n", + " HOURLYWindSpeed HOURLYWindGustSpeed \\\n", + "DATE \n", + "2016-08-01 00:00:00 3.000000 NaN \n", + "2016-08-01 01:00:00 1.000000 NaN \n", + "2016-08-01 02:00:00 2.000000 NaN \n", + "2016-08-01 03:00:00 0.000000 NaN \n", + "2016-08-01 04:00:00 0.000000 NaN \n", + "2016-08-01 05:00:00 0.000000 NaN \n", + "2016-08-01 06:00:00 2.000000 NaN \n", + "2016-08-01 07:00:00 3.666667 NaN \n", + "2016-08-01 08:00:00 1.666667 NaN \n", + "2016-08-01 09:00:00 2.000000 NaN \n", + "2016-08-01 10:00:00 1.000000 NaN \n", + "2016-08-01 11:00:00 2.666667 NaN \n", + "2016-08-01 12:00:00 3.666667 NaN \n", + "2016-08-01 13:00:00 3.666667 NaN \n", + "2016-08-01 14:00:00 5.666667 NaN \n", + "2016-08-01 15:00:00 10.000000 NaN \n", + "2016-08-01 16:00:00 7.000000 NaN \n", + "2016-08-01 17:00:00 3.666667 NaN \n", + "2016-08-01 18:00:00 3.000000 NaN \n", + "2016-08-01 19:00:00 0.000000 NaN \n", + "2016-08-01 20:00:00 1.000000 NaN \n", + "2016-08-01 21:00:00 2.000000 NaN \n", + "2016-08-01 22:00:00 0.000000 NaN \n", + "2016-08-01 23:00:00 0.000000 NaN \n", + "2016-08-02 00:00:00 0.000000 NaN \n", + "2016-08-02 01:00:00 0.000000 NaN \n", + "2016-08-02 02:00:00 1.000000 NaN \n", + "2016-08-02 03:00:00 0.000000 NaN \n", + "2016-08-02 04:00:00 0.000000 NaN \n", + "2016-08-02 05:00:00 0.000000 NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 6.333333 24.0 \n", + "2018-07-30 19:00:00 3.000000 NaN \n", + "2018-07-30 20:00:00 1.666667 NaN \n", + "2018-07-30 21:00:00 1.000000 NaN \n", + "2018-07-30 22:00:00 3.666667 NaN \n", + "2018-07-30 23:00:00 2.000000 NaN \n", + "2018-07-31 00:00:00 0.000000 NaN \n", + "2018-07-31 01:00:00 0.000000 NaN \n", + "2018-07-31 02:00:00 3.333333 NaN \n", + "2018-07-31 03:00:00 2.000000 NaN \n", + "2018-07-31 04:00:00 1.000000 NaN \n", + "2018-07-31 05:00:00 0.000000 NaN \n", + "2018-07-31 06:00:00 1.666667 NaN \n", + "2018-07-31 07:00:00 3.666667 NaN \n", + "2018-07-31 08:00:00 4.000000 NaN \n", + "2018-07-31 09:00:00 5.666667 NaN \n", + "2018-07-31 10:00:00 6.333333 NaN \n", + "2018-07-31 11:00:00 5.000000 NaN \n", + "2018-07-31 12:00:00 5.000000 NaN \n", + "2018-07-31 13:00:00 5.000000 NaN \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN \n", + "2018-07-31 23:00:00 0.000000 NaN \n", + "\n", + " HOURLYStationPressure HOURLYPressureTendency \\\n", + "DATE \n", + "2016-08-01 00:00:00 2992.000000 NaN \n", + "2016-08-01 01:00:00 2991.333333 NaN \n", + "2016-08-01 02:00:00 2992.000000 NaN \n", + "2016-08-01 03:00:00 2992.333333 NaN \n", + "2016-08-01 04:00:00 2994.000000 NaN \n", + "2016-08-01 05:00:00 2995.666667 NaN \n", + "2016-08-01 06:00:00 2997.333333 NaN \n", + "2016-08-01 07:00:00 2998.000000 NaN \n", + "2016-08-01 08:00:00 2998.000000 NaN \n", + "2016-08-01 09:00:00 2998.000000 NaN \n", + "2016-08-01 10:00:00 2997.666667 NaN \n", + "2016-08-01 11:00:00 2995.333333 NaN \n", + "2016-08-01 12:00:00 2993.333333 NaN \n", + "2016-08-01 13:00:00 2992.333333 NaN \n", + "2016-08-01 14:00:00 2991.000000 NaN \n", + "2016-08-01 15:00:00 2990.000000 NaN \n", + "2016-08-01 16:00:00 2990.666667 NaN \n", + "2016-08-01 17:00:00 2991.000000 NaN \n", + "2016-08-01 18:00:00 2993.333333 NaN \n", + "2016-08-01 19:00:00 2992.666667 NaN \n", + "2016-08-01 20:00:00 2994.666667 NaN \n", + "2016-08-01 21:00:00 2996.000000 NaN \n", + "2016-08-01 22:00:00 2995.000000 NaN \n", + "2016-08-01 23:00:00 2995.000000 NaN \n", + "2016-08-02 00:00:00 2994.000000 NaN \n", + "2016-08-02 01:00:00 2994.666667 NaN \n", + "2016-08-02 02:00:00 2995.000000 NaN \n", + "2016-08-02 03:00:00 2995.000000 NaN \n", + "2016-08-02 04:00:00 2995.000000 NaN \n", + "2016-08-02 05:00:00 2996.000000 NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 2980.666667 NaN \n", + "2018-07-30 19:00:00 2982.666667 NaN \n", + "2018-07-30 20:00:00 2984.000000 NaN \n", + "2018-07-30 21:00:00 2984.333333 NaN \n", + "2018-07-30 22:00:00 2985.666667 NaN \n", + "2018-07-30 23:00:00 2984.666667 NaN \n", + "2018-07-31 00:00:00 2984.000000 NaN \n", + "2018-07-31 01:00:00 2982.666667 NaN \n", + "2018-07-31 02:00:00 2982.000000 NaN \n", + "2018-07-31 03:00:00 2982.666667 NaN \n", + "2018-07-31 04:00:00 2984.666667 NaN \n", + "2018-07-31 05:00:00 2986.333333 NaN \n", + "2018-07-31 06:00:00 2986.666667 NaN \n", + "2018-07-31 07:00:00 2987.000000 NaN \n", + "2018-07-31 08:00:00 2987.000000 NaN \n", + "2018-07-31 09:00:00 2986.333333 NaN \n", + "2018-07-31 10:00:00 2985.333333 NaN \n", + "2018-07-31 11:00:00 2984.000000 NaN \n", + "2018-07-31 12:00:00 2980.666667 NaN \n", + "2018-07-31 13:00:00 2978.333333 NaN \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN \n", + "2018-07-31 23:00:00 2987.666667 NaN \n", + "\n", + " HOURLYPressureChange HOURLYSeaLevelPressure \\\n", + "DATE \n", + "2016-08-01 00:00:00 NaN NaN \n", + "2016-08-01 01:00:00 NaN NaN \n", + "2016-08-01 02:00:00 NaN NaN \n", + "2016-08-01 03:00:00 NaN NaN \n", + "2016-08-01 04:00:00 NaN NaN \n", + "2016-08-01 05:00:00 NaN NaN \n", + "2016-08-01 06:00:00 NaN NaN \n", + "2016-08-01 07:00:00 NaN NaN \n", + "2016-08-01 08:00:00 NaN NaN \n", + "2016-08-01 09:00:00 NaN NaN \n", + "2016-08-01 10:00:00 NaN NaN \n", + "2016-08-01 11:00:00 NaN NaN \n", + "2016-08-01 12:00:00 NaN NaN \n", + "2016-08-01 13:00:00 NaN NaN \n", + "2016-08-01 14:00:00 NaN NaN \n", + "2016-08-01 15:00:00 NaN NaN \n", + "2016-08-01 16:00:00 NaN NaN \n", + "2016-08-01 17:00:00 NaN NaN \n", + "2016-08-01 18:00:00 NaN NaN \n", + "2016-08-01 19:00:00 NaN NaN \n", + "2016-08-01 20:00:00 NaN NaN \n", + "2016-08-01 21:00:00 NaN NaN \n", + "2016-08-01 22:00:00 NaN NaN \n", + "2016-08-01 23:00:00 NaN NaN \n", + "2016-08-02 00:00:00 NaN NaN \n", + "2016-08-02 01:00:00 NaN NaN \n", + "2016-08-02 02:00:00 NaN NaN \n", + "2016-08-02 03:00:00 NaN NaN \n", + "2016-08-02 04:00:00 NaN NaN \n", + "2016-08-02 05:00:00 NaN NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 NaN NaN \n", + "2018-07-30 19:00:00 NaN NaN \n", + "2018-07-30 20:00:00 NaN NaN \n", + "2018-07-30 21:00:00 NaN NaN \n", + "2018-07-30 22:00:00 NaN NaN \n", + "2018-07-30 23:00:00 NaN NaN \n", + "2018-07-31 00:00:00 NaN NaN \n", + "2018-07-31 01:00:00 NaN NaN \n", + "2018-07-31 02:00:00 NaN NaN \n", + "2018-07-31 03:00:00 NaN NaN \n", + "2018-07-31 04:00:00 NaN NaN \n", + "2018-07-31 05:00:00 NaN NaN \n", + "2018-07-31 06:00:00 NaN NaN \n", + "2018-07-31 07:00:00 NaN NaN \n", + "2018-07-31 08:00:00 NaN NaN \n", + "2018-07-31 09:00:00 NaN NaN \n", + "2018-07-31 10:00:00 NaN NaN \n", + "2018-07-31 11:00:00 NaN NaN \n", + "2018-07-31 12:00:00 NaN NaN \n", + "2018-07-31 13:00:00 NaN NaN \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN \n", + "2018-07-31 23:00:00 NaN NaN \n", + "\n", + " HOURLYPrecip HOURLYAltimeterSetting \n", + "DATE \n", + "2016-08-01 00:00:00 NaN 3004.000000 \n", + "2016-08-01 01:00:00 NaN 3003.333333 \n", + "2016-08-01 02:00:00 NaN 3004.000000 \n", + "2016-08-01 03:00:00 NaN 3004.333333 \n", + "2016-08-01 04:00:00 NaN 3006.000000 \n", + "2016-08-01 05:00:00 NaN 3007.666667 \n", + "2016-08-01 06:00:00 NaN 3009.333333 \n", + "2016-08-01 07:00:00 NaN 3010.000000 \n", + "2016-08-01 08:00:00 NaN 3010.000000 \n", + "2016-08-01 09:00:00 NaN 3010.000000 \n", + "2016-08-01 10:00:00 NaN 3009.666667 \n", + "2016-08-01 11:00:00 NaN 3007.333333 \n", + "2016-08-01 12:00:00 NaN 3005.333333 \n", + "2016-08-01 13:00:00 NaN 3004.333333 \n", + "2016-08-01 14:00:00 NaN 3003.000000 \n", + "2016-08-01 15:00:00 NaN 3002.000000 \n", + "2016-08-01 16:00:00 NaN 3002.666667 \n", + "2016-08-01 17:00:00 NaN 3003.000000 \n", + "2016-08-01 18:00:00 NaN 3005.333333 \n", + "2016-08-01 19:00:00 NaN 3004.666667 \n", + "2016-08-01 20:00:00 NaN 3006.666667 \n", + "2016-08-01 21:00:00 NaN 3008.000000 \n", + "2016-08-01 22:00:00 NaN 3007.000000 \n", + "2016-08-01 23:00:00 NaN 3007.000000 \n", + "2016-08-02 00:00:00 NaN 3006.000000 \n", + "2016-08-02 01:00:00 NaN 3006.666667 \n", + "2016-08-02 02:00:00 NaN 3007.000000 \n", + "2016-08-02 03:00:00 NaN 3007.000000 \n", + "2016-08-02 04:00:00 NaN 3007.000000 \n", + "2016-08-02 05:00:00 NaN 3008.000000 \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 NaN 2992.666667 \n", + "2018-07-30 19:00:00 NaN 2994.666667 \n", + "2018-07-30 20:00:00 NaN 2996.000000 \n", + "2018-07-30 21:00:00 NaN 2996.333333 \n", + "2018-07-30 22:00:00 NaN 2997.666667 \n", + "2018-07-30 23:00:00 NaN 2996.666667 \n", + "2018-07-31 00:00:00 NaN 2996.000000 \n", + "2018-07-31 01:00:00 NaN 2994.666667 \n", + "2018-07-31 02:00:00 NaN 2994.000000 \n", + "2018-07-31 03:00:00 NaN 2994.666667 \n", + "2018-07-31 04:00:00 NaN 2996.666667 \n", + "2018-07-31 05:00:00 NaN 2998.333333 \n", + "2018-07-31 06:00:00 NaN 2998.666667 \n", + "2018-07-31 07:00:00 NaN 2999.000000 \n", + "2018-07-31 08:00:00 NaN 2999.000000 \n", + "2018-07-31 09:00:00 NaN 2998.333333 \n", + "2018-07-31 10:00:00 NaN 2997.333333 \n", + "2018-07-31 11:00:00 NaN 2996.000000 \n", + "2018-07-31 12:00:00 NaN 2992.666667 \n", + "2018-07-31 13:00:00 NaN 2990.333333 \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN \n", + "2018-07-31 23:00:00 NaN 2999.666667 \n", + "\n", + "[17520 rows x 13 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_station_list[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████| 394/394 [02:04<00:00, 3.17it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(wban))):\n", + " by_station_list[i].to_csv('D:/Nico/Desktop/processed_data/{}.csv'.format(wban_list[i]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Current Issues\n", + "\n", + "1) The data is not synced across time zones; all entries start at midnight local time. \n", + "\n", + "2) Some stations have multiple entries per hour and need to be reduced to a single reading.\n", + "\n", + "Solutions\n", + "\n", + "Remove rows from the data based on each station's timezone so the times line up (see the sketch below).\n", + "Limit each station to one entry per hour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/preprocessing/preprocess_data.ipynb b/preprocessing/preprocess_data.ipynb new file mode 100644 index 0000000..0304a5a --- /dev/null +++ b/preprocessing/preprocess_data.ipynb @@ -0,0 +1,1898 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preprocess the raw data from NOAA\n", + "This notebook is set up to take in the CSVs from NOAA and remove the unnecessary data. It will also separate out each station for later positioning."
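To make the preprocessing concrete, here is a condensed sketch of the per-station pipeline this notebook implements (concatenate the raw CSVs keeping only the wanted columns, split by STATION, index by time, and mean-resample to one entry per hour), together with a hypothetical `to_utc` helper for the timezone-sync fix listed under Current Issues above. The `RAW_DIR`/`OUT_DIR` paths mirror the ones used in the cells, but `to_utc`, `run`, and the `utc_offsets` mapping are illustrative assumptions, not part of the committed notebook.

```python
# Condensed sketch of the per-station preprocessing pipeline; utc_offsets
# is a hypothetical {station_id: hours-ahead-of-UTC} mapping that would
# have to come from station metadata.
import os
import pandas as pd

RAW_DIR = 'D:/Nico/Desktop/full_grid'       # raw NOAA CSVs, as above
OUT_DIR = 'D:/Nico/Desktop/processed_data'  # per-station hourly CSVs

def load_raw(filenames, usecols):
    # Concatenate the raw NOAA CSVs, keeping only the wanted columns
    return pd.concat(pd.read_csv(os.path.join(RAW_DIR, f),
                                 usecols=usecols, low_memory=False)
                     for f in filenames)

def hourly_station(df, station_id):
    # Pull one station's rows, index them by time, and mean-resample to a
    # single averaged entry per hour (issue 2 above)
    station = df.loc[df.STATION == station_id].copy()
    station = station.set_index(pd.DatetimeIndex(station['DATE']))
    return station.resample('60T').mean()

def to_utc(hourly, offset_hours):
    # Hypothetical fix for issue 1: shift a local-time index to UTC so all
    # stations line up (local = UTC + offset, hence UTC = local - offset)
    out = hourly.copy()
    out.index = out.index - pd.Timedelta(hours=offset_hours)
    return out

def run(filenames, usecols, wban_list, utc_offsets):
    df = load_raw(filenames, usecols)
    for station_id in wban_list:
        hourly = to_utc(hourly_station(df, station_id),
                        utc_offsets[station_id])
        # 'WBAN:00184' -> 'WBAN00184' so the ID works as a filename
        name = station_id[:4] + station_id[5:]
        hourly.to_csv(os.path.join(OUT_DIR, '{}.csv'.format(name)))
```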
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Getting a list of files in raw data folder\n", + "filenames = os.listdir('D:/Nico/Desktop/full_grid')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "header_wanted = [\n", + " 'HOURLYVISIBILITY',\n", + " 'HOURLYDRYBULBTEMPC',\n", + " 'HOURLYWETBULBTEMPC',\n", + " 'HOURLYDewPointTempC',\n", + " 'HOURLYRelativeHumidity',\n", + " 'HOURLYWindSpeed',\n", + " 'HOURLYWindGustSpeed',\n", + " 'HOURLYStationPressure',\n", + " 'HOURLYPressureTendency',\n", + " 'HOURLYPressureChange',\n", + " 'HOURLYSeaLevelPressure',\n", + " 'HOURLYPrecip',\n", + " 'HOURLYAltimeterSetting']" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "usecols = ['DATE','STATION'] + header_wanted" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████| 82/82 [03:05<00:00, 2.26s/it]\n" + ] + } + ], + "source": [ + "#Loading all files into a pandas Dataframe\n", + "tqdm.pandas()\n", + "df = pd.concat([pd.read_csv('D:/Nico/Desktop/full_grid/{}'.format(x), usecols=usecols, low_memory=False) for x in tqdm(filenames)])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "406" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Getting the station names\n", + "#wban = df['STATION'].unique()\n", + "stations = pd.read_csv(\"../Playground/stations_unique.csv\", usecols = ['STATION_ID'])\n", + "wban_list = stations['STATION_ID'].tolist()\n", + "len(wban_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def remove_letters(headers,dataframes):\n", + " for i in tqdm(headers):\n", + " dataframes[i].replace(regex=True,inplace=True,to_replace=r'\\D',value=r'')\n", + " dataframes[i] = dataframes[i].apply(pd.to_numeric)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████| 13/13 [1:11:23<00:00, 329.48s/it]\n" + ] + } + ], + "source": [ + "remove_letters(header_wanted,df)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [10:47<00:00, 1.59s/it]\n" + ] + } + ], + "source": [ + "by_station_list = []\n", + "\n", + "for i in tqdm(wban_list):\n", + " by_station_list.append(df.loc[df.STATION == i])\n", + "del df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 46719 entries, 109127 to 155845\n", + "Data columns (total 15 columns):\n", + "STATION 46719 non-null object\n", + "DATE 46719 non-null object\n", + 
"HOURLYVISIBILITY 43438 non-null float64\n", + "HOURLYDRYBULBTEMPC 46196 non-null float64\n", + "HOURLYWETBULBTEMPC 44790 non-null float64\n", + "HOURLYDewPointTempC 46195 non-null float64\n", + "HOURLYRelativeHumidity 46195 non-null float64\n", + "HOURLYWindSpeed 44852 non-null float64\n", + "HOURLYWindGustSpeed 6657 non-null float64\n", + "HOURLYStationPressure 44796 non-null float64\n", + "HOURLYPressureTendency 0 non-null float64\n", + "HOURLYPressureChange 0 non-null float64\n", + "HOURLYSeaLevelPressure 0 non-null float64\n", + "HOURLYPrecip 2096 non-null float64\n", + "HOURLYAltimeterSetting 46197 non-null float64\n", + "dtypes: float64(13), object(2)\n", + "memory usage: 5.7+ MB\n" + ] + } + ], + "source": [ + "by_station_list[0].info()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [00:07<00:00, 54.83it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(by_station_list))):\n", + " by_station_list[i]['STATION_ID'] = by_station_list[i]['STATION']\n", + " by_station_list[i] = by_station_list[i].set_index(pd.DatetimeIndex(by_station_list[i]['DATE']))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "DatetimeIndex: 46719 entries, 2016-08-01 00:15:00 to 2018-07-31 23:59:00\n", + "Data columns (total 16 columns):\n", + "STATION 46719 non-null object\n", + "DATE 46719 non-null object\n", + "HOURLYVISIBILITY 43438 non-null float64\n", + "HOURLYDRYBULBTEMPC 46196 non-null float64\n", + "HOURLYWETBULBTEMPC 44790 non-null float64\n", + "HOURLYDewPointTempC 46195 non-null float64\n", + "HOURLYRelativeHumidity 46195 non-null float64\n", + "HOURLYWindSpeed 44852 non-null float64\n", + "HOURLYWindGustSpeed 6657 non-null float64\n", + "HOURLYStationPressure 44796 non-null float64\n", + "HOURLYPressureTendency 0 non-null float64\n", + "HOURLYPressureChange 0 non-null float64\n", + "HOURLYSeaLevelPressure 0 non-null float64\n", + "HOURLYPrecip 2096 non-null float64\n", + "HOURLYAltimeterSetting 46197 non-null float64\n", + "STATION_ID 46719 non-null object\n", + "dtypes: float64(13), object(3)\n", + "memory usage: 6.1+ MB\n" + ] + } + ], + "source": [ + "by_station_list[0].info()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [00:05<00:00, 73.63it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(by_station_list))):\n", + " by_station_list[i] = by_station_list[i].resample('60T').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "wban_name = []\n", + "for x in wban_list:\n", + " wban_name.append(x[:4] + x[5:])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'WBAN00184'" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wban_name[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'WBAN:00184'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "wban_list[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████| 406/406 [00:00<00:00, 675.55it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(by_station_list))):\n", + " by_station_list[i]['STATION'] = wban_list[i] " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
"[HTML render of by_station_list[0] — 17520 rows × 14 columns: the 13 hourly variables plus the restored STATION column (e.g. 'WBAN:00184'); the same data appears in the text/plain output below]\n"
" + ], + "text/plain": [ + " HOURLYVISIBILITY HOURLYDRYBULBTEMPC HOURLYWETBULBTEMPC \\\n", + "DATE \n", + "2016-08-01 00:00:00 525.000000 243.000000 24.400000 \n", + "2016-08-01 01:00:00 433.333333 240.333333 23.900000 \n", + "2016-08-01 02:00:00 316.666667 237.333333 23.500000 \n", + "2016-08-01 03:00:00 91.666667 237.333333 23.900000 \n", + "2016-08-01 04:00:00 733.333333 235.666667 23.500000 \n", + "2016-08-01 05:00:00 633.333333 237.666667 23.900000 \n", + "2016-08-01 06:00:00 900.000000 249.000000 25.000000 \n", + "2016-08-01 07:00:00 1000.000000 264.000000 26.366667 \n", + "2016-08-01 08:00:00 1000.000000 283.000000 27.133333 \n", + "2016-08-01 09:00:00 1000.000000 296.666667 27.333333 \n", + "2016-08-01 10:00:00 1000.000000 301.666667 26.833333 \n", + "2016-08-01 11:00:00 1000.000000 285.333333 26.033333 \n", + "2016-08-01 12:00:00 1000.000000 271.333333 25.300000 \n", + "2016-08-01 13:00:00 1000.000000 281.666667 26.133333 \n", + "2016-08-01 14:00:00 NaN 305.333333 26.466667 \n", + "2016-08-01 15:00:00 NaN 314.666667 26.933333 \n", + "2016-08-01 16:00:00 NaN 309.000000 26.433333 \n", + "2016-08-01 17:00:00 1000.000000 302.666667 26.700000 \n", + "2016-08-01 18:00:00 1000.000000 290.000000 26.933333 \n", + "2016-08-01 19:00:00 1000.000000 271.333333 26.500000 \n", + "2016-08-01 20:00:00 1000.000000 265.333333 26.000000 \n", + "2016-08-01 21:00:00 1000.000000 256.666667 25.766667 \n", + "2016-08-01 22:00:00 1000.000000 253.666667 25.400000 \n", + "2016-08-01 23:00:00 1000.000000 251.666667 25.200000 \n", + "2016-08-02 00:00:00 1000.000000 248.000000 24.600000 \n", + "2016-08-02 01:00:00 1000.000000 246.666667 24.400000 \n", + "2016-08-02 02:00:00 483.333333 242.333333 24.233333 \n", + "2016-08-02 03:00:00 208.333333 242.000000 24.233333 \n", + "2016-08-02 04:00:00 800.000000 241.666667 24.233333 \n", + "2016-08-02 05:00:00 633.333333 240.666667 23.900000 \n", + "... ... ... ... 
\n", + "2018-07-30 18:00:00 1000.000000 241.333333 24.066667 \n", + "2018-07-30 19:00:00 1000.000000 238.000000 23.900000 \n", + "2018-07-30 20:00:00 1000.000000 235.333333 23.300000 \n", + "2018-07-30 21:00:00 1000.000000 236.000000 23.500000 \n", + "2018-07-30 22:00:00 1000.000000 241.666667 24.066667 \n", + "2018-07-30 23:00:00 1000.000000 238.333333 23.700000 \n", + "2018-07-31 00:00:00 1000.000000 236.000000 23.500000 \n", + "2018-07-31 01:00:00 1000.000000 235.333333 23.500000 \n", + "2018-07-31 02:00:00 1000.000000 239.333333 24.066667 \n", + "2018-07-31 03:00:00 700.000000 234.000000 23.300000 \n", + "2018-07-31 04:00:00 700.000000 233.333333 23.300000 \n", + "2018-07-31 05:00:00 800.000000 235.000000 23.500000 \n", + "2018-07-31 06:00:00 900.000000 245.333333 24.600000 \n", + "2018-07-31 07:00:00 1000.000000 263.000000 26.500000 \n", + "2018-07-31 08:00:00 725.000000 250.333333 24.866667 \n", + "2018-07-31 09:00:00 1000.000000 263.666667 24.733333 \n", + "2018-07-31 10:00:00 1000.000000 265.333333 25.366667 \n", + "2018-07-31 11:00:00 1000.000000 286.000000 25.666667 \n", + "2018-07-31 12:00:00 1000.000000 293.333333 25.900000 \n", + "2018-07-31 13:00:00 1000.000000 283.000000 25.050000 \n", + "2018-07-31 14:00:00 NaN 277.333333 NaN \n", + "2018-07-31 15:00:00 NaN 272.666667 NaN \n", + "2018-07-31 16:00:00 NaN 274.666667 NaN \n", + "2018-07-31 17:00:00 NaN 271.000000 NaN \n", + "2018-07-31 18:00:00 NaN 259.333333 NaN \n", + "2018-07-31 19:00:00 NaN 254.000000 NaN \n", + "2018-07-31 20:00:00 NaN 247.000000 NaN \n", + "2018-07-31 21:00:00 NaN 238.333333 NaN \n", + "2018-07-31 22:00:00 700.000000 234.666667 23.300000 \n", + "2018-07-31 23:00:00 800.000000 232.666667 23.300000 \n", + "\n", + " HOURLYDewPointTempC HOURLYRelativeHumidity \\\n", + "DATE \n", + "2016-08-01 00:00:00 243.000000 100.000000 \n", + "2016-08-01 01:00:00 240.333333 100.000000 \n", + "2016-08-01 02:00:00 237.333333 100.000000 \n", + "2016-08-01 03:00:00 237.333333 100.000000 \n", + "2016-08-01 04:00:00 235.666667 100.000000 \n", + "2016-08-01 05:00:00 237.666667 100.000000 \n", + "2016-08-01 06:00:00 249.000000 100.000000 \n", + "2016-08-01 07:00:00 262.666667 99.333333 \n", + "2016-08-01 08:00:00 267.333333 91.000000 \n", + "2016-08-01 09:00:00 265.000000 83.000000 \n", + "2016-08-01 10:00:00 255.666667 76.333333 \n", + "2016-08-01 11:00:00 251.666667 82.333333 \n", + "2016-08-01 12:00:00 243.333333 84.666667 \n", + "2016-08-01 13:00:00 253.333333 84.666667 \n", + "2016-08-01 14:00:00 248.666667 71.666667 \n", + "2016-08-01 15:00:00 250.666667 69.000000 \n", + "2016-08-01 16:00:00 246.666667 69.333333 \n", + "2016-08-01 17:00:00 254.666667 75.666667 \n", + "2016-08-01 18:00:00 260.000000 84.000000 \n", + "2016-08-01 19:00:00 261.333333 94.333333 \n", + "2016-08-01 20:00:00 258.333333 96.000000 \n", + "2016-08-01 21:00:00 256.000000 99.666667 \n", + "2016-08-01 22:00:00 253.666667 100.000000 \n", + "2016-08-01 23:00:00 251.666667 100.000000 \n", + "2016-08-02 00:00:00 248.000000 100.000000 \n", + "2016-08-02 01:00:00 246.666667 100.000000 \n", + "2016-08-02 02:00:00 242.333333 100.000000 \n", + "2016-08-02 03:00:00 242.000000 100.000000 \n", + "2016-08-02 04:00:00 241.666667 100.000000 \n", + "2016-08-02 05:00:00 240.666667 100.000000 \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 241.333333 100.000000 \n", + "2018-07-30 19:00:00 238.000000 100.000000 \n", + "2018-07-30 20:00:00 235.333333 100.000000 \n", + "2018-07-30 21:00:00 236.000000 100.000000 \n", + "2018-07-30 22:00:00 241.666667 100.000000 \n", + "2018-07-30 23:00:00 238.333333 100.000000 \n", + "2018-07-31 00:00:00 236.000000 100.000000 \n", + "2018-07-31 01:00:00 235.333333 100.000000 \n", + "2018-07-31 02:00:00 239.333333 100.000000 \n", + "2018-07-31 03:00:00 234.000000 100.000000 \n", + "2018-07-31 04:00:00 233.333333 100.000000 \n", + "2018-07-31 05:00:00 235.000000 100.000000 \n", + "2018-07-31 06:00:00 245.333333 100.000000 \n", + "2018-07-31 07:00:00 263.000000 100.000000 \n", + "2018-07-31 08:00:00 248.000000 98.666667 \n", + "2018-07-31 09:00:00 242.666667 88.333333 \n", + "2018-07-31 10:00:00 247.333333 90.000000 \n", + "2018-07-31 11:00:00 245.333333 78.666667 \n", + "2018-07-31 12:00:00 245.333333 75.333333 \n", + "2018-07-31 13:00:00 235.333333 75.333333 \n", + "2018-07-31 14:00:00 229.666667 75.333333 \n", + "2018-07-31 15:00:00 234.000000 79.666667 \n", + "2018-07-31 16:00:00 237.000000 80.000000 \n", + "2018-07-31 17:00:00 237.666667 82.000000 \n", + "2018-07-31 18:00:00 250.000000 94.666667 \n", + "2018-07-31 19:00:00 253.333333 99.666667 \n", + "2018-07-31 20:00:00 247.000000 100.000000 \n", + "2018-07-31 21:00:00 238.333333 100.000000 \n", + "2018-07-31 22:00:00 234.666667 100.000000 \n", + "2018-07-31 23:00:00 232.666667 100.000000 \n", + "\n", + " HOURLYWindSpeed HOURLYWindGustSpeed \\\n", + "DATE \n", + "2016-08-01 00:00:00 0.000000 NaN \n", + "2016-08-01 01:00:00 0.000000 NaN \n", + "2016-08-01 02:00:00 0.000000 NaN \n", + "2016-08-01 03:00:00 0.000000 NaN \n", + "2016-08-01 04:00:00 0.000000 NaN \n", + "2016-08-01 05:00:00 0.000000 NaN \n", + "2016-08-01 06:00:00 0.000000 NaN \n", + "2016-08-01 07:00:00 0.000000 NaN \n", + "2016-08-01 08:00:00 1.666667 NaN \n", + "2016-08-01 09:00:00 1.000000 NaN \n", + "2016-08-01 10:00:00 0.000000 NaN \n", + "2016-08-01 11:00:00 7.000000 20.0 \n", + "2016-08-01 12:00:00 3.333333 NaN \n", + "2016-08-01 13:00:00 0.000000 NaN \n", + "2016-08-01 14:00:00 1.000000 NaN \n", + "2016-08-01 15:00:00 0.000000 NaN \n", + "2016-08-01 16:00:00 0.000000 NaN \n", + "2016-08-01 17:00:00 1.000000 NaN \n", + "2016-08-01 18:00:00 0.000000 NaN \n", + "2016-08-01 19:00:00 2.666667 NaN \n", + "2016-08-01 20:00:00 0.000000 NaN \n", + "2016-08-01 21:00:00 0.000000 NaN \n", + "2016-08-01 22:00:00 0.000000 NaN \n", + "2016-08-01 23:00:00 0.000000 NaN \n", + "2016-08-02 00:00:00 0.000000 NaN \n", + "2016-08-02 01:00:00 0.000000 NaN \n", + "2016-08-02 02:00:00 0.000000 NaN \n", + "2016-08-02 03:00:00 0.000000 NaN \n", + "2016-08-02 04:00:00 0.000000 NaN \n", + "2016-08-02 05:00:00 0.000000 NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 1.500000 NaN \n", + "2018-07-30 19:00:00 0.000000 NaN \n", + "2018-07-30 20:00:00 1.000000 NaN \n", + "2018-07-30 21:00:00 1.000000 NaN \n", + "2018-07-30 22:00:00 1.666667 NaN \n", + "2018-07-30 23:00:00 0.000000 NaN \n", + "2018-07-31 00:00:00 0.000000 NaN \n", + "2018-07-31 01:00:00 1.500000 NaN \n", + "2018-07-31 02:00:00 NaN NaN \n", + "2018-07-31 03:00:00 0.000000 NaN \n", + "2018-07-31 04:00:00 2.000000 NaN \n", + "2018-07-31 05:00:00 0.000000 NaN \n", + "2018-07-31 06:00:00 0.000000 NaN \n", + "2018-07-31 07:00:00 0.000000 NaN \n", + "2018-07-31 08:00:00 6.000000 16.0 \n", + "2018-07-31 09:00:00 6.500000 NaN \n", + "2018-07-31 10:00:00 4.666667 NaN \n", + "2018-07-31 11:00:00 5.000000 NaN \n", + "2018-07-31 12:00:00 8.333333 31.5 \n", + "2018-07-31 13:00:00 11.500000 23.5 \n", + "2018-07-31 14:00:00 3.000000 NaN \n", + "2018-07-31 15:00:00 1.500000 NaN \n", + "2018-07-31 16:00:00 1.000000 NaN \n", + "2018-07-31 17:00:00 1.000000 NaN \n", + "2018-07-31 18:00:00 0.000000 NaN \n", + "2018-07-31 19:00:00 0.000000 NaN \n", + "2018-07-31 20:00:00 0.000000 NaN \n", + "2018-07-31 21:00:00 0.000000 NaN \n", + "2018-07-31 22:00:00 0.000000 NaN \n", + "2018-07-31 23:00:00 0.000000 NaN \n", + "\n", + " HOURLYStationPressure HOURLYPressureTendency \\\n", + "DATE \n", + "2016-08-01 00:00:00 2999.000000 NaN \n", + "2016-08-01 01:00:00 2999.333333 NaN \n", + "2016-08-01 02:00:00 2998.666667 NaN \n", + "2016-08-01 03:00:00 2998.333333 NaN \n", + "2016-08-01 04:00:00 2999.333333 NaN \n", + "2016-08-01 05:00:00 3000.333333 NaN \n", + "2016-08-01 06:00:00 3001.666667 NaN \n", + "2016-08-01 07:00:00 3003.666667 NaN \n", + "2016-08-01 08:00:00 3004.000000 NaN \n", + "2016-08-01 09:00:00 3004.000000 NaN \n", + "2016-08-01 10:00:00 3005.666667 NaN \n", + "2016-08-01 11:00:00 3005.666667 NaN \n", + "2016-08-01 12:00:00 3004.000000 NaN \n", + "2016-08-01 13:00:00 3002.666667 NaN \n", + "2016-08-01 14:00:00 3000.333333 NaN \n", + "2016-08-01 15:00:00 2999.000000 NaN \n", + "2016-08-01 16:00:00 2998.000000 NaN \n", + "2016-08-01 17:00:00 2997.000000 NaN \n", + "2016-08-01 18:00:00 2998.333333 NaN \n", + "2016-08-01 19:00:00 2999.000000 NaN \n", + "2016-08-01 20:00:00 3000.333333 NaN \n", + "2016-08-01 21:00:00 3002.666667 NaN \n", + "2016-08-01 22:00:00 3003.333333 NaN \n", + "2016-08-01 23:00:00 3002.666667 NaN \n", + "2016-08-02 00:00:00 3001.333333 NaN \n", + "2016-08-02 01:00:00 3001.333333 NaN \n", + "2016-08-02 02:00:00 3002.000000 NaN \n", + "2016-08-02 03:00:00 3002.000000 NaN \n", + "2016-08-02 04:00:00 3002.000000 NaN \n", + "2016-08-02 05:00:00 3003.666667 NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 2990.000000 NaN \n", + "2018-07-30 19:00:00 2989.666667 NaN \n", + "2018-07-30 20:00:00 2992.333333 NaN \n", + "2018-07-30 21:00:00 2994.000000 NaN \n", + "2018-07-30 22:00:00 2994.333333 NaN \n", + "2018-07-30 23:00:00 2993.666667 NaN \n", + "2018-07-31 00:00:00 2992.666667 NaN \n", + "2018-07-31 01:00:00 2992.000000 NaN \n", + "2018-07-31 02:00:00 2991.666667 NaN \n", + "2018-07-31 03:00:00 2991.333333 NaN \n", + "2018-07-31 04:00:00 2992.000000 NaN \n", + "2018-07-31 05:00:00 2993.333333 NaN \n", + "2018-07-31 06:00:00 2993.666667 NaN \n", + "2018-07-31 07:00:00 2994.000000 NaN \n", + "2018-07-31 08:00:00 2995.666667 NaN \n", + "2018-07-31 09:00:00 2994.333333 NaN \n", + "2018-07-31 10:00:00 2994.666667 NaN \n", + "2018-07-31 11:00:00 2994.666667 NaN \n", + "2018-07-31 12:00:00 2992.666667 NaN \n", + "2018-07-31 13:00:00 2992.500000 NaN \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 2993.000000 NaN \n", + "2018-07-31 23:00:00 2993.000000 NaN \n", + "\n", + " HOURLYPressureChange HOURLYSeaLevelPressure \\\n", + "DATE \n", + "2016-08-01 00:00:00 NaN NaN \n", + "2016-08-01 01:00:00 NaN NaN \n", + "2016-08-01 02:00:00 NaN NaN \n", + "2016-08-01 03:00:00 NaN NaN \n", + "2016-08-01 04:00:00 NaN NaN \n", + "2016-08-01 05:00:00 NaN NaN \n", + "2016-08-01 06:00:00 NaN NaN \n", + "2016-08-01 07:00:00 NaN NaN \n", + "2016-08-01 08:00:00 NaN NaN \n", + "2016-08-01 09:00:00 NaN NaN \n", + "2016-08-01 10:00:00 NaN NaN \n", + "2016-08-01 11:00:00 NaN NaN \n", + "2016-08-01 12:00:00 NaN NaN \n", + "2016-08-01 13:00:00 NaN NaN \n", + "2016-08-01 14:00:00 NaN NaN \n", + "2016-08-01 15:00:00 NaN NaN \n", + "2016-08-01 16:00:00 NaN NaN \n", + "2016-08-01 17:00:00 NaN NaN \n", + "2016-08-01 18:00:00 NaN NaN \n", + "2016-08-01 19:00:00 NaN NaN \n", + "2016-08-01 20:00:00 NaN NaN \n", + "2016-08-01 21:00:00 NaN NaN \n", + "2016-08-01 22:00:00 NaN NaN \n", + "2016-08-01 23:00:00 NaN NaN \n", + "2016-08-02 00:00:00 NaN NaN \n", + "2016-08-02 01:00:00 NaN NaN \n", + "2016-08-02 02:00:00 NaN NaN \n", + "2016-08-02 03:00:00 NaN NaN \n", + "2016-08-02 04:00:00 NaN NaN \n", + "2016-08-02 05:00:00 NaN NaN \n", + "... ... ... 
\n", + "2018-07-30 18:00:00 NaN NaN \n", + "2018-07-30 19:00:00 NaN NaN \n", + "2018-07-30 20:00:00 NaN NaN \n", + "2018-07-30 21:00:00 NaN NaN \n", + "2018-07-30 22:00:00 NaN NaN \n", + "2018-07-30 23:00:00 NaN NaN \n", + "2018-07-31 00:00:00 NaN NaN \n", + "2018-07-31 01:00:00 NaN NaN \n", + "2018-07-31 02:00:00 NaN NaN \n", + "2018-07-31 03:00:00 NaN NaN \n", + "2018-07-31 04:00:00 NaN NaN \n", + "2018-07-31 05:00:00 NaN NaN \n", + "2018-07-31 06:00:00 NaN NaN \n", + "2018-07-31 07:00:00 NaN NaN \n", + "2018-07-31 08:00:00 NaN NaN \n", + "2018-07-31 09:00:00 NaN NaN \n", + "2018-07-31 10:00:00 NaN NaN \n", + "2018-07-31 11:00:00 NaN NaN \n", + "2018-07-31 12:00:00 NaN NaN \n", + "2018-07-31 13:00:00 NaN NaN \n", + "2018-07-31 14:00:00 NaN NaN \n", + "2018-07-31 15:00:00 NaN NaN \n", + "2018-07-31 16:00:00 NaN NaN \n", + "2018-07-31 17:00:00 NaN NaN \n", + "2018-07-31 18:00:00 NaN NaN \n", + "2018-07-31 19:00:00 NaN NaN \n", + "2018-07-31 20:00:00 NaN NaN \n", + "2018-07-31 21:00:00 NaN NaN \n", + "2018-07-31 22:00:00 NaN NaN \n", + "2018-07-31 23:00:00 NaN NaN \n", + "\n", + " HOURLYPrecip HOURLYAltimeterSetting STATION \n", + "DATE \n", + "2016-08-01 00:00:00 NaN 3004.000000 WBAN:00184 \n", + "2016-08-01 01:00:00 NaN 3004.333333 WBAN:00184 \n", + "2016-08-01 02:00:00 NaN 3003.666667 WBAN:00184 \n", + "2016-08-01 03:00:00 NaN 3003.333333 WBAN:00184 \n", + "2016-08-01 04:00:00 NaN 3004.333333 WBAN:00184 \n", + "2016-08-01 05:00:00 NaN 3005.333333 WBAN:00184 \n", + "2016-08-01 06:00:00 NaN 3006.666667 WBAN:00184 \n", + "2016-08-01 07:00:00 NaN 3008.666667 WBAN:00184 \n", + "2016-08-01 08:00:00 NaN 3009.000000 WBAN:00184 \n", + "2016-08-01 09:00:00 NaN 3009.000000 WBAN:00184 \n", + "2016-08-01 10:00:00 NaN 3010.666667 WBAN:00184 \n", + "2016-08-01 11:00:00 NaN 3010.666667 WBAN:00184 \n", + "2016-08-01 12:00:00 NaN 3009.000000 WBAN:00184 \n", + "2016-08-01 13:00:00 NaN 3007.666667 WBAN:00184 \n", + "2016-08-01 14:00:00 NaN 3005.333333 WBAN:00184 \n", + "2016-08-01 15:00:00 NaN 3004.000000 WBAN:00184 \n", + "2016-08-01 16:00:00 NaN 3003.000000 WBAN:00184 \n", + "2016-08-01 17:00:00 NaN 3002.000000 WBAN:00184 \n", + "2016-08-01 18:00:00 NaN 3003.333333 WBAN:00184 \n", + "2016-08-01 19:00:00 NaN 3004.000000 WBAN:00184 \n", + "2016-08-01 20:00:00 NaN 3005.333333 WBAN:00184 \n", + "2016-08-01 21:00:00 NaN 3007.666667 WBAN:00184 \n", + "2016-08-01 22:00:00 NaN 3008.333333 WBAN:00184 \n", + "2016-08-01 23:00:00 NaN 3007.666667 WBAN:00184 \n", + "2016-08-02 00:00:00 NaN 3006.333333 WBAN:00184 \n", + "2016-08-02 01:00:00 NaN 3006.333333 WBAN:00184 \n", + "2016-08-02 02:00:00 NaN 3007.000000 WBAN:00184 \n", + "2016-08-02 03:00:00 NaN 3007.000000 WBAN:00184 \n", + "2016-08-02 04:00:00 NaN 3007.000000 WBAN:00184 \n", + "2016-08-02 05:00:00 NaN 3008.666667 WBAN:00184 \n", + "... ... ... ... 
\n", + "2018-07-30 18:00:00 NaN 2995.000000 WBAN:00184 \n", + "2018-07-30 19:00:00 NaN 2994.666667 WBAN:00184 \n", + "2018-07-30 20:00:00 NaN 2997.333333 WBAN:00184 \n", + "2018-07-30 21:00:00 1.0 2999.000000 WBAN:00184 \n", + "2018-07-30 22:00:00 NaN 2999.333333 WBAN:00184 \n", + "2018-07-30 23:00:00 NaN 2998.666667 WBAN:00184 \n", + "2018-07-31 00:00:00 NaN 2997.666667 WBAN:00184 \n", + "2018-07-31 01:00:00 NaN 2997.000000 WBAN:00184 \n", + "2018-07-31 02:00:00 NaN 2996.666667 WBAN:00184 \n", + "2018-07-31 03:00:00 NaN 2996.333333 WBAN:00184 \n", + "2018-07-31 04:00:00 NaN 2997.000000 WBAN:00184 \n", + "2018-07-31 05:00:00 NaN 2998.333333 WBAN:00184 \n", + "2018-07-31 06:00:00 NaN 2998.666667 WBAN:00184 \n", + "2018-07-31 07:00:00 NaN 2999.000000 WBAN:00184 \n", + "2018-07-31 08:00:00 NaN 3000.666667 WBAN:00184 \n", + "2018-07-31 09:00:00 NaN 2999.333333 WBAN:00184 \n", + "2018-07-31 10:00:00 NaN 2999.666667 WBAN:00184 \n", + "2018-07-31 11:00:00 NaN 2999.666667 WBAN:00184 \n", + "2018-07-31 12:00:00 NaN 2997.666667 WBAN:00184 \n", + "2018-07-31 13:00:00 NaN 2997.333333 WBAN:00184 \n", + "2018-07-31 14:00:00 NaN 2995.333333 WBAN:00184 \n", + "2018-07-31 15:00:00 NaN 2995.333333 WBAN:00184 \n", + "2018-07-31 16:00:00 NaN 2995.333333 WBAN:00184 \n", + "2018-07-31 17:00:00 NaN 2994.000000 WBAN:00184 \n", + "2018-07-31 18:00:00 NaN 2994.666667 WBAN:00184 \n", + "2018-07-31 19:00:00 NaN 2995.333333 WBAN:00184 \n", + "2018-07-31 20:00:00 NaN 2995.333333 WBAN:00184 \n", + "2018-07-31 21:00:00 NaN 2996.666667 WBAN:00184 \n", + "2018-07-31 22:00:00 NaN 2997.000000 WBAN:00184 \n", + "2018-07-31 23:00:00 NaN 2998.000000 WBAN:00184 \n", + "\n", + "[17520 rows x 14 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_station_list[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [02:11<00:00, 3.10it/s]\n" + ] + } + ], + "source": [ + "for i in tqdm(range(len(wban_name))):\n", + " by_station_list[i].to_csv('D:/Nico/Desktop/processed_data/{}.csv'.format(wban_name[i]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Current Issues\n", + "\n", + "1) data is not synced across time-zone and all entries start at midnight local time. \n", + "\n", + "2) some stations have multiple entries per hour and need to be reduced.\n", + "\n", + "Solutions\n", + "\n", + "Remove rows from data based on timezone to sync times\n", + "limit only 1 entry per hour for a station" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}