Initial commit

2018-09-17 17:16:39 -05:00
parent 9ac35413de
commit 953507a22a
12 changed files with 5185 additions and 0 deletions
--- a/Final/.ipynb_checkpoints/Project-checkpoint.ipynb
+++ b/Final/.ipynb_checkpoints/Project-checkpoint.ipynb
@@ -0,0 +1,532 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Project Notebook\n",
+    "This is the full and complete notebook that takes in the data from NOAA, processes it into frames to be used in the PredNet architecture, and produces a resulting prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List the files in the processed data folder. os.listdir returns entries in\n",
+    "# arbitrary order, so sort them to keep the row order of the loaded data reproducible.\n",
+    "filenames = sorted(os.listdir('D:/Nico/Desktop/processed_data'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "header_wanted = [\n",
+    " 'HOURLYVISIBILITY',\n",
+    " 'HOURLYDRYBULBTEMPC',\n",
+    " 'HOURLYWETBULBTEMPC',\n",
+    " 'HOURLYDewPointTempC',\n",
+    " 'HOURLYRelativeHumidity',\n",
+    " 'HOURLYWindSpeed',\n",
+    " 'HOURLYWindGustSpeed',\n",
+    " 'HOURLYStationPressure',\n",
+    " 'HOURLYPressureTendency',\n",
+    " 'HOURLYPressureChange',\n",
+    " 'HOURLYSeaLevelPressure',\n",
+    " 'HOURLYPrecip',\n",
+    " 'HOURLYAltimeterSetting']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "usecols = ['DATE','STATION'] + header_wanted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Loading all files into a pandas Dataframe\n",
+    "tqdm.pandas()\n",
+    "df = pd.concat([pd.read_csv('D:/Nico/Desktop/processed_data/{}'.format(x), usecols=usecols, low_memory=False) for x in tqdm(filenames)])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "At this point all the data has been loaded into a single dataframe and any data changes have been made. The next step is to break the data up by WBAN and place each station's data in a 2D array at the appropriate grid cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stations = pd.read_csv(\"../Playground/stations_unique.csv\", usecols = ['STATION_ID', 'LON_SCALED', 'LAT_SCALED'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "height = 20\n",
+    "width = 40"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mask = [([0] * width) for i in range(height)]\n",
+    "\n",
+    "wban_loc = dict(zip(stations.STATION_ID,zip(stations.LON_SCALED,stations.LAT_SCALED)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grid = [([pd.DataFrame()] * width) for i in range(height)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for key, value in tqdm(wban_loc.items()):\n",
+    "    mask[value[1]][value[0]] = 1\n",
+    "    grid[value[1]][value[0]] = df.loc[df.STATION == key]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.imshow(mask)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_frames(data, height, width, depth, n_channels=12, frames_per_day=24):\n",
+    "    \"\"\"Stack per-station rows into frames and group the frames by day.\n",
+    "\n",
+    "    data is a height x width grid of DataFrames (empty where no station exists).\n",
+    "    For each of the first depth row positions, a (height, width, n_channels) frame\n",
+    "    is built from columns 1..n_channels of every non-empty cell. Frames are grouped\n",
+    "    into lists of frames_per_day; a trailing incomplete group is dropped.\n",
+    "\n",
+    "    Returns a list of days, each a list of frames_per_day numpy arrays.\n",
+    "    \"\"\"\n",
+    "    days = []\n",
+    "    frames = []\n",
+    "    for i in tqdm(range(depth)):\n",
+    "        frame = np.zeros((height, width, n_channels))\n",
+    "        for y in range(height):\n",
+    "            for x in range(width):\n",
+    "                cell = data[y][x]\n",
+    "                # Stations with fewer than depth rows keep zeros for the missing rows\n",
+    "                # instead of raising an out-of-bounds IndexError.\n",
+    "                if i < len(cell):\n",
+    "                    # NOTE(review): assumes columns 1..n_channels are the numeric features - confirm against the CSV column order.\n",
+    "                    frame[y][x] = cell.iloc[[i], 1:1 + n_channels].values.flatten()\n",
+    "        frames.append(frame)\n",
+    "        if (i + 1) % frames_per_day == 0:\n",
+    "            days.append(frames)\n",
+    "            frames = []\n",
+    "    return days"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def average_grid_fill(mask,data, height, width):\n",
+    "    \"\"\"Fill every cell without a station with the mean of its neighbouring cells.\n",
+    "\n",
+    "    mask[i][j] == 1 marks cells that hold real data; every other cell of data is\n",
+    "    overwritten in place. Cells are visited in row-major order, so values filled\n",
+    "    earlier in the pass feed into later averages. Returns the same data object.\n",
+    "    \"\"\"\n",
+    "    for i in range(height):\n",
+    "        for j in range(width):\n",
+    "            if mask[i][j] != 1:\n",
+    "                neighbors = get_neighbors(j, i, data)\n",
+    "                # A cell with no neighbours (1x1 grid) is left untouched rather than set to NaN\n",
+    "                if neighbors:\n",
+    "                    # axis=0 averages each channel separately; without it np.mean\n",
+    "                    # collapses all neighbours and channels into a single scalar.\n",
+    "                    data[i][j] = np.mean(neighbors, axis=0)\n",
+    "    return data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_neighbors(x,y,g):\n",
+    "    \"\"\"Return the values of the cells surrounding column x, row y of grid g.\n",
+    "\n",
+    "    Up to eight neighbours are returned; positions outside the grid are skipped\n",
+    "    and the centre cell itself is never included.\n",
+    "    \"\"\"\n",
+    "    neighbors = []\n",
+    "    for i in (y - 1, y, y + 1):\n",
+    "        # Explicit bounds checks replace a bare except so unrelated errors are not hidden\n",
+    "        if not 0 <= i < len(g):\n",
+    "            continue\n",
+    "        for j in (x - 1, x, x + 1):\n",
+    "            if not 0 <= j < len(g[i]):\n",
+    "                continue\n",
+    "            if i != y or j != x:\n",
+    "                neighbors.append(g[i][j])\n",
+    "    return neighbors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_sequence(frames, data_path='./data/train/x_train.hkl', sources_path='./data/train/x_sources.hkl'):\n",
+    "    \"\"\"Dump the frames and a per-frame source label list to hickle files.\n",
+    "\n",
+    "    frames is indexed as frames[day][hour]; every frame gets the index of its day\n",
+    "    (as a string) as its source label.\n",
+    "    \"\"\"\n",
+    "    import hickle as hkl\n",
+    "\n",
+    "    source_list = []\n",
+    "    for day_index in range(len(frames)):\n",
+    "        label = '{}'.format(day_index)\n",
+    "        for _ in range(len(frames[day_index])):\n",
+    "            # append keeps the label whole; `source_list += label` would add one entry per character\n",
+    "            source_list.append(label)\n",
+    "\n",
+    "    # NOTE(review): frames is written with its day/hour nesting intact while source_list is flat - confirm the loader expects this layout.\n",
+    "    hkl.dump(frames, data_path)\n",
+    "    hkl.dump(source_list, sources_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Splits is a dictionary holding train, test, and val.\n",
+    "The values for train, test, and val are lists of tuples holding the category and folder name.\n",
+    "In the end, each image gets a source associated with it.\n",
+    "There is only one data dump and one source hickle dump for each of train, test, and val."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "frames = create_frames(grid, height, width,504)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fill the station-less cells of every frame with the average of their neighbours\n",
+    "for day in tqdm(frames):\n",
+    "    # Walk this day's own frames rather than assuming every day is as long as the first\n",
+    "    for hour, frame in enumerate(day):\n",
+    "        day[hour] = average_grid_fill(mask, frame, height, width)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store_sequence(frames)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np_frames = np.array(frames)\n",
+    "np_frames.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store_sequence(np_frames)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "At this point I have processed the data and made it into discrete frames of data and it is time to run it through the PredNet architecture for training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "np.random.seed(123)\n",
+    "from six.moves import cPickle\n",
+    "\n",
+    "from keras import backend as K\n",
+    "from keras.models import Model\n",
+    "from keras.layers import Input, Dense, Flatten\n",
+    "from keras.layers import LSTM\n",
+    "from keras.layers import TimeDistributed\n",
+    "from keras.callbacks import LearningRateScheduler, ModelCheckpoint\n",
+    "from keras.optimizers import Adam\n",
+    "\n",
+    "from prednet import PredNet\n",
+    "from data_utils import SequenceGenerator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "WEIGHTS_DIR = './weights/'\n",
+    "DATA_DIR = './data/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "save_model = True  # if weights will be saved\n",
+    "weights_file = os.path.join(WEIGHTS_DIR, 'prednet_weather_weights.hdf5')  # where weights will be saved\n",
+    "json_file = os.path.join(WEIGHTS_DIR, 'prednet_weather_model.json')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data files\n",
+    "#TODO: Use the files from NOAA and process them into proper frames\n",
+    "train_file = os.path.join(DATA_DIR,'train/', 'x_train.hkl')\n",
+    "train_sources = os.path.join(DATA_DIR, 'train/', 'x_sources.hkl')\n",
+    "#val_file = os.path.join(DATA_DIR, 'X_val.hkl')\n",
+    "#val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Training parameters\n",
+    "nb_epoch = 1\n",
+    "batch_size = 4\n",
+    "samples_per_epoch = 500\n",
+    "N_seq_val = 100  # number of sequences to use for validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Model parameters\n",
+    "n_channels, im_height, im_width = (12, 20, 40)\n",
+    "input_shape = (n_channels, im_height, im_width) if K.image_data_format() == 'channels_first' else (im_height, im_width, n_channels)\n",
+    "stack_sizes = (n_channels, 48, 96)\n",
+    "R_stack_sizes = stack_sizes\n",
+    "A_filt_sizes = (3, 3)\n",
+    "Ahat_filt_sizes = (3, 3, 3)\n",
+    "R_filt_sizes = (3, 3, 3)\n",
+    "layer_loss_weights = np.array([1., 0., 0.])  # weighting for each layer in final loss; \"L_0\" model:  [1, 0, 0, 0], \"L_all\": [1, 0.1, 0.1, 0.1]\n",
+    "layer_loss_weights = np.expand_dims(layer_loss_weights, 1)\n",
+    "nt = 24  # number of timesteps used for sequences in training\n",
+    "time_loss_weights = 1./ (nt - 1) * np.ones((nt,1))  # equally weight all timesteps except the first\n",
+    "time_loss_weights[0] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prednet = PredNet(stack_sizes, R_stack_sizes,\n",
+    "                  A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,\n",
+    "                  output_mode='error', return_sequences=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs = Input(shape=(nt,) + input_shape)\n",
+    "errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)\n",
+    "errors_by_time = TimeDistributed(Dense(1, trainable=False), weights=[layer_loss_weights, np.zeros(1)], trainable=False)(errors)  # calculate weighted error by layer\n",
+    "errors_by_time = Flatten()(errors_by_time)  # will be (batch_size, nt)\n",
+    "final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False)(errors_by_time)  # weight errors by time\n",
+    "model = Model(inputs=inputs, outputs=final_errors)\n",
+    "model.compile(loss='mean_absolute_error', optimizer='adam')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input_1 (InputLayer)         (None, 24, 20, 40, 12)    0         \n",
+      "_________________________________________________________________\n",
+      "pred_net_1 (PredNet)         (None, 24, 3)             1645548   \n",
+      "_________________________________________________________________\n",
+      "time_distributed_1 (TimeDist (None, 24, 1)             4         \n",
+      "_________________________________________________________________\n",
+      "flatten_1 (Flatten)          (None, 24)                0         \n",
+      "_________________________________________________________________\n",
+      "dense_2 (Dense)              (None, 1)                 25        \n",
+      "=================================================================\n",
+      "Total params: 1,645,577\n",
+      "Trainable params: 1,645,548\n",
+      "Non-trainable params: 29\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "truth = []\n",
+    "for i in range(20):\n",
+    "    truth.append(np.random.randint(255,size=(1)))\n",
+    "output = np.array(truth)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_generator = SequenceGenerator(train_file, train_sources, nt, batch_size=batch_size, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lr_schedule = lambda epoch: 0.001 if epoch < 75 else 0.0001    # start with lr of 0.001 and then drop to 0.0001 after 75 epochs\n",
+    "callbacks = [LearningRateScheduler(lr_schedule)]\n",
+    "#history = model.fit(np_frames, output ,batch_size, nb_epoch, callbacks=callbacks)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# steps_per_epoch must be a whole number of batches, so use integer division\n",
+    "history = model.fit_generator(train_generator, steps_per_epoch=samples_per_epoch // batch_size, epochs=nb_epoch, callbacks=callbacks)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Final/Project.ipynb
+++ b/Final/Project.ipynb
@@ -0,0 +1,532 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Project Notebook\n",
+    "This is the full and complete notebook that takes in the data from NOAA, processes it into frames to be used in the PredNet architecture, and produces a resulting prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List the files in the processed data folder. os.listdir returns entries in\n",
+    "# arbitrary order, so sort them to keep the row order of the loaded data reproducible.\n",
+    "filenames = sorted(os.listdir('D:/Nico/Desktop/processed_data'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "header_wanted = [\n",
+    " 'HOURLYVISIBILITY',\n",
+    " 'HOURLYDRYBULBTEMPC',\n",
+    " 'HOURLYWETBULBTEMPC',\n",
+    " 'HOURLYDewPointTempC',\n",
+    " 'HOURLYRelativeHumidity',\n",
+    " 'HOURLYWindSpeed',\n",
+    " 'HOURLYWindGustSpeed',\n",
+    " 'HOURLYStationPressure',\n",
+    " 'HOURLYPressureTendency',\n",
+    " 'HOURLYPressureChange',\n",
+    " 'HOURLYSeaLevelPressure',\n",
+    " 'HOURLYPrecip',\n",
+    " 'HOURLYAltimeterSetting']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "usecols = ['DATE','STATION'] + header_wanted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Loading all files into a pandas Dataframe\n",
+    "tqdm.pandas()\n",
+    "df = pd.concat([pd.read_csv('D:/Nico/Desktop/processed_data/{}'.format(x), usecols=usecols, low_memory=False) for x in tqdm(filenames)])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "At this point all the data has been loaded into a single dataframe and any data changes have been made. The next step is to break the data up by WBAN and place each station's data in a 2D array at the appropriate grid cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stations = pd.read_csv(\"../Playground/stations_unique.csv\", usecols = ['STATION_ID', 'LON_SCALED', 'LAT_SCALED'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "height = 20\n",
+    "width = 40"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mask = [([0] * width) for i in range(height)]\n",
+    "\n",
+    "wban_loc = dict(zip(stations.STATION_ID,zip(stations.LON_SCALED,stations.LAT_SCALED)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grid = [([pd.DataFrame()] * width) for i in range(height)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for key, value in tqdm(wban_loc.items()):\n",
+    "    mask[value[1]][value[0]] = 1\n",
+    "    grid[value[1]][value[0]] = df.loc[df.STATION == key]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.imshow(mask)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_frames(data, height, width, depth, n_channels=12, frames_per_day=24):\n",
+    "    \"\"\"Stack per-station rows into frames and group the frames by day.\n",
+    "\n",
+    "    data is a height x width grid of DataFrames (empty where no station exists).\n",
+    "    For each of the first depth row positions, a (height, width, n_channels) frame\n",
+    "    is built from columns 1..n_channels of every non-empty cell. Frames are grouped\n",
+    "    into lists of frames_per_day; a trailing incomplete group is dropped.\n",
+    "\n",
+    "    Returns a list of days, each a list of frames_per_day numpy arrays.\n",
+    "    \"\"\"\n",
+    "    days = []\n",
+    "    frames = []\n",
+    "    for i in tqdm(range(depth)):\n",
+    "        frame = np.zeros((height, width, n_channels))\n",
+    "        for y in range(height):\n",
+    "            for x in range(width):\n",
+    "                cell = data[y][x]\n",
+    "                # Stations with fewer than depth rows keep zeros for the missing rows\n",
+    "                # instead of raising an out-of-bounds IndexError.\n",
+    "                if i < len(cell):\n",
+    "                    # NOTE(review): assumes columns 1..n_channels are the numeric features - confirm against the CSV column order.\n",
+    "                    frame[y][x] = cell.iloc[[i], 1:1 + n_channels].values.flatten()\n",
+    "        frames.append(frame)\n",
+    "        if (i + 1) % frames_per_day == 0:\n",
+    "            days.append(frames)\n",
+    "            frames = []\n",
+    "    return days"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def average_grid_fill(mask,data, height, width):\n",
+    "    \"\"\"Fill every cell without a station with the mean of its neighbouring cells.\n",
+    "\n",
+    "    mask[i][j] == 1 marks cells that hold real data; every other cell of data is\n",
+    "    overwritten in place. Cells are visited in row-major order, so values filled\n",
+    "    earlier in the pass feed into later averages. Returns the same data object.\n",
+    "    \"\"\"\n",
+    "    for i in range(height):\n",
+    "        for j in range(width):\n",
+    "            if mask[i][j] != 1:\n",
+    "                neighbors = get_neighbors(j, i, data)\n",
+    "                # A cell with no neighbours (1x1 grid) is left untouched rather than set to NaN\n",
+    "                if neighbors:\n",
+    "                    # axis=0 averages each channel separately; without it np.mean\n",
+    "                    # collapses all neighbours and channels into a single scalar.\n",
+    "                    data[i][j] = np.mean(neighbors, axis=0)\n",
+    "    return data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_neighbors(x,y,g):\n",
+    "    \"\"\"Return the values of the cells surrounding column x, row y of grid g.\n",
+    "\n",
+    "    Up to eight neighbours are returned; positions outside the grid are skipped\n",
+    "    and the centre cell itself is never included.\n",
+    "    \"\"\"\n",
+    "    neighbors = []\n",
+    "    for i in (y - 1, y, y + 1):\n",
+    "        # Explicit bounds checks replace a bare except so unrelated errors are not hidden\n",
+    "        if not 0 <= i < len(g):\n",
+    "            continue\n",
+    "        for j in (x - 1, x, x + 1):\n",
+    "            if not 0 <= j < len(g[i]):\n",
+    "                continue\n",
+    "            if i != y or j != x:\n",
+    "                neighbors.append(g[i][j])\n",
+    "    return neighbors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_sequence(frames, data_path='./data/train/x_train.hkl', sources_path='./data/train/x_sources.hkl'):\n",
+    "    \"\"\"Dump the frames and a per-frame source label list to hickle files.\n",
+    "\n",
+    "    frames is indexed as frames[day][hour]; every frame gets the index of its day\n",
+    "    (as a string) as its source label.\n",
+    "    \"\"\"\n",
+    "    import hickle as hkl\n",
+    "\n",
+    "    source_list = []\n",
+    "    for day_index in range(len(frames)):\n",
+    "        label = '{}'.format(day_index)\n",
+    "        for _ in range(len(frames[day_index])):\n",
+    "            # append keeps the label whole; `source_list += label` would add one entry per character\n",
+    "            source_list.append(label)\n",
+    "\n",
+    "    # NOTE(review): frames is written with its day/hour nesting intact while source_list is flat - confirm the loader expects this layout.\n",
+    "    hkl.dump(frames, data_path)\n",
+    "    hkl.dump(source_list, sources_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Splits is a dictionary holding train, test, and val.\n",
+    "The values for train, test, and val are lists of tuples holding the category and folder name.\n",
+    "In the end, each image gets a source associated with it.\n",
+    "There is only one data dump and one source hickle dump for each of train, test, and val."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "frames = create_frames(grid, height, width,504)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fill the station-less cells of every frame with the average of their neighbours\n",
+    "for day in tqdm(frames):\n",
+    "    # Walk this day's own frames rather than assuming every day is as long as the first\n",
+    "    for hour, frame in enumerate(day):\n",
+    "        day[hour] = average_grid_fill(mask, frame, height, width)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store_sequence(frames)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np_frames = np.array(frames)\n",
+    "np_frames.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store_sequence(np_frames)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "At this point I have processed the data and made it into discrete frames of data and it is time to run it through the PredNet architecture for training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "np.random.seed(123)\n",
+    "from six.moves import cPickle\n",
+    "\n",
+    "from keras import backend as K\n",
+    "from keras.models import Model\n",
+    "from keras.layers import Input, Dense, Flatten\n",
+    "from keras.layers import LSTM\n",
+    "from keras.layers import TimeDistributed\n",
+    "from keras.callbacks import LearningRateScheduler, ModelCheckpoint\n",
+    "from keras.optimizers import Adam\n",
+    "\n",
+    "from prednet import PredNet\n",
+    "from data_utils import SequenceGenerator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "WEIGHTS_DIR = './weights/'\n",
+    "DATA_DIR = './data/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "save_model = True  # if weights will be saved\n",
+    "weights_file = os.path.join(WEIGHTS_DIR, 'prednet_weather_weights.hdf5')  # where weights will be saved\n",
+    "json_file = os.path.join(WEIGHTS_DIR, 'prednet_weather_model.json')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data files\n",
+    "#TODO: Use the files from NOAA and process them into proper frames\n",
+    "train_file = os.path.join(DATA_DIR,'train/', 'x_train.hkl')\n",
+    "train_sources = os.path.join(DATA_DIR, 'train/', 'x_sources.hkl')\n",
+    "#val_file = os.path.join(DATA_DIR, 'X_val.hkl')\n",
+    "#val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Training parameters\n",
+    "nb_epoch = 1\n",
+    "batch_size = 4\n",
+    "samples_per_epoch = 500\n",
+    "N_seq_val = 100  # number of sequences to use for validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Model parameters\n",
+    "n_channels, im_height, im_width = (12, 20, 40)\n",
+    "input_shape = (n_channels, im_height, im_width) if K.image_data_format() == 'channels_first' else (im_height, im_width, n_channels)\n",
+    "stack_sizes = (n_channels, 48, 96)\n",
+    "R_stack_sizes = stack_sizes\n",
+    "A_filt_sizes = (3, 3)\n",
+    "Ahat_filt_sizes = (3, 3, 3)\n",
+    "R_filt_sizes = (3, 3, 3)\n",
+    "layer_loss_weights = np.array([1., 0., 0.])  # weighting for each layer in final loss; \"L_0\" model:  [1, 0, 0, 0], \"L_all\": [1, 0.1, 0.1, 0.1]\n",
+    "layer_loss_weights = np.expand_dims(layer_loss_weights, 1)\n",
+    "nt = 24  # number of timesteps used for sequences in training\n",
+    "time_loss_weights = 1./ (nt - 1) * np.ones((nt,1))  # equally weight all timesteps except the first\n",
+    "time_loss_weights[0] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prednet = PredNet(stack_sizes, R_stack_sizes,\n",
+    "                  A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,\n",
+    "                  output_mode='error', return_sequences=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs = Input(shape=(nt,) + input_shape)\n",
+    "errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)\n",
+    "errors_by_time = TimeDistributed(Dense(1, trainable=False), weights=[layer_loss_weights, np.zeros(1)], trainable=False)(errors)  # calculate weighted error by layer\n",
+    "errors_by_time = Flatten()(errors_by_time)  # will be (batch_size, nt)\n",
+    "final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False)(errors_by_time)  # weight errors by time\n",
+    "model = Model(inputs=inputs, outputs=final_errors)\n",
+    "model.compile(loss='mean_absolute_error', optimizer='adam')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input_1 (InputLayer)         (None, 24, 20, 40, 12)    0         \n",
+      "_________________________________________________________________\n",
+      "pred_net_1 (PredNet)         (None, 24, 3)             1645548   \n",
+      "_________________________________________________________________\n",
+      "time_distributed_1 (TimeDist (None, 24, 1)             4         \n",
+      "_________________________________________________________________\n",
+      "flatten_1 (Flatten)          (None, 24)                0         \n",
+      "_________________________________________________________________\n",
+      "dense_2 (Dense)              (None, 1)                 25        \n",
+      "=================================================================\n",
+      "Total params: 1,645,577\n",
+      "Trainable params: 1,645,548\n",
+      "Non-trainable params: 29\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "truth = []\n",
+    "for i in range(20):\n",
+    "    truth.append(np.random.randint(255,size=(1)))\n",
+    "output = np.array(truth)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_generator = SequenceGenerator(train_file, train_sources, nt, batch_size=batch_size, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lr_schedule = lambda epoch: 0.001 if epoch < 75 else 0.0001    # start with lr of 0.001 and then drop to 0.0001 after 75 epochs\n",
+    "callbacks = [LearningRateScheduler(lr_schedule)]\n",
+    "#history = model.fit(np_frames, output ,batch_size, nb_epoch, callbacks=callbacks)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# steps_per_epoch must be a whole number of batches, so use integer division\n",
+    "history = model.fit_generator(train_generator, steps_per_epoch=samples_per_epoch // batch_size, epochs=nb_epoch, callbacks=callbacks)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Final/pycache/data_utils.cpython-36.pyc
+++ b/Final/pycache/data_utils.cpython-36.pyc
--- a/Final/pycache/keras_utils.cpython-36.pyc
+++ b/Final/pycache/keras_utils.cpython-36.pyc
--- a/Final/pycache/prednet.cpython-36.pyc
+++ b/Final/pycache/prednet.cpython-36.pyc
--- a/Final/data/train/x_sources.hkl
+++ b/Final/data/train/x_sources.hkl
--- a/Final/data/train/x_train.hkl
+++ b/Final/data/train/x_train.hkl
--- a/Final/data_utils.py
+++ b/Final/data_utils.py
@@ -0,0 +1,66 @@
+import hickle as hkl
+import numpy as np
+from keras import backend as K
+from keras.preprocessing.image import Iterator
+
+# Data generator that creates sequences for input into PredNet.
+class SequenceGenerator(Iterator):
+    def __init__(self, data_file, source_file, nt,
+                 batch_size=8, shuffle=False, seed=None,
+                 output_mode='error', sequence_start_mode='all', N_seq=None,
+                 data_format=K.image_data_format()):
+        self.X = hkl.load(data_file)  # X will be like (n_images, nb_cols, nb_rows, nb_channels)
+        self.sources = hkl.load(source_file) # source for each image so when creating sequences can assure that consecutive frames are from same video
+        self.nt = nt
+        self.batch_size = batch_size
+        self.data_format = data_format
+        assert sequence_start_mode in {'all', 'unique'}, 'sequence_start_mode must be in {all, unique}'
+        self.sequence_start_mode = sequence_start_mode
+        assert output_mode in {'error', 'prediction'}, 'output_mode must be in {error, prediction}'
+        self.output_mode = output_mode
+
+        if self.data_format == 'channels_first':
+            self.X = np.transpose(self.X, (0, 3, 1, 2))
+        self.im_shape = self.X[0].shape
+
+        if self.sequence_start_mode == 'all':  # allow for any possible sequence, starting from any frame
+            self.possible_starts = np.array([i for i in range(self.X.shape[0] - self.nt) if self.sources[i] == self.sources[i + self.nt - 1]])
+        elif self.sequence_start_mode == 'unique':  #create sequences where each unique frame is in at most one sequence
+            curr_location = 0
+            possible_starts = []
+            while curr_location < self.X.shape[0] - self.nt + 1:
+                if self.sources[curr_location] == self.sources[curr_location + self.nt - 1]:
+                    possible_starts.append(curr_location)
+                    curr_location += self.nt
+                else:
+                    curr_location += 1
+            self.possible_starts = possible_starts
+
+        if shuffle:
+            self.possible_starts = np.random.permutation(self.possible_starts)
+        if N_seq is not None and len(self.possible_starts) > N_seq:  # select a subset of sequences if want to
+            self.possible_starts = self.possible_starts[:N_seq]
+        self.N_sequences = len(self.possible_starts)
+        super(SequenceGenerator, self).__init__(len(self.possible_starts), batch_size, shuffle, seed)
+
+    def next(self):
+        with self.lock:
+            index_array, current_index, current_batch_size = next(self.index_generator)
+        batch_x = np.zeros((current_batch_size, self.nt) + self.im_shape, np.float32)
+        for i, idx in enumerate(index_array):
+            idx = self.possible_starts[idx]
+            batch_x[i] = self.preprocess(self.X[idx:idx+self.nt])
+        if self.output_mode == 'error':  # model outputs errors, so y should be zeros
+            batch_y = np.zeros(current_batch_size, np.float32)
+        elif self.output_mode == 'prediction':  # output actual pixels
+            batch_y = batch_x
+        return batch_x, batch_y
+
+    def preprocess(self, X):
+        return X.astype(np.float32) / 255
+
+    def create_all(self):
+        X_all = np.zeros((self.N_sequences, self.nt) + self.im_shape, np.float32)
+        for i, idx in enumerate(self.possible_starts):
+            X_all[i] = self.preprocess(self.X[idx:idx+self.nt])
+        return X_all
--- a/Final/keras_utils.py
+++ b/Final/keras_utils.py
@@ -0,0 +1,58 @@
+import os
+import numpy as np
+
+from keras import backend as K
+from keras.legacy.interfaces import generate_legacy_interface, recurrent_args_preprocessor
+from keras.models import model_from_json
+
+# Decorator built with Keras' legacy-interface helper and applied to PredNet.__init__.
+# It declares which arguments may be passed positionally, renames the old keyword
+# arguments ('dim_ordering' -> 'data_format', 'consume_less' -> 'implementation') and
+# maps their old values onto the new ones listed in `value_conversions`.
+legacy_prednet_support = generate_legacy_interface(
+    allowed_positional_args=['stack_sizes', 'R_stack_sizes',
+                            'A_filt_sizes', 'Ahat_filt_sizes', 'R_filt_sizes'],
+    conversions=[('dim_ordering', 'data_format'),
+                 ('consume_less', 'implementation')],
+    value_conversions={'dim_ordering': {'tf': 'channels_last',
+                                        'th': 'channels_first',
+                                        'default': None},
+                        'consume_less': {'cpu': 0,
+                                        'mem': 1,
+                                        'gpu': 2}},
+    preprocessor=recurrent_args_preprocessor)
+
+# Convert old Keras (1.2) json models and weights to Keras 2.0
+def convert_model_to_keras2(old_json_file, old_weights_file, new_json_file, new_weights_file):
+    from prednet import PredNet
+    # If using tensorflow, it doesn't allow you to load the old weights.
+    if K.backend() != 'theano':
+        os.environ['KERAS_BACKEND'] = backend
+        reload(K)
+
+    f = open(old_json_file, 'r')
+    json_string = f.read()
+    f.close()
+    model = model_from_json(json_string, custom_objects = {'PredNet': PredNet})
+    model.load_weights(old_weights_file)
+
+    weights = model.layers[1].get_weights()
+    if weights[0].shape[0] == model.layers[1].stack_sizes[1]:
+        for i, w in enumerate(weights):
+            if w.ndim == 4:
+                weights[i] = np.transpose(w, (2, 3, 1, 0))
+        model.set_weights(weights)
+
+    model.save_weights(new_weights_file)
+    json_string = model.to_json()
+    with open(new_json_file, "w") as f:
+        f.write(json_string)
+
+
+if __name__ == '__main__':
+    old_dir = './model_data/'
+    new_dir = './model_data_keras2/'
+    if not os.path.exists(new_dir):
+        os.mkdir(new_dir)
+    for w_tag in ['', '-Lall', '-extrapfinetuned']:
+        m_tag = '' if w_tag == '-Lall' else w_tag
+        convert_model_to_keras2(old_dir + 'prednet_kitti_model' + m_tag + '.json',
+                                old_dir + 'prednet_kitti_weights' + w_tag + '.hdf5',
+                                new_dir + 'prednet_kitti_model' + m_tag + '.json',
+                                new_dir + 'prednet_kitti_weights' + w_tag + '.hdf5')
--- a/Final/prednet.py
+++ b/Final/prednet.py
@@ -0,0 +1,311 @@
+import numpy as np
+
+from keras import backend as K
+from keras import activations
+from keras.layers import Recurrent
+from keras.layers import Conv2D, UpSampling2D, MaxPooling2D
+from keras.engine import InputSpec
+from keras_utils import legacy_prednet_support
+
+class PredNet(Recurrent):
+    '''PredNet architecture - Lotter 2016.
+        Stacked convolutional LSTM inspired by predictive coding principles.
+
+    # Arguments
+        stack_sizes: number of channels in targets (A) and predictions (Ahat) in each layer of the architecture.
+            Length is the number of layers in the architecture.
+            First element is the number of channels in the input.
+            Ex. (3, 16, 32) would correspond to a 3 layer architecture that takes in RGB images and has 16 and 32
+                channels in the second and third layers, respectively.
+        R_stack_sizes: number of channels in the representation (R) modules.
+            Length must equal length of stack_sizes, but the number of channels per layer can be different.
+        A_filt_sizes: filter sizes for the target (A) modules.
+            Has length of 1 - len(stack_sizes).
+            Ex. (3, 3) would mean that targets for layers 2 and 3 are computed by a 3x3 convolution of the errors (E)
+                from the layer below (followed by max-pooling)
+        Ahat_filt_sizes: filter sizes for the prediction (Ahat) modules.
+            Has length equal to length of stack_sizes.
+            Ex. (3, 3, 3) would mean that the predictions for each layer are computed by a 3x3 convolution of the
+                representation (R) modules at each layer.
+        R_filt_sizes: filter sizes for the representation (R) modules.
+            Has length equal to length of stack_sizes.
+            Corresponds to the filter sizes for all convolutions in the LSTM.
+        pixel_max: the maximum pixel value.
+            Used to clip the pixel-layer prediction.
+        error_activation: activation function for the error (E) units.
+        A_activation: activation function for the target (A) and prediction (A_hat) units.
+        LSTM_activation: activation function for the cell and hidden states of the LSTM.
+        LSTM_inner_activation: activation function for the gates in the LSTM.
+        output_mode: either 'error', 'prediction', 'all' or layer specification (ex. R2, see below).
+            Controls what is outputted by the PredNet.
+            If 'error', the mean response of the error (E) units of each layer will be outputted.
+                That is, the output shape will be (batch_size, nb_layers).
+            If 'prediction', the frame prediction will be outputted.
+            If 'all', the output will be the frame prediction concatenated with the mean layer errors.
+                The frame prediction is flattened before concatenation.
+                Nomenclature of 'all' is kept for backwards compatibility, but should not be confused with returning all of the layers of the model
+            For returning the features of a particular layer, output_mode should be of the form unit_type + layer_number.
+                For instance, to return the features of the LSTM "representational" units in the lowest layer, output_mode should be specificied as 'R0'.
+                The possible unit types are 'R', 'Ahat', 'A', and 'E' corresponding to the 'representation', 'prediction', 'target', and 'error' units respectively.
+        extrap_start_time: time step for which model will start extrapolating.
+            Starting at this time step, the prediction from the previous time step will be treated as the "actual"
+        data_format: 'channels_first' or 'channels_last'.
+            It defaults to the `image_data_format` value found in your
+            Keras config file at `~/.keras/keras.json`.
+
+    # References
+        - [Deep predictive coding networks for video prediction and unsupervised learning](https://arxiv.org/abs/1605.08104)
+        - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf)
+        - [Convolutional LSTM network: a machine learning approach for precipitation nowcasting](http://arxiv.org/abs/1506.04214)
+        - [Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects](http://www.nature.com/neuro/journal/v2/n1/pdf/nn0199_79.pdf)
+    '''
+    @legacy_prednet_support
+    def __init__(self, stack_sizes, R_stack_sizes,
+                 A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,
+                 pixel_max=1., error_activation='relu', A_activation='relu',
+                 LSTM_activation='tanh', LSTM_inner_activation='hard_sigmoid',
+                 output_mode='error', extrap_start_time=None,
+                 data_format=K.image_data_format(), **kwargs):
+        self.stack_sizes = stack_sizes
+        self.nb_layers = len(stack_sizes)
+        assert len(R_stack_sizes) == self.nb_layers, 'len(R_stack_sizes) must equal len(stack_sizes)'
+        self.R_stack_sizes = R_stack_sizes
+        assert len(A_filt_sizes) == (self.nb_layers - 1), 'len(A_filt_sizes) must equal len(stack_sizes) - 1'
+        self.A_filt_sizes = A_filt_sizes
+        assert len(Ahat_filt_sizes) == self.nb_layers, 'len(Ahat_filt_sizes) must equal len(stack_sizes)'
+        self.Ahat_filt_sizes = Ahat_filt_sizes
+        assert len(R_filt_sizes) == (self.nb_layers), 'len(R_filt_sizes) must equal len(stack_sizes)'
+        self.R_filt_sizes = R_filt_sizes
+
+        self.pixel_max = pixel_max
+        self.error_activation = activations.get(error_activation)
+        self.A_activation = activations.get(A_activation)
+        self.LSTM_activation = activations.get(LSTM_activation)
+        self.LSTM_inner_activation = activations.get(LSTM_inner_activation)
+
+        default_output_modes = ['prediction', 'error', 'all']
+        layer_output_modes = [layer + str(n) for n in range(self.nb_layers) for layer in ['R', 'E', 'A', 'Ahat']]
+        assert output_mode in default_output_modes + layer_output_modes, 'Invalid output_mode: ' + str(output_mode)
+        self.output_mode = output_mode
+        if self.output_mode in layer_output_modes:
+            self.output_layer_type = self.output_mode[:-1]
+            self.output_layer_num = int(self.output_mode[-1])
+        else:
+            self.output_layer_type = None
+            self.output_layer_num = None
+        self.extrap_start_time = extrap_start_time
+
+        assert data_format in {'channels_last', 'channels_first'}, 'data_format must be in {channels_last, channels_first}'
+        self.data_format = data_format
+        self.channel_axis = -3 if data_format == 'channels_first' else -1
+        self.row_axis = -2 if data_format == 'channels_first' else -3
+        self.column_axis = -1 if data_format == 'channels_first' else -2
+        super(PredNet, self).__init__(**kwargs)
+        self.input_spec = [InputSpec(ndim=5)]
+
+    def compute_output_shape(self, input_shape):
+        if self.output_mode == 'prediction':
+            out_shape = input_shape[2:]
+        elif self.output_mode == 'error':
+            out_shape = (self.nb_layers,)
+        elif self.output_mode == 'all':
+            out_shape = (np.prod(input_shape[2:]) + self.nb_layers,)
+        else:
+            stack_str = 'R_stack_sizes' if self.output_layer_type == 'R' else 'stack_sizes'
+            stack_mult = 2 if self.output_layer_type == 'E' else 1
+            out_stack_size = stack_mult * getattr(self, stack_str)[self.output_layer_num]
+            out_nb_row = input_shape[self.row_axis] / 2**self.output_layer_num
+            out_nb_col = input_shape[self.column_axis] / 2**self.output_layer_num
+            if self.data_format == 'channels_first':
+                out_shape = (out_stack_size, out_nb_row, out_nb_col)
+            else:
+                out_shape = (out_nb_row, out_nb_col, out_stack_size)
+
+        if self.return_sequences:
+            return (input_shape[0], input_shape[1]) + out_shape
+        else:
+            return (input_shape[0],) + out_shape
+
+    def get_initial_state(self, x):
+        input_shape = self.input_spec[0].shape
+        init_nb_row = input_shape[self.row_axis]
+        init_nb_col = input_shape[self.column_axis]
+
+        base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
+        non_channel_axis = -1 if self.data_format == 'channels_first' else -2
+        for _ in range(2):
+            base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
+        base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)
+
+        initial_states = []
+        states_to_pass = ['r', 'c', 'e']
+        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
+        if self.extrap_start_time is not None:
+           states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
+           nlayers_to_pass['ahat'] = 1
+        for u in states_to_pass:
+            for l in range(nlayers_to_pass[u]):
+                ds_factor = 2 ** l
+                nb_row = init_nb_row // ds_factor
+                nb_col = init_nb_col // ds_factor
+                if u in ['r', 'c']:
+                    stack_size = self.R_stack_sizes[l]
+                elif u == 'e':
+                    stack_size = 2 * self.stack_sizes[l]
+                elif u == 'ahat':
+                    stack_size = self.stack_sizes[l]
+                output_size = stack_size * nb_row * nb_col  # flattened size
+
+                reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size)
+                initial_state = K.dot(base_initial_state, reducer) # (samples, output_size)
+                if self.data_format == 'channels_first':
+                    output_shp = (-1, stack_size, nb_row, nb_col)
+                else:
+                    output_shp = (-1, nb_row, nb_col, stack_size)
+                initial_state = K.reshape(initial_state, output_shp)
+                initial_states += [initial_state]
+
+        if K._BACKEND == 'theano':
+            from theano import tensor as T
+            # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
+            # In our case, this is a problem when training on grayscale images, and the below line fixes it.
+            initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states]
+
+        if self.extrap_start_time is not None:
+            initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')]  # the last state will correspond to the current timestep
+        return initial_states
+
+    def build(self, input_shape):
+        self.input_spec = [InputSpec(shape=input_shape)]
+        self.conv_layers = {c: [] for c in ['i', 'f', 'c', 'o', 'a', 'ahat']}
+
+        for l in range(self.nb_layers):
+            for c in ['i', 'f', 'c', 'o']:
+                act = self.LSTM_activation if c == 'c' else self.LSTM_inner_activation
+                self.conv_layers[c].append(Conv2D(self.R_stack_sizes[l], self.R_filt_sizes[l], padding='same', activation=act, data_format=self.data_format))
+
+            act = 'relu' if l == 0 else self.A_activation
+            self.conv_layers['ahat'].append(Conv2D(self.stack_sizes[l], self.Ahat_filt_sizes[l], padding='same', activation=act, data_format=self.data_format))
+
+            if l < self.nb_layers - 1:
+                self.conv_layers['a'].append(Conv2D(self.stack_sizes[l+1], self.A_filt_sizes[l], padding='same', activation=self.A_activation, data_format=self.data_format))
+
+        self.upsample = UpSampling2D(data_format=self.data_format)
+        self.pool = MaxPooling2D(data_format=self.data_format)
+
+        self.trainable_weights = []
+        nb_row, nb_col = (input_shape[-2], input_shape[-1]) if self.data_format == 'channels_first' else (input_shape[-3], input_shape[-2])
+        for c in sorted(self.conv_layers.keys()):
+            for l in range(len(self.conv_layers[c])):
+                ds_factor = 2 ** l
+                if c == 'ahat':
+                    nb_channels = self.R_stack_sizes[l]
+                elif c == 'a':
+                    nb_channels = 2 * self.R_stack_sizes[l]
+                else:
+                    nb_channels = self.stack_sizes[l] * 2 + self.R_stack_sizes[l]
+                    if l < self.nb_layers - 1:
+                        nb_channels += self.R_stack_sizes[l+1]
+                in_shape = (input_shape[0], nb_channels, nb_row // ds_factor, nb_col // ds_factor)
+                if self.data_format == 'channels_last': in_shape = (in_shape[0], in_shape[2], in_shape[3], in_shape[1])
+                with K.name_scope('layer_' + c + '_' + str(l)):
+                    self.conv_layers[c][l].build(in_shape)
+                self.trainable_weights += self.conv_layers[c][l].trainable_weights
+
+        self.states = [None] * self.nb_layers*3
+
+        if self.extrap_start_time is not None:
+            self.t_extrap = K.variable(self.extrap_start_time, int if K.backend() != 'tensorflow' else 'int32')
+            self.states += [None] * 2  # [previous frame prediction, timestep]
+
+    def step(self, a, states):
+        r_tm1 = states[:self.nb_layers]
+        c_tm1 = states[self.nb_layers:2*self.nb_layers]
+        e_tm1 = states[2*self.nb_layers:3*self.nb_layers]
+
+        if self.extrap_start_time is not None:
+            t = states[-1]
+            a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual
+
+        c = []
+        r = []
+        e = []
+
+        # Update R units starting from the top
+        for l in reversed(range(self.nb_layers)):
+            inputs = [r_tm1[l], e_tm1[l]]
+            if l < self.nb_layers - 1:
+                inputs.append(r_up)
+
+            inputs = K.concatenate(inputs, axis=self.channel_axis)
+            i = self.conv_layers['i'][l].call(inputs)
+            f = self.conv_layers['f'][l].call(inputs)
+            o = self.conv_layers['o'][l].call(inputs)
+            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
+            _r = o * self.LSTM_activation(_c)
+            c.insert(0, _c)
+            r.insert(0, _r)
+
+            if l > 0:
+                r_up = self.upsample.call(_r)
+
+        # Update feedforward path starting from the bottom
+        for l in range(self.nb_layers):
+            ahat = self.conv_layers['ahat'][l].call(r[l])
+            if l == 0:
+                ahat = K.minimum(ahat, self.pixel_max)
+                frame_prediction = ahat
+
+            # compute errors
+            e_up = self.error_activation(ahat - a)
+            e_down = self.error_activation(a - ahat)
+
+            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))
+
+            if self.output_layer_num == l:
+                if self.output_layer_type == 'A':
+                    output = a
+                elif self.output_layer_type == 'Ahat':
+                    output = ahat
+                elif self.output_layer_type == 'R':
+                    output = r[l]
+                elif self.output_layer_type == 'E':
+                    output = e[l]
+
+            if l < self.nb_layers - 1:
+                a = self.conv_layers['a'][l].call(e[l])
+                a = self.pool.call(a)  # target for next layer
+
+        if self.output_layer_type is None:
+            if self.output_mode == 'prediction':
+                output = frame_prediction
+            else:
+                for l in range(self.nb_layers):
+                    layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
+                    all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
+                if self.output_mode == 'error':
+                    output = all_error
+                else:
+                    output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)
+
+        states = r + c + e
+        if self.extrap_start_time is not None:
+            states += [frame_prediction, t + 1]
+        return output, states
+
+    def get_config(self):
+        config = {'stack_sizes': self.stack_sizes,
+                  'R_stack_sizes': self.R_stack_sizes,
+                  'A_filt_sizes': self.A_filt_sizes,
+                  'Ahat_filt_sizes': self.Ahat_filt_sizes,
+                  'R_filt_sizes': self.R_filt_sizes,
+                  'pixel_max': self.pixel_max,
+                  'error_activation': self.error_activation.__name__,
+                  'A_activation': self.A_activation.__name__,
+                  'LSTM_activation': self.LSTM_activation.__name__,
+                  'LSTM_inner_activation': self.LSTM_inner_activation.__name__,
+                  'data_format': self.data_format,
+                  'extrap_start_time': self.extrap_start_time,
+                  'output_mode': self.output_mode}
+        base_config = super(PredNet, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
--- a/preprocessing/.ipynb_checkpoints/preprocess_data-checkpoint.ipynb
+++ b/preprocessing/.ipynb_checkpoints/preprocess_data-checkpoint.ipynb
--- a/preprocessing/preprocess_data.ipynb
+++ b/preprocessing/preprocess_data.ipynb