{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocess the raw data from NOAA\n", "This notebook is setup to take in the CSV from NOAA and remove the unneccasary data. This will also seperate out each station for later positioning." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#Getting a list of files in raw data folder\n", "filenames = os.listdir('./full_grid')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "header_wanted = [\n", " 'HOURLYVISIBILITY',\n", " 'HOURLYDRYBULBTEMPC',\n", " 'HOURLYWETBULBTEMPC',\n", " 'HOURLYDewPointTempC',\n", " 'HOURLYRelativeHumidity',\n", " 'HOURLYWindSpeed',\n", " 'HOURLYWindGustSpeed',\n", " 'HOURLYStationPressure',\n", " 'HOURLYPressureTendency',\n", " 'HOURLYPressureChange',\n", " 'HOURLYSeaLevelPressure',\n", " 'HOURLYPrecip',\n", " 'HOURLYAltimeterSetting']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "usecols = ['DATE','STATION'] + header_wanted" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████████████████████████████████████████████████████████████| 82/82 [02:07<00:00, 1.55s/it]\n" ] } ], "source": [ "#Loading all files into a pandas Dataframe\n", "tqdm.pandas()\n", "df = pd.concat([pd.read_csv('./full_grid/{}'.format(x), usecols=usecols, low_memory=False) for x in tqdm(filenames)])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "406" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Getting the station names\n", "stations = pd.read_csv(\"./stations_unique.csv\", usecols = ['STATION_ID','LAT_SCALED', 'LON_SCALED'])\n", "wban_list = stations['STATION_ID'].tolist()\n", "len(wban_list)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "#removes any letters or oddities that appear with numbers for any of the data\n", "def remove_letters(headers,dataframes):\n", " for i in tqdm(headers):\n", " dataframes[i].replace(regex=True,inplace=True,to_replace=r'\\D',value=r'')\n", " dataframes[i] = dataframes[i].apply(pd.to_numeric)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|███████████████████████████████████████████████████████████████████████████████| 13/13 [1:18:33<00:00, 362.54s/it]\n" ] } ], "source": [ "remove_letters(header_wanted,df)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [10:29<00:00, 1.55s/it]\n" ] } ], "source": [ "by_station_list = []\n", "# Divides dataframe into smaller dataframes of a single WBAN and stores in a list in order of wban_list\n", "for i in tqdm(wban_list):\n", " by_station_list.append(df.loc[df.STATION == i])\n", "del df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 46719 entries, 109127 to 155845\n", "Data columns (total 15 columns):\n", "STATION 46719 non-null object\n", "DATE 46719 non-null object\n", "HOURLYVISIBILITY 43438 non-null float64\n", "HOURLYDRYBULBTEMPC 46196 non-null float64\n", "HOURLYWETBULBTEMPC 44790 non-null float64\n", "HOURLYDewPointTempC 46195 non-null float64\n", "HOURLYRelativeHumidity 46195 non-null float64\n", "HOURLYWindSpeed 44852 non-null float64\n", "HOURLYWindGustSpeed 6657 non-null float64\n", "HOURLYStationPressure 44796 non-null float64\n", "HOURLYPressureTendency 0 non-null float64\n", "HOURLYPressureChange 0 non-null float64\n", "HOURLYSeaLevelPressure 0 non-null float64\n", "HOURLYPrecip 2096 non-null float64\n", "HOURLYAltimeterSetting 46197 non-null float64\n", "dtypes: float64(13), object(2)\n", "memory usage: 5.7+ MB\n" ] } ], "source": [ "by_station_list[0].info()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [00:21<00:00, 18.75it/s]\n" ] } ], "source": [ "#Sets the date info as the index\n", "for i in tqdm(range(len(by_station_list))):\n", " by_station_list[i]['STATION_ID'] = by_station_list[i]['STATION']\n", " by_station_list[i] = by_station_list[i].set_index(pd.DatetimeIndex(by_station_list[i]['DATE']))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STATIONDATEHOURLYVISIBILITYHOURLYDRYBULBTEMPCHOURLYWETBULBTEMPCHOURLYDewPointTempCHOURLYRelativeHumidityHOURLYWindSpeedHOURLYWindGustSpeedHOURLYStationPressureHOURLYPressureTendencyHOURLYPressureChangeHOURLYSeaLevelPressureHOURLYPrecipHOURLYAltimeterSettingSTATION_ID
DATE
2016-08-01 00:15:00WBAN:001842016-08-01 00:151000.0245.024.4245.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 00:35:00WBAN:001842016-08-01 00:35175.0242.024.4242.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 00:55:00WBAN:001842016-08-01 00:55400.0242.024.4242.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 01:15:00WBAN:001842016-08-01 01:15100.0240.023.9240.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 01:35:00WBAN:001842016-08-01 01:35500.0240.023.9240.0100.00.0NaN3000.0NaNNaNNaNNaN3005.0WBAN:00184
2016-08-01 01:55:00WBAN:001842016-08-01 01:55700.0241.023.9241.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 02:15:00WBAN:001842016-08-01 02:15700.0241.023.9241.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 02:35:00WBAN:001842016-08-01 02:35200.0236.023.3236.0100.00.0NaN2998.0NaNNaNNaNNaN3003.0WBAN:00184
2016-08-01 02:55:00WBAN:001842016-08-01 02:5550.0235.023.3235.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 03:15:00WBAN:001842016-08-01 03:15150.0237.023.9237.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 03:35:00WBAN:001842016-08-01 03:3550.0238.023.9238.0100.00.0NaN2998.0NaNNaNNaNNaN3003.0WBAN:00184
2016-08-01 03:55:00WBAN:001842016-08-01 03:5575.0237.023.9237.0100.00.0NaN2998.0NaNNaNNaNNaN3003.0WBAN:00184
2016-08-01 04:15:00WBAN:001842016-08-01 04:15700.0233.023.3233.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 04:35:00WBAN:001842016-08-01 04:35500.0234.023.3234.0100.00.0NaN3000.0NaNNaNNaNNaN3005.0WBAN:00184
2016-08-01 04:55:00WBAN:001842016-08-01 04:551000.0240.023.9240.0100.00.0NaN2999.0NaNNaNNaNNaN3004.0WBAN:00184
2016-08-01 05:15:00WBAN:001842016-08-01 05:15700.0237.023.9237.0100.00.0NaN3000.0NaNNaNNaNNaN3005.0WBAN:00184
2016-08-01 05:35:00WBAN:001842016-08-01 05:35500.0237.023.9237.0100.00.0NaN3001.0NaNNaNNaNNaN3006.0WBAN:00184
2016-08-01 05:55:00WBAN:001842016-08-01 05:55700.0239.023.9239.0100.00.0NaN3000.0NaNNaNNaNNaN3005.0WBAN:00184
2016-08-01 06:15:00WBAN:001842016-08-01 06:15700.0245.024.4245.0100.00.0NaN3001.0NaNNaNNaNNaN3006.0WBAN:00184
2016-08-01 06:35:00WBAN:001842016-08-01 06:351000.0249.025.0249.0100.00.0NaN3002.0NaNNaNNaNNaN3007.0WBAN:00184
2016-08-01 06:55:00WBAN:001842016-08-01 06:551000.0253.025.6253.0100.00.0NaN3002.0NaNNaNNaNNaN3007.0WBAN:00184
2016-08-01 07:15:00WBAN:001842016-08-01 07:151000.0256.025.6256.0100.00.0NaN3003.0NaNNaNNaNNaN3008.0WBAN:00184
2016-08-01 07:35:00WBAN:001842016-08-01 07:351000.0265.026.7265.0100.00.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 07:55:00WBAN:001842016-08-01 07:551000.0271.026.8267.098.00.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 08:15:00WBAN:001842016-08-01 08:151000.0276.027.0269.096.00.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 08:35:00WBAN:001842016-08-01 08:351000.0283.027.1264.089.00.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 08:55:00WBAN:001842016-08-01 08:551000.0290.027.3269.088.05.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 09:15:00WBAN:001842016-08-01 09:151000.0292.027.4268.087.00.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 09:35:00WBAN:001842016-08-01 09:351000.0296.027.4267.084.03.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
2016-08-01 09:55:00WBAN:001842016-08-01 09:55NaN302.027.2260.078.00.0NaN3004.0NaNNaNNaNNaN3009.0WBAN:00184
...................................................
2018-07-31 14:35:00WBAN:001842018-07-31 14:35NaN278.0NaN228.074.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 14:55:00WBAN:001842018-07-31 14:55NaN277.0NaN231.076.06.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 15:15:00WBAN:001842018-07-31 15:15NaN273.0NaN229.077.0NaNNaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 15:35:00WBAN:001842018-07-31 15:35NaN273.0NaN237.081.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 15:55:00WBAN:001842018-07-31 15:55NaN272.0NaN236.081.03.0NaNNaNNaNNaNNaNNaN2996.0WBAN:00184
2018-07-31 16:15:00WBAN:001842018-07-31 16:15NaN274.0NaN238.081.03.0NaNNaNNaNNaNNaNNaN2996.0WBAN:00184
2018-07-31 16:35:00WBAN:001842018-07-31 16:35NaN275.0NaN237.080.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 16:55:00WBAN:001842018-07-31 16:55NaN275.0NaN236.079.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 17:15:00WBAN:001842018-07-31 17:15NaN274.0NaN235.079.00.0NaNNaNNaNNaNNaNNaN2994.0WBAN:00184
2018-07-31 17:35:00WBAN:001842018-07-31 17:35NaN272.0NaN238.082.00.0NaNNaNNaNNaNNaNNaN2994.0WBAN:00184
2018-07-31 17:55:00WBAN:001842018-07-31 17:55NaN267.0NaN240.085.03.0NaNNaNNaNNaNNaNNaN2994.0WBAN:00184
2018-07-31 18:15:00WBAN:001842018-07-31 18:15NaN262.0NaN244.090.00.0NaNNaNNaNNaNNaNNaN2994.0WBAN:00184
2018-07-31 18:35:00WBAN:001842018-07-31 18:35NaN259.0NaN249.094.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 18:55:00WBAN:001842018-07-31 18:55NaN257.0NaN257.0100.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 19:15:00WBAN:001842018-07-31 19:15NaN257.0NaN255.099.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 19:35:00WBAN:001842018-07-31 19:35NaN254.0NaN254.0100.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 19:55:00WBAN:001842018-07-31 19:55NaN251.0NaN251.0100.00.0NaNNaNNaNNaNNaNNaN2996.0WBAN:00184
2018-07-31 20:15:00WBAN:001842018-07-31 20:15NaN250.0NaN250.0100.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 20:35:00WBAN:001842018-07-31 20:35NaN248.0NaN248.0100.00.0NaNNaNNaNNaNNaNNaN2995.0WBAN:00184
2018-07-31 20:55:00WBAN:001842018-07-31 20:55NaN243.0NaN243.0100.00.0NaNNaNNaNNaNNaNNaN2996.0WBAN:00184
2018-07-31 21:15:00WBAN:001842018-07-31 21:15NaN240.0NaN240.0100.00.0NaNNaNNaNNaNNaNNaN2997.0WBAN:00184
2018-07-31 21:35:00WBAN:001842018-07-31 21:35NaN239.0NaN239.0100.00.0NaNNaNNaNNaNNaNNaN2996.0WBAN:00184
2018-07-31 21:55:00WBAN:001842018-07-31 21:55NaN236.0NaN236.0100.00.0NaNNaNNaNNaNNaNNaN2997.0WBAN:00184
2018-07-31 22:15:00WBAN:001842018-07-31 22:15NaN235.0NaN235.0100.00.0NaNNaNNaNNaNNaNNaN2996.0WBAN:00184
2018-07-31 22:35:00WBAN:001842018-07-31 22:35NaN234.0NaN234.0100.00.0NaNNaNNaNNaNNaNNaN2997.0WBAN:00184
2018-07-31 22:55:00WBAN:001842018-07-31 22:55700.0235.023.3235.0100.0NaNNaN2993.0NaNNaNNaNNaN2998.0WBAN:00184
2018-07-31 23:15:00WBAN:001842018-07-31 23:15700.0233.023.3233.0100.0NaNNaN2993.0NaNNaNNaNNaN2998.0WBAN:00184
2018-07-31 23:35:00WBAN:001842018-07-31 23:351000.0232.023.3232.0100.00.0NaN2993.0NaNNaNNaNNaN2998.0WBAN:00184
2018-07-31 23:55:00WBAN:001842018-07-31 23:55700.0233.023.3233.0100.00.0NaN2993.0NaNNaNNaNNaN2998.0WBAN:00184
2018-07-31 23:59:00WBAN:001842018-07-31 23:59NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNWBAN:00184
\n", "

46719 rows × 16 columns

\n", "
" ], "text/plain": [ " STATION DATE HOURLYVISIBILITY \\\n", "DATE \n", "2016-08-01 00:15:00 WBAN:00184 2016-08-01 00:15 1000.0 \n", "2016-08-01 00:35:00 WBAN:00184 2016-08-01 00:35 175.0 \n", "2016-08-01 00:55:00 WBAN:00184 2016-08-01 00:55 400.0 \n", "2016-08-01 01:15:00 WBAN:00184 2016-08-01 01:15 100.0 \n", "2016-08-01 01:35:00 WBAN:00184 2016-08-01 01:35 500.0 \n", "2016-08-01 01:55:00 WBAN:00184 2016-08-01 01:55 700.0 \n", "2016-08-01 02:15:00 WBAN:00184 2016-08-01 02:15 700.0 \n", "2016-08-01 02:35:00 WBAN:00184 2016-08-01 02:35 200.0 \n", "2016-08-01 02:55:00 WBAN:00184 2016-08-01 02:55 50.0 \n", "2016-08-01 03:15:00 WBAN:00184 2016-08-01 03:15 150.0 \n", "2016-08-01 03:35:00 WBAN:00184 2016-08-01 03:35 50.0 \n", "2016-08-01 03:55:00 WBAN:00184 2016-08-01 03:55 75.0 \n", "2016-08-01 04:15:00 WBAN:00184 2016-08-01 04:15 700.0 \n", "2016-08-01 04:35:00 WBAN:00184 2016-08-01 04:35 500.0 \n", "2016-08-01 04:55:00 WBAN:00184 2016-08-01 04:55 1000.0 \n", "2016-08-01 05:15:00 WBAN:00184 2016-08-01 05:15 700.0 \n", "2016-08-01 05:35:00 WBAN:00184 2016-08-01 05:35 500.0 \n", "2016-08-01 05:55:00 WBAN:00184 2016-08-01 05:55 700.0 \n", "2016-08-01 06:15:00 WBAN:00184 2016-08-01 06:15 700.0 \n", "2016-08-01 06:35:00 WBAN:00184 2016-08-01 06:35 1000.0 \n", "2016-08-01 06:55:00 WBAN:00184 2016-08-01 06:55 1000.0 \n", "2016-08-01 07:15:00 WBAN:00184 2016-08-01 07:15 1000.0 \n", "2016-08-01 07:35:00 WBAN:00184 2016-08-01 07:35 1000.0 \n", "2016-08-01 07:55:00 WBAN:00184 2016-08-01 07:55 1000.0 \n", "2016-08-01 08:15:00 WBAN:00184 2016-08-01 08:15 1000.0 \n", "2016-08-01 08:35:00 WBAN:00184 2016-08-01 08:35 1000.0 \n", "2016-08-01 08:55:00 WBAN:00184 2016-08-01 08:55 1000.0 \n", "2016-08-01 09:15:00 WBAN:00184 2016-08-01 09:15 1000.0 \n", "2016-08-01 09:35:00 WBAN:00184 2016-08-01 09:35 1000.0 \n", "2016-08-01 09:55:00 WBAN:00184 2016-08-01 09:55 NaN \n", "... ... ... ... \n", "2018-07-31 14:35:00 WBAN:00184 2018-07-31 14:35 NaN \n", "2018-07-31 14:55:00 WBAN:00184 2018-07-31 14:55 NaN \n", "2018-07-31 15:15:00 WBAN:00184 2018-07-31 15:15 NaN \n", "2018-07-31 15:35:00 WBAN:00184 2018-07-31 15:35 NaN \n", "2018-07-31 15:55:00 WBAN:00184 2018-07-31 15:55 NaN \n", "2018-07-31 16:15:00 WBAN:00184 2018-07-31 16:15 NaN \n", "2018-07-31 16:35:00 WBAN:00184 2018-07-31 16:35 NaN \n", "2018-07-31 16:55:00 WBAN:00184 2018-07-31 16:55 NaN \n", "2018-07-31 17:15:00 WBAN:00184 2018-07-31 17:15 NaN \n", "2018-07-31 17:35:00 WBAN:00184 2018-07-31 17:35 NaN \n", "2018-07-31 17:55:00 WBAN:00184 2018-07-31 17:55 NaN \n", "2018-07-31 18:15:00 WBAN:00184 2018-07-31 18:15 NaN \n", "2018-07-31 18:35:00 WBAN:00184 2018-07-31 18:35 NaN \n", "2018-07-31 18:55:00 WBAN:00184 2018-07-31 18:55 NaN \n", "2018-07-31 19:15:00 WBAN:00184 2018-07-31 19:15 NaN \n", "2018-07-31 19:35:00 WBAN:00184 2018-07-31 19:35 NaN \n", "2018-07-31 19:55:00 WBAN:00184 2018-07-31 19:55 NaN \n", "2018-07-31 20:15:00 WBAN:00184 2018-07-31 20:15 NaN \n", "2018-07-31 20:35:00 WBAN:00184 2018-07-31 20:35 NaN \n", "2018-07-31 20:55:00 WBAN:00184 2018-07-31 20:55 NaN \n", "2018-07-31 21:15:00 WBAN:00184 2018-07-31 21:15 NaN \n", "2018-07-31 21:35:00 WBAN:00184 2018-07-31 21:35 NaN \n", "2018-07-31 21:55:00 WBAN:00184 2018-07-31 21:55 NaN \n", "2018-07-31 22:15:00 WBAN:00184 2018-07-31 22:15 NaN \n", "2018-07-31 22:35:00 WBAN:00184 2018-07-31 22:35 NaN \n", "2018-07-31 22:55:00 WBAN:00184 2018-07-31 22:55 700.0 \n", "2018-07-31 23:15:00 WBAN:00184 2018-07-31 23:15 700.0 \n", "2018-07-31 23:35:00 WBAN:00184 2018-07-31 23:35 1000.0 \n", "2018-07-31 23:55:00 WBAN:00184 2018-07-31 23:55 700.0 \n", "2018-07-31 23:59:00 WBAN:00184 2018-07-31 23:59 NaN \n", "\n", " HOURLYDRYBULBTEMPC HOURLYWETBULBTEMPC \\\n", "DATE \n", "2016-08-01 00:15:00 245.0 24.4 \n", "2016-08-01 00:35:00 242.0 24.4 \n", "2016-08-01 00:55:00 242.0 24.4 \n", "2016-08-01 01:15:00 240.0 23.9 \n", "2016-08-01 01:35:00 240.0 23.9 \n", "2016-08-01 01:55:00 241.0 23.9 \n", "2016-08-01 02:15:00 241.0 23.9 \n", "2016-08-01 02:35:00 236.0 23.3 \n", "2016-08-01 02:55:00 235.0 23.3 \n", "2016-08-01 03:15:00 237.0 23.9 \n", "2016-08-01 03:35:00 238.0 23.9 \n", "2016-08-01 03:55:00 237.0 23.9 \n", "2016-08-01 04:15:00 233.0 23.3 \n", "2016-08-01 04:35:00 234.0 23.3 \n", "2016-08-01 04:55:00 240.0 23.9 \n", "2016-08-01 05:15:00 237.0 23.9 \n", "2016-08-01 05:35:00 237.0 23.9 \n", "2016-08-01 05:55:00 239.0 23.9 \n", "2016-08-01 06:15:00 245.0 24.4 \n", "2016-08-01 06:35:00 249.0 25.0 \n", "2016-08-01 06:55:00 253.0 25.6 \n", "2016-08-01 07:15:00 256.0 25.6 \n", "2016-08-01 07:35:00 265.0 26.7 \n", "2016-08-01 07:55:00 271.0 26.8 \n", "2016-08-01 08:15:00 276.0 27.0 \n", "2016-08-01 08:35:00 283.0 27.1 \n", "2016-08-01 08:55:00 290.0 27.3 \n", "2016-08-01 09:15:00 292.0 27.4 \n", "2016-08-01 09:35:00 296.0 27.4 \n", "2016-08-01 09:55:00 302.0 27.2 \n", "... ... ... \n", "2018-07-31 14:35:00 278.0 NaN \n", "2018-07-31 14:55:00 277.0 NaN \n", "2018-07-31 15:15:00 273.0 NaN \n", "2018-07-31 15:35:00 273.0 NaN \n", "2018-07-31 15:55:00 272.0 NaN \n", "2018-07-31 16:15:00 274.0 NaN \n", "2018-07-31 16:35:00 275.0 NaN \n", "2018-07-31 16:55:00 275.0 NaN \n", "2018-07-31 17:15:00 274.0 NaN \n", "2018-07-31 17:35:00 272.0 NaN \n", "2018-07-31 17:55:00 267.0 NaN \n", "2018-07-31 18:15:00 262.0 NaN \n", "2018-07-31 18:35:00 259.0 NaN \n", "2018-07-31 18:55:00 257.0 NaN \n", "2018-07-31 19:15:00 257.0 NaN \n", "2018-07-31 19:35:00 254.0 NaN \n", "2018-07-31 19:55:00 251.0 NaN \n", "2018-07-31 20:15:00 250.0 NaN \n", "2018-07-31 20:35:00 248.0 NaN \n", "2018-07-31 20:55:00 243.0 NaN \n", "2018-07-31 21:15:00 240.0 NaN \n", "2018-07-31 21:35:00 239.0 NaN \n", "2018-07-31 21:55:00 236.0 NaN \n", "2018-07-31 22:15:00 235.0 NaN \n", "2018-07-31 22:35:00 234.0 NaN \n", "2018-07-31 22:55:00 235.0 23.3 \n", "2018-07-31 23:15:00 233.0 23.3 \n", "2018-07-31 23:35:00 232.0 23.3 \n", "2018-07-31 23:55:00 233.0 23.3 \n", "2018-07-31 23:59:00 NaN NaN \n", "\n", " HOURLYDewPointTempC HOURLYRelativeHumidity \\\n", "DATE \n", "2016-08-01 00:15:00 245.0 100.0 \n", "2016-08-01 00:35:00 242.0 100.0 \n", "2016-08-01 00:55:00 242.0 100.0 \n", "2016-08-01 01:15:00 240.0 100.0 \n", "2016-08-01 01:35:00 240.0 100.0 \n", "2016-08-01 01:55:00 241.0 100.0 \n", "2016-08-01 02:15:00 241.0 100.0 \n", "2016-08-01 02:35:00 236.0 100.0 \n", "2016-08-01 02:55:00 235.0 100.0 \n", "2016-08-01 03:15:00 237.0 100.0 \n", "2016-08-01 03:35:00 238.0 100.0 \n", "2016-08-01 03:55:00 237.0 100.0 \n", "2016-08-01 04:15:00 233.0 100.0 \n", "2016-08-01 04:35:00 234.0 100.0 \n", "2016-08-01 04:55:00 240.0 100.0 \n", "2016-08-01 05:15:00 237.0 100.0 \n", "2016-08-01 05:35:00 237.0 100.0 \n", "2016-08-01 05:55:00 239.0 100.0 \n", "2016-08-01 06:15:00 245.0 100.0 \n", "2016-08-01 06:35:00 249.0 100.0 \n", "2016-08-01 06:55:00 253.0 100.0 \n", "2016-08-01 07:15:00 256.0 100.0 \n", "2016-08-01 07:35:00 265.0 100.0 \n", "2016-08-01 07:55:00 267.0 98.0 \n", "2016-08-01 08:15:00 269.0 96.0 \n", "2016-08-01 08:35:00 264.0 89.0 \n", "2016-08-01 08:55:00 269.0 88.0 \n", "2016-08-01 09:15:00 268.0 87.0 \n", "2016-08-01 09:35:00 267.0 84.0 \n", "2016-08-01 09:55:00 260.0 78.0 \n", "... ... ... \n", "2018-07-31 14:35:00 228.0 74.0 \n", "2018-07-31 14:55:00 231.0 76.0 \n", "2018-07-31 15:15:00 229.0 77.0 \n", "2018-07-31 15:35:00 237.0 81.0 \n", "2018-07-31 15:55:00 236.0 81.0 \n", "2018-07-31 16:15:00 238.0 81.0 \n", "2018-07-31 16:35:00 237.0 80.0 \n", "2018-07-31 16:55:00 236.0 79.0 \n", "2018-07-31 17:15:00 235.0 79.0 \n", "2018-07-31 17:35:00 238.0 82.0 \n", "2018-07-31 17:55:00 240.0 85.0 \n", "2018-07-31 18:15:00 244.0 90.0 \n", "2018-07-31 18:35:00 249.0 94.0 \n", "2018-07-31 18:55:00 257.0 100.0 \n", "2018-07-31 19:15:00 255.0 99.0 \n", "2018-07-31 19:35:00 254.0 100.0 \n", "2018-07-31 19:55:00 251.0 100.0 \n", "2018-07-31 20:15:00 250.0 100.0 \n", "2018-07-31 20:35:00 248.0 100.0 \n", "2018-07-31 20:55:00 243.0 100.0 \n", "2018-07-31 21:15:00 240.0 100.0 \n", "2018-07-31 21:35:00 239.0 100.0 \n", "2018-07-31 21:55:00 236.0 100.0 \n", "2018-07-31 22:15:00 235.0 100.0 \n", "2018-07-31 22:35:00 234.0 100.0 \n", "2018-07-31 22:55:00 235.0 100.0 \n", "2018-07-31 23:15:00 233.0 100.0 \n", "2018-07-31 23:35:00 232.0 100.0 \n", "2018-07-31 23:55:00 233.0 100.0 \n", "2018-07-31 23:59:00 NaN NaN \n", "\n", " HOURLYWindSpeed HOURLYWindGustSpeed \\\n", "DATE \n", "2016-08-01 00:15:00 0.0 NaN \n", "2016-08-01 00:35:00 0.0 NaN \n", "2016-08-01 00:55:00 0.0 NaN \n", "2016-08-01 01:15:00 0.0 NaN \n", "2016-08-01 01:35:00 0.0 NaN \n", "2016-08-01 01:55:00 0.0 NaN \n", "2016-08-01 02:15:00 0.0 NaN \n", "2016-08-01 02:35:00 0.0 NaN \n", "2016-08-01 02:55:00 0.0 NaN \n", "2016-08-01 03:15:00 0.0 NaN \n", "2016-08-01 03:35:00 0.0 NaN \n", "2016-08-01 03:55:00 0.0 NaN \n", "2016-08-01 04:15:00 0.0 NaN \n", "2016-08-01 04:35:00 0.0 NaN \n", "2016-08-01 04:55:00 0.0 NaN \n", "2016-08-01 05:15:00 0.0 NaN \n", "2016-08-01 05:35:00 0.0 NaN \n", "2016-08-01 05:55:00 0.0 NaN \n", "2016-08-01 06:15:00 0.0 NaN \n", "2016-08-01 06:35:00 0.0 NaN \n", "2016-08-01 06:55:00 0.0 NaN \n", "2016-08-01 07:15:00 0.0 NaN \n", "2016-08-01 07:35:00 0.0 NaN \n", "2016-08-01 07:55:00 0.0 NaN \n", "2016-08-01 08:15:00 0.0 NaN \n", "2016-08-01 08:35:00 0.0 NaN \n", "2016-08-01 08:55:00 5.0 NaN \n", "2016-08-01 09:15:00 0.0 NaN \n", "2016-08-01 09:35:00 3.0 NaN \n", "2016-08-01 09:55:00 0.0 NaN \n", "... ... ... \n", "2018-07-31 14:35:00 0.0 NaN \n", "2018-07-31 14:55:00 6.0 NaN \n", "2018-07-31 15:15:00 NaN NaN \n", "2018-07-31 15:35:00 0.0 NaN \n", "2018-07-31 15:55:00 3.0 NaN \n", "2018-07-31 16:15:00 3.0 NaN \n", "2018-07-31 16:35:00 0.0 NaN \n", "2018-07-31 16:55:00 0.0 NaN \n", "2018-07-31 17:15:00 0.0 NaN \n", "2018-07-31 17:35:00 0.0 NaN \n", "2018-07-31 17:55:00 3.0 NaN \n", "2018-07-31 18:15:00 0.0 NaN \n", "2018-07-31 18:35:00 0.0 NaN \n", "2018-07-31 18:55:00 0.0 NaN \n", "2018-07-31 19:15:00 0.0 NaN \n", "2018-07-31 19:35:00 0.0 NaN \n", "2018-07-31 19:55:00 0.0 NaN \n", "2018-07-31 20:15:00 0.0 NaN \n", "2018-07-31 20:35:00 0.0 NaN \n", "2018-07-31 20:55:00 0.0 NaN \n", "2018-07-31 21:15:00 0.0 NaN \n", "2018-07-31 21:35:00 0.0 NaN \n", "2018-07-31 21:55:00 0.0 NaN \n", "2018-07-31 22:15:00 0.0 NaN \n", "2018-07-31 22:35:00 0.0 NaN \n", "2018-07-31 22:55:00 NaN NaN \n", "2018-07-31 23:15:00 NaN NaN \n", "2018-07-31 23:35:00 0.0 NaN \n", "2018-07-31 23:55:00 0.0 NaN \n", "2018-07-31 23:59:00 NaN NaN \n", "\n", " HOURLYStationPressure HOURLYPressureTendency \\\n", "DATE \n", "2016-08-01 00:15:00 2999.0 NaN \n", "2016-08-01 00:35:00 2999.0 NaN \n", "2016-08-01 00:55:00 2999.0 NaN \n", "2016-08-01 01:15:00 2999.0 NaN \n", "2016-08-01 01:35:00 3000.0 NaN \n", "2016-08-01 01:55:00 2999.0 NaN \n", "2016-08-01 02:15:00 2999.0 NaN \n", "2016-08-01 02:35:00 2998.0 NaN \n", "2016-08-01 02:55:00 2999.0 NaN \n", "2016-08-01 03:15:00 2999.0 NaN \n", "2016-08-01 03:35:00 2998.0 NaN \n", "2016-08-01 03:55:00 2998.0 NaN \n", "2016-08-01 04:15:00 2999.0 NaN \n", "2016-08-01 04:35:00 3000.0 NaN \n", "2016-08-01 04:55:00 2999.0 NaN \n", "2016-08-01 05:15:00 3000.0 NaN \n", "2016-08-01 05:35:00 3001.0 NaN \n", "2016-08-01 05:55:00 3000.0 NaN \n", "2016-08-01 06:15:00 3001.0 NaN \n", "2016-08-01 06:35:00 3002.0 NaN \n", "2016-08-01 06:55:00 3002.0 NaN \n", "2016-08-01 07:15:00 3003.0 NaN \n", "2016-08-01 07:35:00 3004.0 NaN \n", "2016-08-01 07:55:00 3004.0 NaN \n", "2016-08-01 08:15:00 3004.0 NaN \n", "2016-08-01 08:35:00 3004.0 NaN \n", "2016-08-01 08:55:00 3004.0 NaN \n", "2016-08-01 09:15:00 3004.0 NaN \n", "2016-08-01 09:35:00 3004.0 NaN \n", "2016-08-01 09:55:00 3004.0 NaN \n", "... ... ... \n", "2018-07-31 14:35:00 NaN NaN \n", "2018-07-31 14:55:00 NaN NaN \n", "2018-07-31 15:15:00 NaN NaN \n", "2018-07-31 15:35:00 NaN NaN \n", "2018-07-31 15:55:00 NaN NaN \n", "2018-07-31 16:15:00 NaN NaN \n", "2018-07-31 16:35:00 NaN NaN \n", "2018-07-31 16:55:00 NaN NaN \n", "2018-07-31 17:15:00 NaN NaN \n", "2018-07-31 17:35:00 NaN NaN \n", "2018-07-31 17:55:00 NaN NaN \n", "2018-07-31 18:15:00 NaN NaN \n", "2018-07-31 18:35:00 NaN NaN \n", "2018-07-31 18:55:00 NaN NaN \n", "2018-07-31 19:15:00 NaN NaN \n", "2018-07-31 19:35:00 NaN NaN \n", "2018-07-31 19:55:00 NaN NaN \n", "2018-07-31 20:15:00 NaN NaN \n", "2018-07-31 20:35:00 NaN NaN \n", "2018-07-31 20:55:00 NaN NaN \n", "2018-07-31 21:15:00 NaN NaN \n", "2018-07-31 21:35:00 NaN NaN \n", "2018-07-31 21:55:00 NaN NaN \n", "2018-07-31 22:15:00 NaN NaN \n", "2018-07-31 22:35:00 NaN NaN \n", "2018-07-31 22:55:00 2993.0 NaN \n", "2018-07-31 23:15:00 2993.0 NaN \n", "2018-07-31 23:35:00 2993.0 NaN \n", "2018-07-31 23:55:00 2993.0 NaN \n", "2018-07-31 23:59:00 NaN NaN \n", "\n", " HOURLYPressureChange HOURLYSeaLevelPressure \\\n", "DATE \n", "2016-08-01 00:15:00 NaN NaN \n", "2016-08-01 00:35:00 NaN NaN \n", "2016-08-01 00:55:00 NaN NaN \n", "2016-08-01 01:15:00 NaN NaN \n", "2016-08-01 01:35:00 NaN NaN \n", "2016-08-01 01:55:00 NaN NaN \n", "2016-08-01 02:15:00 NaN NaN \n", "2016-08-01 02:35:00 NaN NaN \n", "2016-08-01 02:55:00 NaN NaN \n", "2016-08-01 03:15:00 NaN NaN \n", "2016-08-01 03:35:00 NaN NaN \n", "2016-08-01 03:55:00 NaN NaN \n", "2016-08-01 04:15:00 NaN NaN \n", "2016-08-01 04:35:00 NaN NaN \n", "2016-08-01 04:55:00 NaN NaN \n", "2016-08-01 05:15:00 NaN NaN \n", "2016-08-01 05:35:00 NaN NaN \n", "2016-08-01 05:55:00 NaN NaN \n", "2016-08-01 06:15:00 NaN NaN \n", "2016-08-01 06:35:00 NaN NaN \n", "2016-08-01 06:55:00 NaN NaN \n", "2016-08-01 07:15:00 NaN NaN \n", "2016-08-01 07:35:00 NaN NaN \n", "2016-08-01 07:55:00 NaN NaN \n", "2016-08-01 08:15:00 NaN NaN \n", "2016-08-01 08:35:00 NaN NaN \n", "2016-08-01 08:55:00 NaN NaN \n", "2016-08-01 09:15:00 NaN NaN \n", "2016-08-01 09:35:00 NaN NaN \n", "2016-08-01 09:55:00 NaN NaN \n", "... ... ... \n", "2018-07-31 14:35:00 NaN NaN \n", "2018-07-31 14:55:00 NaN NaN \n", "2018-07-31 15:15:00 NaN NaN \n", "2018-07-31 15:35:00 NaN NaN \n", "2018-07-31 15:55:00 NaN NaN \n", "2018-07-31 16:15:00 NaN NaN \n", "2018-07-31 16:35:00 NaN NaN \n", "2018-07-31 16:55:00 NaN NaN \n", "2018-07-31 17:15:00 NaN NaN \n", "2018-07-31 17:35:00 NaN NaN \n", "2018-07-31 17:55:00 NaN NaN \n", "2018-07-31 18:15:00 NaN NaN \n", "2018-07-31 18:35:00 NaN NaN \n", "2018-07-31 18:55:00 NaN NaN \n", "2018-07-31 19:15:00 NaN NaN \n", "2018-07-31 19:35:00 NaN NaN \n", "2018-07-31 19:55:00 NaN NaN \n", "2018-07-31 20:15:00 NaN NaN \n", "2018-07-31 20:35:00 NaN NaN \n", "2018-07-31 20:55:00 NaN NaN \n", "2018-07-31 21:15:00 NaN NaN \n", "2018-07-31 21:35:00 NaN NaN \n", "2018-07-31 21:55:00 NaN NaN \n", "2018-07-31 22:15:00 NaN NaN \n", "2018-07-31 22:35:00 NaN NaN \n", "2018-07-31 22:55:00 NaN NaN \n", "2018-07-31 23:15:00 NaN NaN \n", "2018-07-31 23:35:00 NaN NaN \n", "2018-07-31 23:55:00 NaN NaN \n", "2018-07-31 23:59:00 NaN NaN \n", "\n", " HOURLYPrecip HOURLYAltimeterSetting STATION_ID \n", "DATE \n", "2016-08-01 00:15:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 00:35:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 00:55:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 01:15:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 01:35:00 NaN 3005.0 WBAN:00184 \n", "2016-08-01 01:55:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 02:15:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 02:35:00 NaN 3003.0 WBAN:00184 \n", "2016-08-01 02:55:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 03:15:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 03:35:00 NaN 3003.0 WBAN:00184 \n", "2016-08-01 03:55:00 NaN 3003.0 WBAN:00184 \n", "2016-08-01 04:15:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 04:35:00 NaN 3005.0 WBAN:00184 \n", "2016-08-01 04:55:00 NaN 3004.0 WBAN:00184 \n", "2016-08-01 05:15:00 NaN 3005.0 WBAN:00184 \n", "2016-08-01 05:35:00 NaN 3006.0 WBAN:00184 \n", "2016-08-01 05:55:00 NaN 3005.0 WBAN:00184 \n", "2016-08-01 06:15:00 NaN 3006.0 WBAN:00184 \n", "2016-08-01 06:35:00 NaN 3007.0 WBAN:00184 \n", "2016-08-01 06:55:00 NaN 3007.0 WBAN:00184 \n", "2016-08-01 07:15:00 NaN 3008.0 WBAN:00184 \n", "2016-08-01 07:35:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 07:55:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 08:15:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 08:35:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 08:55:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 09:15:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 09:35:00 NaN 3009.0 WBAN:00184 \n", "2016-08-01 09:55:00 NaN 3009.0 WBAN:00184 \n", "... ... ... ... \n", "2018-07-31 14:35:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 14:55:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 15:15:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 15:35:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 15:55:00 NaN 2996.0 WBAN:00184 \n", "2018-07-31 16:15:00 NaN 2996.0 WBAN:00184 \n", "2018-07-31 16:35:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 16:55:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 17:15:00 NaN 2994.0 WBAN:00184 \n", "2018-07-31 17:35:00 NaN 2994.0 WBAN:00184 \n", "2018-07-31 17:55:00 NaN 2994.0 WBAN:00184 \n", "2018-07-31 18:15:00 NaN 2994.0 WBAN:00184 \n", "2018-07-31 18:35:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 18:55:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 19:15:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 19:35:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 19:55:00 NaN 2996.0 WBAN:00184 \n", "2018-07-31 20:15:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 20:35:00 NaN 2995.0 WBAN:00184 \n", "2018-07-31 20:55:00 NaN 2996.0 WBAN:00184 \n", "2018-07-31 21:15:00 NaN 2997.0 WBAN:00184 \n", "2018-07-31 21:35:00 NaN 2996.0 WBAN:00184 \n", "2018-07-31 21:55:00 NaN 2997.0 WBAN:00184 \n", "2018-07-31 22:15:00 NaN 2996.0 WBAN:00184 \n", "2018-07-31 22:35:00 NaN 2997.0 WBAN:00184 \n", "2018-07-31 22:55:00 NaN 2998.0 WBAN:00184 \n", "2018-07-31 23:15:00 NaN 2998.0 WBAN:00184 \n", "2018-07-31 23:35:00 NaN 2998.0 WBAN:00184 \n", "2018-07-31 23:55:00 NaN 2998.0 WBAN:00184 \n", "2018-07-31 23:59:00 NaN NaN WBAN:00184 \n", "\n", "[46719 rows x 16 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "by_station_list[0]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [00:06<00:00, 64.90it/s]\n" ] } ], "source": [ "#Some stations take reading multiple times per hour this reduces to one reading per hour by taking the mean of readings\n", "#in an hour\n", "for i in tqdm(range(len(by_station_list))):\n", " by_station_list[i] = by_station_list[i].resample('60T').mean()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "#making a list of filenames by removing the colon \n", "wban_filename = []\n", "for x in wban_list:\n", " wban_filename.append(x[:4] + x[5:])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'WBAN00184'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wban_filename[0]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'WBAN:00184'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wban_list[0]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|███████████████████████████████████████████████████████████████████████████████| 406/406 [00:00<00:00, 619.85it/s]\n" ] } ], "source": [ "#adding back WBAN station since it was absorbed when data was set as the index\n", "for i in tqdm(range(len(by_station_list))):\n", " by_station_list[i]['STATION'] = wban_list[i] " ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17520" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(by_station_list[0])" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "rows = 24 * 365 * 2 \n", "stations_used = []\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 406/406 [02:04<00:00, 3.27it/s]\n" ] } ], "source": [ "for i in tqdm(range(len(wban_filename))):\n", " if(len(by_station_list[i]) >= rows):\n", " by_station_list[i].to_csv('../Project Final/Transformation/processed_data/{}.csv'.format(wban_filename[i]))\n", " stations_used.append(stations.loc[stations.STATION_ID == wban_list[i]])" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "stations_used_df = pd.concat(stations_used)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "stations_used_df.to_csv('../Project Final/Transformation/stations_mask.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Current Issues\n", "\n", "1) data is not synced across time-zone and all entries start at midnight local time. \n", "\n", "2) some stations have multiple entries per hour and need to be reduced.\n", "\n", "Solutions\n", "\n", "Remove rows from data based on timezone to sync times\n", "limit only 1 entry per hour for a station" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }