import os
import warnings
from glob import glob

import numpy as np
from keras.utils import np_utils
from sklearn.datasets import load_files

# Number of answer classes the network distinguishes (one sub-folder per answer).
NUM_CLASSES = 47


def load_dataset(path, num_classes=NUM_CLASSES):
    """Collect image paths and one-hot answers from a folder-per-class directory.

    Args:
        path: Directory whose immediate sub-folders are named after the answer
            and contain the images for that answer.
        num_classes: Width of the one-hot encoding. Defaults to NUM_CLASSES.

    Returns:
        (problem_files, problem_answers): an array of file paths and the
        matching one-hot label matrix with `num_classes` columns.
    """
    # Only file names and labels are used here; the images are decoded later
    # by Keras, so skip reading every file's raw bytes into memory.
    data = load_files(path, load_content=False, shuffle=False)

    # load_files numbers the classes per directory, from its sub-folder names.
    # If a split has a different set of folders, its label indices no longer
    # line up with the other splits, so make that visible instead of silent.
    found_classes = len(data['target_names'])
    if found_classes != num_classes:
        warnings.warn(
            '%s contains %d class folders but %d were expected; label indices '
            'may not match the other splits.' % (path, found_classes, num_classes),
            stacklevel=2,
        )

    problem_files = np.array(data['filenames'])
    problem_answers = np_utils.to_categorical(np.array(data['target']), num_classes)
    return problem_files, problem_answers


# load train, test, and validation datasets
train_files, train_targets = load_dataset('./data/train')
valid_files, valid_targets = load_dataset('./data/validate')
test_files, test_targets = load_dataset('./data/test')

# The folder names are the answers. Take the last path component rather than a
# fixed character slice, and sort so the list order is deterministic
# (glob makes no ordering promise).
math_answers = sorted(
    os.path.basename(os.path.normpath(folder)) for folder in glob("./data/train/*/")
)

# print statistics about the dataset
total_images = len(train_files) + len(valid_files) + len(test_files)
print('There are %d total answer categories.' % len(math_answers))
print('There are %d total math images.' % total_images)
print('There are %d training math images.' % len(train_files))
print('There are %d validation math images.' % len(valid_files))
print('There are %d test math images.' % len(test_files))
from keras.preprocessing import image
from PIL import ImageFile
from tqdm import tqdm

# Let PIL decode image files that are truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Height and width every image is resized to before it reaches the network.
IMAGE_SIZE = (128, 128)


def path_to_tensor(img_path, target_size=IMAGE_SIZE):
    """Load one image file as a batch of one.

    Args:
        img_path: Path of the image file.
        target_size: (height, width) the image is resized to.

    Returns:
        4D array of shape (1, height, width, channels). load_img is called
        with its default colour handling, which yields 3 channels (RGB) even
        for the black-and-white generator output; the first Conv2D layer's
        104 parameters = (2*2*3 + 1) * 8 reflect that.
    """
    img = image.load_img(img_path, target_size=target_size)
    # PIL image -> 3D array (height, width, channels)
    x = image.img_to_array(img)
    # prepend the batch axis
    return np.expand_dims(x, axis=0)


def paths_to_tensor(img_paths, target_size=IMAGE_SIZE):
    """Load many image files into one 4D array of shape (n, height, width, channels).

    An empty `img_paths` yields an empty batch rather than the ValueError that
    np.vstack raises for an empty list.
    """
    list_of_tensors = [path_to_tensor(img_path, target_size) for img_path in tqdm(img_paths)]
    if not list_of_tensors:
        # assumes the 3-channel float32 layout produced by path_to_tensor
        return np.empty((0,) + tuple(target_size) + (3,), dtype='float32')
    return np.vstack(list_of_tensors)


def _load_scaled(img_paths):
    """Load images and rescale pixel values from 0..255 to 0..1."""
    return paths_to_tensor(img_paths).astype('float32') / 255


# pre-process the data for Keras
train_tensors = _load_scaled(train_files)
valid_tensors = _load_scaled(valid_files)
test_tensors = _load_scaled(test_files)
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential


def build_model(input_shape, num_classes):
    """Build the small CNN used to classify rendered arithmetic problems.

    Three Conv2D(2x2, 'same', relu) + MaxPooling2D(2) stages with 8, 16 and 32
    filters, followed by Flatten, Dense(256, relu), Dropout(0.3) and a softmax
    output layer.

    Args:
        input_shape: Shape of one input sample, e.g. train_tensors.shape[1:].
        num_classes: Number of softmax outputs (width of the one-hot labels).

    Returns:
        The uncompiled keras Sequential model.
    """
    net = Sequential()
    for stage, filters in enumerate((8, 16, 32)):
        conv_kwargs = dict(filters=filters, kernel_size=2, padding='same', activation='relu')
        if stage == 0:
            # only the first layer of a Sequential model is given the input shape
            conv_kwargs['input_shape'] = input_shape
        net.add(Conv2D(**conv_kwargs))
        net.add(MaxPooling2D(pool_size=2))
    net.add(Flatten())
    net.add(Dense(256, activation='relu'))
    net.add(Dropout(0.3))
    net.add(Dense(num_classes, activation='softmax'))
    return net


# Size the output layer from the label matrix so it cannot drift away from the
# one-hot width chosen when the data was loaded.
model = build_model(train_tensors.shape[1:], train_targets.shape[1])
model.summary()

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
import os

from keras.callbacks import ModelCheckpoint

# Number of passes over the training set.
epochs = 20

# Where the best weights seen so far (lowest validation loss) are written.
CHECKPOINT_PATH = 'saved_models/weights.best.from_scratch.hdf5'

# The checkpoint callback writes straight to CHECKPOINT_PATH; create the
# directory first so the first save does not fail on a fresh checkout.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

checkpointer = ModelCheckpoint(filepath=CHECKPOINT_PATH, monitor='val_loss',
                               verbose=1, save_best_only=True)

# Keep the History object instead of letting its repr become the cell output.
history = model.fit(train_tensors, train_targets,
                    validation_data=(valid_tensors, valid_targets),
                    epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=2)
# Restore the weights with the best validation loss saved during training.
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

# Index of the predicted answer class for each test image, in one batched
# predict call instead of one call per image.
math_predictions = np.argmax(model.predict(test_tensors), axis=1)

# report test accuracy
test_accuracy = 100 * np.mean(math_predictions == np.argmax(test_targets, axis=1))
print('Test accuracy: %.4f%%' % test_accuracy)

# categories[i] is the numeric answer for class index i. The class indices
# come from sklearn's load_files, which numbers the answer folders in sorted
# *string* order ('0', '1', '10', '100', '11', ...), so sort the names as
# strings before converting them. Going through set(int) instead puts the
# answers in numeric order and mislabels almost every class (index 3 is the
# answer 100, not 3).
categories = [int(name) for name in sorted(math_answers)]


def predict_answer(img_path):
    """Return (class_index, answer) predicted for the image at `img_path`.

    The image is scaled to 0..1 exactly like the training tensors; feeding
    raw 0..255 pixels gives the network inputs it was never trained on.
    """
    scaled = path_to_tensor(img_path).astype('float32') / 255
    class_index = int(np.argmax(model.predict(scaled)))
    return class_index, categories[class_index]


predicted_index, predicted_answer = predict_answer('./human/test.png')
predicted_answer
# Operators a generated problem may use; 'x' stands for multiplication.
operators = ['x', '+', '-']

# Fonts already loaded, keyed by (file name, size), so the font file is read
# once rather than once per generated image.
_FONT_CACHE = {}


def get_answer(top, bottom, operand):
    """Return the result of `top <operand> bottom`.

    Args:
        top: Upper number of the problem.
        bottom: Lower number of the problem.
        operand: One of 'x' (multiply), '+' or '-'.

    Raises:
        ValueError: if `operand` is not one of the three supported symbols
            (previously any unknown symbol was silently treated as '-').
    """
    if operand == 'x':
        return top * bottom
    if operand == '+':
        return top + bottom
    if operand == '-':
        return top - bottom
    raise ValueError('unsupported operand: {!r}'.format(operand))


def _load_font(name='arial.ttf', size=50):
    """Return the requested TrueType font, loading it on first use only."""
    key = (name, size)
    if key not in _FONT_CACHE:
        _FONT_CACHE[key] = ImageFont.truetype(name, size)
    return _FONT_CACHE[key]


def make_img(top, bottom, operand, mode):
    """Render one arithmetic problem and save it under ./data/<mode>/<answer>/.

    The image is a 128x128 1-bit picture with `top` on the first text line,
    `operand` followed by `bottom` on the second, and a horizontal rule
    underneath. The file name is a random UUID, so repeated calls never
    overwrite each other.

    Args:
        top: Upper number.
        bottom: Lower number.
        operand: 'x', '+' or '-'.
        mode: Name of the dataset split folder ('train', 'validate' or 'test').
    """
    # Resolve the answer first so a bad operand fails before anything is drawn.
    answer = get_answer(top, bottom, operand)

    img = Image.new('1', (128, 128), color=1)
    d = ImageDraw.Draw(img)
    my_string = '{}\n{}{}'.format(top, operand, bottom)
    d.multiline_text((30, 5), my_string, font=_load_font(), align='right')
    d.line([(30, 110), (115, 110)], width=3)

    out_dir = './data/{}/{}'.format(mode, answer)
    # exist_ok avoids the separate exists() check and its race window
    os.makedirs(out_dir, exist_ok=True)
    img.save('{}/{}.png'.format(out_dir, uuid.uuid4().hex))


def _split_for(index, total):
    """Map a sample index to its split: first 60% train, next 20% validate, rest test."""
    # integer arithmetic keeps the boundaries exact (no float comparison)
    if index * 10 < total * 6:
        return 'train'
    if index * 10 < total * 8:
        return 'validate'
    return 'test'


def generate_dataset(n_samples=1000, seed=None):
    """Generate `n_samples` random problems with operands in 0..10.

    The larger operand is always placed on top, so subtraction never goes
    negative. Pass `seed` to make a run reproducible.

    NOTE(review): with operands 0..10, the larger one on top and three
    operators there are only 66 * 3 = 198 distinct images, so the train,
    validate and test folders inevitably contain identical pictures; accuracy
    measured on them says little about generalisation.
    """
    import random

    rng = random.Random(seed)
    for i in range(n_samples):
        int_one = rng.randint(0, 10)
        int_two = rng.randint(0, 10)
        make_img(max(int_one, int_two), min(int_one, int_two),
                 rng.choice(operators), _split_for(i, n_samples))


# True when run as a notebook cell or a script; keeps a plain import of the
# exported module from writing a thousand files as a side effect.
if __name__ == '__main__':
    generate_dataset(1000)
# Enumerate the answer space: every sum and product of two operands in 0..10.
# Differences need no separate pass: the generator always puts the larger
# operand on top, so they fall in 0..10 and are already covered by the sums.
all_results = [
    result
    for i in range(11)
    for j in range(11)
    for result in (i + j, i * j)
]
len(all_results)

# Distinct answers, i.e. the classes the network has to tell apart.
my_set = set(all_results)
my_set

# Sort explicitly: a set's iteration order is an implementation detail, so
# positional look-ups such as my_list[39] must not depend on it.
my_list = sorted(my_set)

my_list[39]
# --- Render one hand-checkable problem (10 x 10) for a manual model test ---
top = 10
bottom = 10
operand = 'x'

img = Image.new('1', (128, 128), color=1)
fnt = ImageFont.truetype('arial.ttf', 50)
d = ImageDraw.Draw(img)
my_string = '{}\n{}{}'.format(top, operand, bottom)
d.multiline_text((30, 5), my_string, font=fnt, align='right')
d.line([(30, 110), (115, 110)], width=3)

# the target folder is not created anywhere else in this notebook
os.makedirs('./human', exist_ok=True)
img.save('./human/test.png')

# --- Pre-create one folder per answer in every split ---
# Only answers the generator can actually produce get a folder (operands
# 0..10, larger operand on top). Looping over range(100) instead would create
# folders for impossible answers such as 23 and leave out 100.
# NOTE(review): sklearn's load_files appears to count every sub-folder as a
# class, even an empty one, so giving all splits the same folder set should
# keep their label indices aligned -- confirm against the loader.
reachable_answers = sorted({
    result
    for high in range(11)
    for low in range(high + 1)
    for result in (high * low, high + low, high - low)
})

for mode in ['train', 'validate', 'test']:
    for answer in reachable_answers:
        os.makedirs('./data/{}/{}'.format(mode, answer), exist_ok=True)