diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..763513e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints diff --git a/.pytest_cache/v/cache/lastfailed b/.pytest_cache/v/cache/lastfailed new file mode 100644 index 0000000..1d7a895 --- /dev/null +++ b/.pytest_cache/v/cache/lastfailed @@ -0,0 +1,4 @@ +{ + "testing.py::test_answer": true, + "testing.py::test_matrix_multiplication": true +} \ No newline at end of file diff --git a/.pytest_cache/v/cache/nodeids b/.pytest_cache/v/cache/nodeids new file mode 100644 index 0000000..21b61c4 --- /dev/null +++ b/.pytest_cache/v/cache/nodeids @@ -0,0 +1,3 @@ +[ + "testing.py::test_data_frame_work" +] \ No newline at end of file diff --git a/Titanic.ipynb b/Titanic.ipynb new file mode 100644 index 0000000..ee860d0 --- /dev/null +++ b/Titanic.ipynb @@ -0,0 +1,2551 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 200, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 201, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 202, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "metadata": {}, + "outputs": [], + "source": [ + "df['Age']=df.Age.fillna(value=df.Age.median())\n", + "\n", + "#not enough values for cabin" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(891, 12)" + ] + }, + "execution_count": 204, 
+ "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
PassengerId891.0446.000000257.3538421.00223.5000446.0000668.5891.0000
Survived891.00.3838380.4865920.000.00000.00001.01.0000
Pclass891.02.3086420.8360711.002.00003.00003.03.0000
Age891.029.36158213.0196970.4222.000028.000035.080.0000
SibSp891.00.5230081.1027430.000.00000.00001.08.0000
Parch891.00.3815940.8060570.000.00000.00000.06.0000
Fare891.032.20420849.6934290.007.910414.454231.0512.3292
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% \\\n", + "PassengerId 891.0 446.000000 257.353842 1.00 223.5000 446.0000 668.5 \n", + "Survived 891.0 0.383838 0.486592 0.00 0.0000 0.0000 1.0 \n", + "Pclass 891.0 2.308642 0.836071 1.00 2.0000 3.0000 3.0 \n", + "Age 891.0 29.361582 13.019697 0.42 22.0000 28.0000 35.0 \n", + "SibSp 891.0 0.523008 1.102743 0.00 0.0000 0.0000 1.0 \n", + "Parch 891.0 0.381594 0.806057 0.00 0.0000 0.0000 0.0 \n", + "Fare 891.0 32.204208 49.693429 0.00 7.9104 14.4542 31.0 \n", + "\n", + " max \n", + "PassengerId 891.0000 \n", + "Survived 1.0000 \n", + "Pclass 3.0000 \n", + "Age 80.0000 \n", + "SibSp 8.0000 \n", + "Parch 6.0000 \n", + "Fare 512.3292 " + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe().T" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',\n", + " 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],\n", + " dtype='object')" + ] + }, + "execution_count": 206, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=891, step=1)" + ] + }, + "execution_count": 207, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": {}, + "outputs": [], + "source": [ + "group_names = ['Child','Adult','Senior']\n", + "bins = [0,17,65,100]\n", + "df['Age_Group'] = pd.cut(df['Age'], bins=bins, labels=group_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedAge_Group
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSAdult
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CAdult
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSAdult
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SAdult
4503Allen, Mr. William Henrymale35.0003734508.0500NaNSAdult
5603Moran, Mr. Jamesmale28.0003308778.4583NaNQAdult
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46SAdult
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNSChild
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNSAdult
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNCChild
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6SChild
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103SAdult
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.0500NaNSAdult
131403Andersson, Mr. Anders Johanmale39.01534708231.2750NaNSAdult
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.8542NaNSChild
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.0000NaNSAdult
161703Rice, Master. Eugenemale2.04138265229.1250NaNQChild
171812Williams, Mr. Charles Eugenemale28.00024437313.0000NaNSAdult
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.0000NaNSAdult
192013Masselmani, Mrs. Fatimafemale28.00026497.2250NaNCAdult
202102Fynney, Mr. Joseph Jmale35.00023986526.0000NaNSAdult
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56SAdult
222313McGowan, Miss. Anna \"Annie\"female15.0003309238.0292NaNQChild
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6SAdult
242503Palsson, Miss. Torborg Danirafemale8.03134990921.0750NaNSChild
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.3875NaNSAdult
262703Emir, Mr. Farred Chehabmale28.00026317.2250NaNCAdult
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27SAdult
282913O'Dwyer, Miss. Ellen \"Nellie\"female28.0003309597.8792NaNQAdult
293003Todoroff, Mr. Laliomale28.0003492167.8958NaNSAdult
..........................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.5000NaNSAdult
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17SAdult
86386403Sage, Miss. Dorothy Edith \"Dolly\"female28.082CA. 234369.5500NaNSAdult
86486502Gill, Mr. John Williammale24.00023386613.0000NaNSAdult
86586612Bystrom, Mrs. (Karolina)female42.00023685213.0000NaNSAdult
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.8583NaNCAdult
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24SAdult
86886903van Melkebeke, Mr. Philemonmale28.0003457779.5000NaNSAdult
86987013Johnson, Master. Harold Theodormale4.01134774211.1333NaNSChild
87087103Balkic, Mr. Cerinmale26.0003492487.8958NaNSAdult
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35SAdult
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55SAdult
87387403Vander Cruyssen, Mr. Victormale47.0003457659.0000NaNSAdult
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.0000NaNCAdult
87587613Najib, Miss. Adele Kiamie \"Jane\"female15.00026677.2250NaNCChild
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.8458NaNSAdult
87787803Petroff, Mr. Nedeliomale19.0003492127.8958NaNSAdult
87887903Laleff, Mr. Kristomale28.0003492177.8958NaNSAdult
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50CAdult
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.0000NaNSAdult
88188203Markun, Mr. Johannmale33.0003492577.8958NaNSAdult
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.5167NaNSAdult
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.5000NaNSAdult
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.0500NaNSAdult
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.1250NaNQAdult
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNSAdult
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42SAdult
88888903Johnston, Miss. Catherine Helen \"Carrie\"female28.012W./C. 660723.4500NaNSAdult
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148CAdult
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQAdult
\n", + "

891 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "5 6 0 3 \n", + "6 7 0 1 \n", + "7 8 0 3 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + "10 11 1 3 \n", + "11 12 1 1 \n", + "12 13 0 3 \n", + "13 14 0 3 \n", + "14 15 0 3 \n", + "15 16 1 2 \n", + "16 17 0 3 \n", + "17 18 1 2 \n", + "18 19 0 3 \n", + "19 20 1 3 \n", + "20 21 0 2 \n", + "21 22 1 2 \n", + "22 23 1 3 \n", + "23 24 1 1 \n", + "24 25 0 3 \n", + "25 26 1 3 \n", + "26 27 0 3 \n", + "27 28 0 1 \n", + "28 29 1 3 \n", + "29 30 0 3 \n", + ".. ... ... ... \n", + "861 862 0 2 \n", + "862 863 1 1 \n", + "863 864 0 3 \n", + "864 865 0 2 \n", + "865 866 1 2 \n", + "866 867 1 2 \n", + "867 868 0 1 \n", + "868 869 0 3 \n", + "869 870 1 3 \n", + "870 871 0 3 \n", + "871 872 1 1 \n", + "872 873 0 1 \n", + "873 874 0 3 \n", + "874 875 1 2 \n", + "875 876 1 3 \n", + "876 877 0 3 \n", + "877 878 0 3 \n", + "878 879 0 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "881 882 0 3 \n", + "882 883 0 3 \n", + "883 884 0 2 \n", + "884 885 0 3 \n", + "885 886 0 3 \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "5 Moran, Mr. James male 28.0 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", + "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", + "12 Saundercock, Mr. William Henry male 20.0 0 \n", + "13 Andersson, Mr. 
Anders Johan male 39.0 1 \n", + "14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n", + "15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n", + "16 Rice, Master. Eugene male 2.0 4 \n", + "17 Williams, Mr. Charles Eugene male 28.0 0 \n", + "18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n", + "19 Masselmani, Mrs. Fatima female 28.0 0 \n", + "20 Fynney, Mr. Joseph J male 35.0 0 \n", + "21 Beesley, Mr. Lawrence male 34.0 0 \n", + "22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n", + "23 Sloper, Mr. William Thompson male 28.0 0 \n", + "24 Palsson, Miss. Torborg Danira female 8.0 3 \n", + "25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n", + "26 Emir, Mr. Farred Chehab male 28.0 0 \n", + "27 Fortune, Mr. Charles Alexander male 19.0 3 \n", + "28 O'Dwyer, Miss. Ellen \"Nellie\" female 28.0 0 \n", + "29 Todoroff, Mr. Lalio male 28.0 0 \n", + ".. ... ... ... ... \n", + "861 Giles, Mr. Frederick Edward male 21.0 1 \n", + "862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n", + "863 Sage, Miss. Dorothy Edith \"Dolly\" female 28.0 8 \n", + "864 Gill, Mr. John William male 24.0 0 \n", + "865 Bystrom, Mrs. (Karolina) female 42.0 0 \n", + "866 Duran y More, Miss. Asuncion female 27.0 1 \n", + "867 Roebling, Mr. Washington Augustus II male 31.0 0 \n", + "868 van Melkebeke, Mr. Philemon male 28.0 0 \n", + "869 Johnson, Master. Harold Theodor male 4.0 1 \n", + "870 Balkic, Mr. Cerin male 26.0 0 \n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", + "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", + "873 Vander Cruyssen, Mr. Victor male 47.0 0 \n", + "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n", + "877 Petroff, Mr. Nedelio male 19.0 0 \n", + "878 Laleff, Mr. Kristo male 28.0 0 \n", + "879 Potter, Mrs. 
Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "881 Markun, Mr. Johann male 33.0 0 \n", + "882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n", + "883 Banfield, Mr. Frederick James male 28.0 0 \n", + "884 Sutehall, Mr. Henry Jr male 25.0 0 \n", + "885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female 28.0 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked Age_Group \n", + "0 0 A/5 21171 7.2500 NaN S Adult \n", + "1 0 PC 17599 71.2833 C85 C Adult \n", + "2 0 STON/O2. 3101282 7.9250 NaN S Adult \n", + "3 0 113803 53.1000 C123 S Adult \n", + "4 0 373450 8.0500 NaN S Adult \n", + "5 0 330877 8.4583 NaN Q Adult \n", + "6 0 17463 51.8625 E46 S Adult \n", + "7 1 349909 21.0750 NaN S Child \n", + "8 2 347742 11.1333 NaN S Adult \n", + "9 0 237736 30.0708 NaN C Child \n", + "10 1 PP 9549 16.7000 G6 S Child \n", + "11 0 113783 26.5500 C103 S Adult \n", + "12 0 A/5. 2151 8.0500 NaN S Adult \n", + "13 5 347082 31.2750 NaN S Adult \n", + "14 0 350406 7.8542 NaN S Child \n", + "15 0 248706 16.0000 NaN S Adult \n", + "16 1 382652 29.1250 NaN Q Child \n", + "17 0 244373 13.0000 NaN S Adult \n", + "18 0 345763 18.0000 NaN S Adult \n", + "19 0 2649 7.2250 NaN C Adult \n", + "20 0 239865 26.0000 NaN S Adult \n", + "21 0 248698 13.0000 D56 S Adult \n", + "22 0 330923 8.0292 NaN Q Child \n", + "23 0 113788 35.5000 A6 S Adult \n", + "24 1 349909 21.0750 NaN S Child \n", + "25 5 347077 31.3875 NaN S Adult \n", + "26 0 2631 7.2250 NaN C Adult \n", + "27 2 19950 263.0000 C23 C25 C27 S Adult \n", + "28 0 330959 7.8792 NaN Q Adult \n", + "29 0 349216 7.8958 NaN S Adult \n", + ".. ... ... ... ... ... ... 
\n", + "861 0 28134 11.5000 NaN S Adult \n", + "862 0 17466 25.9292 D17 S Adult \n", + "863 2 CA. 2343 69.5500 NaN S Adult \n", + "864 0 233866 13.0000 NaN S Adult \n", + "865 0 236852 13.0000 NaN S Adult \n", + "866 0 SC/PARIS 2149 13.8583 NaN C Adult \n", + "867 0 PC 17590 50.4958 A24 S Adult \n", + "868 0 345777 9.5000 NaN S Adult \n", + "869 1 347742 11.1333 NaN S Child \n", + "870 0 349248 7.8958 NaN S Adult \n", + "871 1 11751 52.5542 D35 S Adult \n", + "872 0 695 5.0000 B51 B53 B55 S Adult \n", + "873 0 345765 9.0000 NaN S Adult \n", + "874 0 P/PP 3381 24.0000 NaN C Adult \n", + "875 0 2667 7.2250 NaN C Child \n", + "876 0 7534 9.8458 NaN S Adult \n", + "877 0 349212 7.8958 NaN S Adult \n", + "878 0 349217 7.8958 NaN S Adult \n", + "879 1 11767 83.1583 C50 C Adult \n", + "880 1 230433 26.0000 NaN S Adult \n", + "881 0 349257 7.8958 NaN S Adult \n", + "882 0 7552 10.5167 NaN S Adult \n", + "883 0 C.A./SOTON 34068 10.5000 NaN S Adult \n", + "884 0 SOTON/OQ 392076 7.0500 NaN S Adult \n", + "885 5 382652 29.1250 NaN Q Adult \n", + "886 0 211536 13.0000 NaN S Adult \n", + "887 0 112053 30.0000 B42 S Adult \n", + "888 2 W./C. 
6607 23.4500 NaN S Adult \n", + "889 0 111369 30.0000 C148 C Adult \n", + "890 0 370376 7.7500 NaN Q Adult \n", + "\n", + "[891 rows x 13 columns]" + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 210, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAENRJREFUeJzt3X+s3XV9x/Hn21bUta5Fqjek7VYWayKRqHhDuphst9a4ggvlD1gwOApp1sSxxU2y2W3J3K8/YAtjgRj1bjiKQQtzc22AzZDCCXNZme1QCjLDFTu4ltBpy92uqFvne3+cT8213HK/9/y4397PfT6Sm/P9fr6fcz6f9+nt637v55zzvZGZSJLq9aq2JyBJGi6DXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS55W1PAGDNmjW5YcOGnu773e9+lxUrVgx2Qmc5a14arHlp6KfmQ4cOfTsz3zhXv7Mi6Dds2MDBgwd7um+n02FsbGywEzrLWfPSYM1LQz81R8R/NOnn0o0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXurPhkbD8Of2uK63bd38rYR256fyvjStJ8eEYvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekirXKOgj4khEHI6Ir0TEwdL2hoh4MCKeLrfnlvaIiNsiYiIiHo+Ii4dZgCTplc3njH5zZr4jM0fL/i5gf2ZuBPaXfYBLgY3layfwiUFNVpI0f/0s3WwDdpft3cAVM9rvyq4DwOqIOL+PcSRJfYjMnLtTxDeBE0ACn8rM8Yh4MTNXz+hzIjPPjYj7gJsy80ulfT/w0cw8eNpj7qR7xs/IyMi79uzZ01MBx45P8cL3erpr3y5au6qVcaenp1m5cmUrY7fFmpcGa56fzZs3H5qxynJGTf+U4Lsz82hEvAl4MCL+/RX6xixtL/tpkpnjwDjA6Ohojo2NNZzKj7v97r3ccridv4h45JqxVsbtdDr0+nwtVta8NFjzcDRausnMo+X2GPAF4BLghVNLMuX2WOk+Cayfcfd1wNFBTViSND9zBn1ErIiI15/aBt4HPAHsA7aXbtuBvWV7H3BteffNJmAqM58f+MwlSY00WfMYAb4QEaf6fzYz/zEivgzcGxE7gGeBq0r/B4DLgAngJeD6gc9aktTYnEGfmc8Ab5+l/TvAllnaE7hhILOTJPXNT8ZKUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpco2DPiKWRcRjE
XFf2b8gIh6NiKcj4p6IOKe0v6bsT5TjG4YzdUlSE/M5o/8w8NSM/ZuBWzNzI3AC2FHadwAnMvPNwK2lnySpJY2CPiLWAe8H/qrsB/Ae4POly27girK9rexTjm8p/SVJLWh6Rv8XwG8DPyz75wEvZubJsj8JrC3ba4HnAMrxqdJfktSC5XN1iIhfBI5l5qGIGDvVPEvXbHBs5uPuBHYCjIyM0Ol0msz3ZUZeBzdedHLujkPQ65z7NT093drYbbHmpcGah2POoAfeDVweEZcBrwV+ku4Z/uqIWF7O2tcBR0v/SWA9MBkRy4FVwPHTHzQzx4FxgNHR0RwbG+upgNvv3ssth5uUMXhHrhlrZdxOp0Ovz9diZc1LgzUPx5xLN5n5O5m5LjM3AFcDD2XmNcDDwJWl23Zgb9neV/Ypxx/KzJed0UuSFkY/76P/KPCRiJiguwZ/R2m/AzivtH8E2NXfFCVJ/ZjXmkdmdoBO2X4GuGSWPt8HrhrA3CRJA+AnYyWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJaly7fxVbUk6i2zYdX9rY9+5dcXQx/CMXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqtycQR8Rr42If42Ir0bEkxHxh6X9goh4NCKejoh7IuKc0v6asj9Rjm8YbgmSpFfS5Iz+B8B7MvPtwDuArRGxCbgZuDUzNwIngB2l/w7gRGa+Gbi19JMktWTOoM+u6bL76vKVwHuAz5f23cAVZXtb2acc3xIRMbAZS5LmpdEafUQsi4ivAMeAB4FvAC9m5snSZRJYW7bXAs8BlONTwHmDnLQkqbnIzOadI1YDXwB+H/jrsjxDRKwHHsjMiyLiSeAXMnOyHPsGcElmfue0x9oJ7AQYGRl51549e3oq4NjxKV74Xk937dtFa1e1Mu709DQrV65sZey2WPPS0FbNh781teBjnnLBqmU917x58+ZDmTk6V795/eGRzHwxIjrAJmB1RCwvZ+3rgKOl2ySwHpiMiOXAKuD4LI81DowDjI6O5tjY2Hym8iO3372XWw638/dTjlwz1sq4nU6HXp+vxcqal4a2ar6u5T88Muyam7zr5o3lTJ6IeB3wXuAp4GHgytJtO7C3bO8r+5TjD+V8fm2QJA1Uk1Ph84HdEbGM7g+GezPzvoj4GrAnIv4EeAy4o/S/A/hMREzQPZO/egjzliQ1NGfQZ+bjwDtnaX8GuGSW9u8DVw1kdpKkvvnJWEmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekys0Z9BGxPiIejoinIuLJiPhwaX9DRDwYEU+X23NLe0TEbRExERGPR8TFwy5CknRmTc7oTwI3ZuZbgU3ADRFxIbAL2J+ZG4H9ZR/gUmBj+doJfGLgs5YkNTZn0Gfm85n5b2X7v4GngLXANmB36bYbuKJsbwPuyq4DwOqIOH/gM5ckNRKZ2bxzxAbgEeBtwLOZuXrGsROZeW5E3AfclJlfKu37gY9m5sHTHmsn3TN+RkZG3rVnz56eCjh2fIoXvtfTXft20dpVrYw7PT3NypUrWxm7Lda8NLRV8+FvTS34mKdcsGpZzzVv3rz5UGaOztVvedMHjIiVwN8Cv5GZ/xURZ+w6S9vLfppk5jgwDjA6OppjY2NNp/Jjbr97L7ccblzGQB25ZqyVcTudDr0+X4uVNS8NbdV83a77F3zMU+7cumLoNTd6101EvJpuyN+dmX9Xml84tSRTbo+V9klg/Yy7rwOODma6kqT5avKumwDuA
J7KzD+fcWgfsL1sbwf2zmi/trz7ZhMwlZnPD3DOkqR5aLLm8W7gl4HDEfGV0va7wE3AvRGxA3gWuKocewC4DJgAXgKuH+iMJUnzMmfQlxdVz7Qgv2WW/gnc0Oe8JEkD4idjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVLk5gz4iPh0RxyLiiRltb4iIByPi6XJ7bmmPiLgtIiYi4vGIuHiYk5ckza3JGf2dwNbT2nYB+zNzI7C/7ANcCmwsXzuBTwxmmpKkXs0Z9Jn5CHD8tOZtwO6yvRu4Ykb7Xdl1AFgdEecParKSpPnrdY1+JDOfByi3byrta4HnZvSbLG2SpJYsH/DjxSxtOWvHiJ10l3cYGRmh0+n0NODI6+DGi072dN9+9Trnfk1PT7c2dluseWloq+a2MgQWpuZeg/6FiDg/M58vSzPHSvsksH5Gv3XA0dkeIDPHgXGA0dHRHBsb62kit9+9l1sOD/rnVTNHrhlrZdxOp0Ovz9diZc1LQ1s1X7fr/gUf85Q7t64Yes29Lt3sA7aX7e3A3hnt15Z332wCpk4t8UiS2jHnqXBEfA4YA9ZExCTwMeAm4N6I2AE8C1xVuj8AXAZMAC8B1w9hzpKkeZgz6DPzA2c4tGWWvgnc0O+kJEmD4ydjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqN5Sgj4itEfH1iJiIiF3DGEOS1MzAgz4ilgEfBy4FLgQ+EBEXDnocSVIzwzijvwSYyMxnMvN/gD3AtiGMI0lqYBhBvxZ4bsb+ZGmTJLVg+RAeM2Zpy5d1itgJ7Cy70xHx9R7HWwN8u8f79iVubmNUoMWaW2TNS8OSq3nzzX3V/NNNOg0j6CeB9TP21wFHT++UmePAeL+DRcTBzBzt93EWE2teGqx5aViImoexdPNlYGNEXBAR5wBXA/uGMI4kqYGBn9Fn5smI+DXgi8Ay4NOZ+eSgx5EkNTOMpRsy8wHggWE89iz6Xv5ZhKx5abDmpWHoNUfmy14nlSRVxEsgSFLlFk3Qz3VZhYh4TUTcU44/GhEbFn6Wg9Wg5o9ExNci4vGI2B8Rjd5qdTZrevmMiLgyIjIiFv07NJrUHBG/VP6tn4yIzy70HAetwff2T0XEwxHxWPn+vqyNeQ5KRHw6Io5FxBNnOB4RcVt5Ph6PiIsHOoHMPOu/6L6o+w3gZ4BzgK8CF57W51eBT5btq4F72p73AtS8GfiJsv2hpVBz6fd64BHgADDa9rwX4N95I/AYcG7Zf1Pb816AmseBD5XtC4Ejbc+7z5p/DrgYeOIMxy8D/oHu55A2AY8OcvzFckbf5LIK24DdZfvzwJaImO3DW4vFnDVn5sOZ+VLZPUD3MwuLWdPLZ/wx8KfA9xdyckPSpOZfAT6emScAMvPYAs9x0JrUnMBPlu1VzPJZnMUkMx8Bjr9Cl23AXdl1AFgdEecPavzFEvRNLqvwoz6ZeRKYAs5bkNkNx3wvJbGD7hnBYjZnzRHxTmB9Zt63kBMboib/zm8B3hIR/xwRByJi64LNbjia1PwHwAcjYpLuO/h+fWGm1pqhXjpmKG+vHIIml1VodOmFRaRxPRHxQWAU+Pmhzmj4XrHmiHgVcCtw3UJNaAE0+XdeTnf5Zozub23/FBFvy8wXhzy3YWlS8weAOzPzloj4WeAzpeYfDn96rRhqfi2WM/oml1X4UZ+IWE73171X+lXpbNfoUhIR8V7g9
4DLM/MHCzS3YZmr5tcDbwM6EXGE7lrmvkX+gmzT7+29mfm/mflN4Ot0g3+xalLzDuBegMz8F+C1dK+DU6tG/997tViCvsllFfYB28v2lcBDWV7lWKTmrLksY3yKbsgv9nVbmKPmzJzKzDWZuSEzN9B9XeLyzDzYznQHosn39t/TfeGdiFhDdynnmQWd5WA1qflZYAtARLyVbtD/54LOcmHtA64t777ZBExl5vODevBFsXSTZ7isQkT8EXAwM/cBd9D99W6C7pn81e3NuH8Na/4zYCXwN+V152cz8/LWJt2nhjVXpWHNXwTeFxFfA/4P+K3M/E57s+5Pw5pvBP4yIn6T7hLGdYv5xC0iPkd36W1Ned3hY8CrATLzk3Rfh7gMmABeAq4f6PiL+LmTJDWwWJZuJEk9MuglqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6Sarc/wOuFeo7bmQhLgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y.hist()" + ] + }, + { + "cell_type": "code", + "execution_count": 211, + "metadata": {}, + "outputs": [], + "source": [ + "Age_Group = pd.get_dummies(df.Age_Group)\n", + "df = pd.concat([df, Age_Group], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 212, + "metadata": {}, + "outputs": [], + "source": [ + "Sex = pd.get_dummies(df.Sex)\n", + "df = pd.concat([df, Sex], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 213, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassAgeSibSpParchFareChildAdultSeniorfemalemale
PassengerId1.000000-0.005007-0.0351440.034212-0.057527-0.0016520.012658-0.0248740.0231950.003516-0.0429390.042939
Survived-0.0050071.000000-0.338481-0.064910-0.0353220.0816290.2573070.122239-0.104791-0.0506610.543351-0.543351
Pclass-0.035144-0.3384811.000000-0.3398980.0830810.018443-0.5495000.125620-0.104503-0.063635-0.1319000.131900
Age0.034212-0.064910-0.3398981.000000-0.233296-0.1724820.096688-0.5951430.4931130.308695-0.0811630.081163
SibSp-0.057527-0.0353220.083081-0.2332961.0000000.4148380.1596510.324068-0.305330-0.0343740.114631-0.114631
Parch-0.0016520.0816290.018443-0.1724820.4148381.0000000.2162250.317670-0.300231-0.0303170.245489-0.245489
Fare0.0126580.257307-0.5495000.0966880.1596510.2162251.000000-0.0075460.009702-0.0086120.182333-0.182333
Child-0.0248740.1222390.125620-0.5951430.3240680.317670-0.0075461.000000-0.961396-0.0362760.107150-0.107150
Adult0.023195-0.104791-0.1045030.493113-0.305330-0.3002310.009702-0.9613961.000000-0.240114-0.0847490.084749
Senior0.003516-0.050661-0.0636350.308695-0.034374-0.030317-0.008612-0.036276-0.2401141.000000-0.0702170.070217
female-0.0429390.543351-0.131900-0.0811630.1146310.2454890.1823330.107150-0.084749-0.0702171.000000-1.000000
male0.042939-0.5433510.1319000.081163-0.114631-0.245489-0.182333-0.1071500.0847490.070217-1.0000001.000000
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Age SibSp Parch \\\n", + "PassengerId 1.000000 -0.005007 -0.035144 0.034212 -0.057527 -0.001652 \n", + "Survived -0.005007 1.000000 -0.338481 -0.064910 -0.035322 0.081629 \n", + "Pclass -0.035144 -0.338481 1.000000 -0.339898 0.083081 0.018443 \n", + "Age 0.034212 -0.064910 -0.339898 1.000000 -0.233296 -0.172482 \n", + "SibSp -0.057527 -0.035322 0.083081 -0.233296 1.000000 0.414838 \n", + "Parch -0.001652 0.081629 0.018443 -0.172482 0.414838 1.000000 \n", + "Fare 0.012658 0.257307 -0.549500 0.096688 0.159651 0.216225 \n", + "Child -0.024874 0.122239 0.125620 -0.595143 0.324068 0.317670 \n", + "Adult 0.023195 -0.104791 -0.104503 0.493113 -0.305330 -0.300231 \n", + "Senior 0.003516 -0.050661 -0.063635 0.308695 -0.034374 -0.030317 \n", + "female -0.042939 0.543351 -0.131900 -0.081163 0.114631 0.245489 \n", + "male 0.042939 -0.543351 0.131900 0.081163 -0.114631 -0.245489 \n", + "\n", + " Fare Child Adult Senior female male \n", + "PassengerId 0.012658 -0.024874 0.023195 0.003516 -0.042939 0.042939 \n", + "Survived 0.257307 0.122239 -0.104791 -0.050661 0.543351 -0.543351 \n", + "Pclass -0.549500 0.125620 -0.104503 -0.063635 -0.131900 0.131900 \n", + "Age 0.096688 -0.595143 0.493113 0.308695 -0.081163 0.081163 \n", + "SibSp 0.159651 0.324068 -0.305330 -0.034374 0.114631 -0.114631 \n", + "Parch 0.216225 0.317670 -0.300231 -0.030317 0.245489 -0.245489 \n", + "Fare 1.000000 -0.007546 0.009702 -0.008612 0.182333 -0.182333 \n", + "Child -0.007546 1.000000 -0.961396 -0.036276 0.107150 -0.107150 \n", + "Adult 0.009702 -0.961396 1.000000 -0.240114 -0.084749 0.084749 \n", + "Senior -0.008612 -0.036276 -0.240114 1.000000 -0.070217 0.070217 \n", + "female 0.182333 0.107150 -0.084749 -0.070217 1.000000 -1.000000 \n", + "male -0.182333 -0.107150 0.084749 0.070217 -1.000000 1.000000 " + ] + }, + "execution_count": 213, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 214, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 214, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y= df.Survived\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeSibSpParchChildAdultSeniorfemale
0322.0100100
1138.0100101
2326.0000101
3135.0100101
4335.0000100
5328.0000100
6154.0000100
732.0311000
8327.0020101
9214.0101001
\n", + "
" + ], + "text/plain": [ + " Pclass Age SibSp Parch Child Adult Senior female\n", + "0 3 22.0 1 0 0 1 0 0\n", + "1 1 38.0 1 0 0 1 0 1\n", + "2 3 26.0 0 0 0 1 0 1\n", + "3 1 35.0 1 0 0 1 0 1\n", + "4 3 35.0 0 0 0 1 0 0\n", + "5 3 28.0 0 0 0 1 0 0\n", + "6 1 54.0 0 0 0 1 0 0\n", + "7 3 2.0 3 1 1 0 0 0\n", + "8 3 27.0 0 2 0 1 0 1\n", + "9 2 14.0 1 0 1 0 0 1" + ] + }, + "execution_count": 215, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = df.drop(columns= ['Survived','Name','male','Sex','PassengerId','Age_Group','Ticket','Cabin','Fare','Embarked'])\n", + "X.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: Survived R-squared: 0.398
Model: OLS Adj. R-squared: 0.394
Method: Least Squares F-statistic: 83.49
Date: Mon, 23 Jul 2018 Prob (F-statistic): 5.03e-93
Time: 00:39:32 Log-Likelihood: -395.66
No. Observations: 891 AIC: 807.3
Df Residuals: 883 BIC: 845.7
Df Model: 7
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
Pclass -0.1803 0.017 -10.922 0.000 -0.213 -0.148
Age -0.0039 0.001 -2.842 0.005 -0.007 -0.001
SibSp -0.0463 0.013 -3.557 0.000 -0.072 -0.021
Parch -0.0244 0.018 -1.347 0.178 -0.060 0.011
Child 0.8839 0.065 13.505 0.000 0.755 1.012
Adult 0.7508 0.070 10.728 0.000 0.613 0.888
Senior 0.7306 0.175 4.184 0.000 0.388 1.073
female 0.5150 0.028 18.492 0.000 0.460 0.570
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 37.368 Durbin-Watson: 1.939
Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.047
Skew: 0.520 Prob(JB): 1.22e-09
Kurtosis: 3.156 Cond. No. 463.


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Survived R-squared: 0.398\n", + "Model: OLS Adj. R-squared: 0.394\n", + "Method: Least Squares F-statistic: 83.49\n", + "Date: Mon, 23 Jul 2018 Prob (F-statistic): 5.03e-93\n", + "Time: 00:39:32 Log-Likelihood: -395.66\n", + "No. Observations: 891 AIC: 807.3\n", + "Df Residuals: 883 BIC: 845.7\n", + "Df Model: 7 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Pclass -0.1803 0.017 -10.922 0.000 -0.213 -0.148\n", + "Age -0.0039 0.001 -2.842 0.005 -0.007 -0.001\n", + "SibSp -0.0463 0.013 -3.557 0.000 -0.072 -0.021\n", + "Parch -0.0244 0.018 -1.347 0.178 -0.060 0.011\n", + "Child 0.8839 0.065 13.505 0.000 0.755 1.012\n", + "Adult 0.7508 0.070 10.728 0.000 0.613 0.888\n", + "Senior 0.7306 0.175 4.184 0.000 0.388 1.073\n", + "female 0.5150 0.028 18.492 0.000 0.460 0.570\n", + "==============================================================================\n", + "Omnibus: 37.368 Durbin-Watson: 1.939\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.047\n", + "Skew: 0.520 Prob(JB): 1.22e-09\n", + "Kurtosis: 3.156 Cond. No. 
463.\n", + "==============================================================================\n", + "\n", + "Warnings:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "execution_count": 216, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = sm.OLS(y, X)\n", + "results = model.fit()\n", + "results.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((596, 8), (295, 8))" + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.shape, X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 219, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 219, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 220, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 
1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 220, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 221, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8203389830508474" + ] + }, + "execution_count": 221, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "709 0.029158\n", + "439 0.268393\n", + "840 0.131377\n", + "720 0.990337\n", + "39 0.756755\n", + "290 0.983319\n", + "300 0.614933\n", + "333 0.187566\n", + "208 0.795212\n", + "136 0.961940\n", + "137 0.378748\n", + "696 0.037025\n", + "485 0.451527\n", + "244 0.092063\n", + "344 0.248737\n", + "853 1.131285\n", + "621 0.359091\n", + "653 0.614933\n", + "886 0.284119\n", + "110 0.385754\n", + "294 0.115651\n", + "447 0.436861\n", + "192 0.603996\n", + "682 0.131377\n", + "538 0.099926\n", + "819 0.115937\n", + "30 0.413273\n", + "673 0.268393\n", + "63 0.139525\n", + "396 0.603139\n", + " ... 
\n", + "456 0.314990\n", + "500 0.276273\n", + "430 0.460449\n", + "445 0.639005\n", + "650 0.099926\n", + "172 0.783413\n", + "450 0.153520\n", + "314 0.150449\n", + "332 0.396686\n", + "801 0.712633\n", + "90 0.095995\n", + "834 0.139239\n", + "181 0.280187\n", + "581 0.861443\n", + "795 0.236943\n", + "69 0.015150\n", + "131 0.131377\n", + "334 0.929137\n", + "597 0.017368\n", + "135 0.299844\n", + "164 0.129449\n", + "28 0.614933\n", + "783 0.004709\n", + "193 0.440805\n", + "869 0.256612\n", + "715 0.135308\n", + "525 0.050784\n", + "381 0.805283\n", + "140 0.566035\n", + "173 0.127445\n", + "Length: 295, dtype: float64" + ] + }, + "execution_count": 222, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_hat = results.predict(X_test)\n", + "y_hat" + ] + }, + { + "cell_type": "code", + "execution_count": 223, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.46716833764938936" + ] + }, + "execution_count": 223, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r2_score(y_test, y_hat)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/__pycache__/assessment.cpython-36.pyc b/__pycache__/assessment.cpython-36.pyc new file mode 100644 index 0000000..2ea9230 Binary files /dev/null and b/__pycache__/assessment.cpython-36.pyc differ diff --git a/__pycache__/testing.cpython-36-PYTEST.pyc 
b/__pycache__/testing.cpython-36-PYTEST.pyc new file mode 100644 index 0000000..8e0cd6c Binary files /dev/null and b/__pycache__/testing.cpython-36-PYTEST.pyc differ diff --git a/alice.txt b/alice.txt new file mode 100644 index 0000000..84bf3cc --- /dev/null +++ b/alice.txt @@ -0,0 +1,17 @@ +Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?' +So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. +There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. +In another moment down went Alice after it, never once considering how in the world she was to get out again. +The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. 
+Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. +'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.) +Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.) +Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! 
The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.' +Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over. +Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' 
She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof. +There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again. +Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted! +Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only knew how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible. 
+There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here before,' said Alice,) and round the neck of the bottle was a paper label, with the words 'DRINK ME' beautifully printed on it in large letters. +It was all very well to say 'Drink me,' but the wise little Alice was not going to do THAT in a hurry. 'No, I'll look first,' she said, 'and see whether it's marked "poison" or not'; for she had read several nice little histories about children who had got burnt, and eaten up by wild beasts and other unpleasant things, all because they WOULD not remember the simple rules their friends had taught them: such as, that a red-hot poker will burn you if you hold it too long; and that if you cut your finger VERY deeply with a knife, it usually bleeds; and she had never forgotten that, if you drink much from a bottle marked 'poison,' it is almost certain to disagree with you, sooner or later. +However, this bottle was NOT marked 'poison,' so Alice ventured to taste it, and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and hot buttered toast,) she very soon finished it off. diff --git a/assessment.py b/assessment.py index 281675d..ffb054d 100644 --- a/assessment.py +++ b/assessment.py @@ -1,142 +1,178 @@ import numpy as np import pandas as pd - -# PYTHON SECTION +##PYTHON SECTION def count_characters(string): - ''' - INPUT: STRING - OUTPUT: DICT (with counts of each character in input string) +# ''' +# INPUT: STRING +# OUTPUT: DICT (with counts of each character in input string) - Return a dictionary which contains - a count of the number of times each character appears in the string. - Characters which with a count of 0 should not be included in the - output dictionary. 
def count_characters(string):
    '''
    INPUT: STRING
    OUTPUT: DICT (with counts of each character in input string)

    Return a dictionary which contains a count of the number of times
    each character appears in the string. Characters with a count of 0
    are not included in the output dictionary.

    Counting is case-insensitive: the input is lower-cased first.
    '''
    s = string.lower()
    # set(s) gives each distinct character exactly once, so every key
    # in the result has a count >= 1 by construction.
    return {ch: s.count(ch) for ch in set(s)}


def invert_dictionary(d):
    '''
    INPUT: DICT
    OUTPUT: DICT (of sets of input keys indexing the same input values,
            indexed by the input values)

    Given a dictionary d, return a new dictionary with d's values as keys
    and the value for a given key being the set of d's keys which shared
    the same value.
    e.g. {'a': 2, 'b': 4, 'c': 2} => {2: {'a', 'c'}, 4: {'b'}}
    '''
    # BUG FIX: the original overwrote the argument with a hardcoded
    # dictionary, so the function ignored its input entirely.
    # BUG FIX: the original used set(k) / inv[v].update(k), which iterates
    # over the key itself and splits multi-character string keys into
    # individual characters; {k} / add(k) treats each key atomically.
    inv = {}
    for k, v in d.items():
        inv.setdefault(v, set()).add(k)
    return inv


def word_count(filename):
    '''
    INPUT: STRING
    OUTPUT: INT, INT, INT (a tuple with line, word,
            and character count of named INPUT file)

    The INPUT filename is the name of a text file.
    The OUTPUT is a tuple containing (in order)
    the following stats for the text file:
      1. number of lines
      2. number of words (broken by whitespace)
      3. number of characters
    '''
    # BUG FIX: the original hardcoded open('alice.txt'), ignoring the
    # filename parameter; it now opens the file it was asked to count.
    num_lines = 0
    num_words = 0
    num_chars = 0
    with open(filename) as f:
        for line in f:
            num_lines += 1
            num_words += len(line.split())
            # len(line) includes the trailing newline, matching the
            # original implementation's character count.
            num_chars += len(line)
    return (num_lines, num_words, num_chars)


def matrix_multiplication(A, B):
    '''
    INPUT: LIST (of length n) OF LIST (of length n) OF INTEGERS,
           LIST (of length n) OF LIST (of length n) OF INTEGERS
    OUTPUT: LIST OF LIST OF INTEGERS
            (storing the product of a matrix multiplication operation)

    Return the matrix which is the product of matrix A and matrix B
    where A and B will be (a) integer valued (b) square matrices
    (c) of size n-by-n (d) encoded as lists of lists.

    For example:
    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]] corresponds to the matrix

    | 2  3  4 |
    | 6  4  2 |
    |-1  2  0 |

    Written in straight python (no numpy), per the exercise statement.
    '''
    result = []
    for i in range(len(A)):
        row = []
        for j in range(len(B[0])):
            # Dot product of A's i-th row with B's j-th column.
            row.append(sum(A[i][k] * B[k][j] for k in range(len(A[0]))))
        result.append(row)
    return result


# NumPy SECTION

def array_work(rows, cols, scalar, matrixA):
    '''
    INPUT: INT, INT, INT, NUMPY ARRAY
    OUTPUT: NUMPY ARRAY
            (of matrix product of r-by-c matrix of "scalar"'s times matrixA)

    Create a matrix of size (rows, cols) with elements initialized to the
    scalar value. Right multiply that matrix with the passed matrixA
    (i.e. A @ B, not B @ A). Return the result of the multiplication.

    E.g., array_work(2, 3, 5, [[3, 4], [5, 6], [7, 8]])
       [[3, 4],     [[5, 5, 5],
        [5, 6],  *   [5, 5, 5]]
        [7, 8]]
    '''
    # np.full builds the constant matrix in one call; matmul performs
    # A @ B with B on the right, as the docstring requires.
    matrixB = np.full((rows, cols), scalar)
    return np.matmul(matrixA, matrixB)


def boolean_indexing(arr, minimum):
    '''
    INPUT: NUMPY ARRAY (or nested list), INT
    OUTPUT: NUMPY ARRAY
            (of just elements in "arr" greater or equal to "minimum")

    Return a flat array of only the elements of "arr" that are greater
    than or equal to "minimum".

    Ex:
    In [1]: boolean_indexing([[3, 4, 5], [6, 7, 8]], 7)
    Out[1]: array([7, 8])
    '''
    # BUG FIX: the docstring example passes a plain list, but a python
    # list does not support boolean-mask indexing; coerce to an ndarray
    # first so both lists and arrays work.
    a = np.asarray(arr)
    return a[a >= minimum]
+ + E.g., + In [1]: make_series(5, 3, ['a', 'b', 'c']) + Out[1]: + a 5 + b 6 + c 7 + dtype: int64 + ''' + return( pd.Series(range(start, start+length), index=list(index))) def data_frame_work(df, colA, colB, colC): - ''' - INPUT: DATAFRAME, STR, STR, STR - OUTPUT: None - - Insert a column (colC) into the dataframe that is the sum of colA and colB. - Assume that df contains columns colA and colB and that these are numeric. - ''' - pass + ''' + INPUT: DATAFRAME, STR, STR, STR + OUTPUT: None + + Insert a column (colC) into the dataframe that is the sum of colA and colB. + Assume that df contains columns colA and colB and that these are numeric. + ''' + ##insert a new column + df[colC] = df[colA] +df[colB] + + return(df) diff --git a/testing.py b/testing.py index c68b010..ebda921 100644 --- a/testing.py +++ b/testing.py @@ -1,77 +1,87 @@ -def test_count_characters(self): - string = "abafdcggfaabe" - answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2} - result = a.count_characters(string) - self.assertEqual(result, answer) +import assessment as a +import numpy as np +import pandas as pd +import pytest -def test_invert_dictionary(self): +def test_count_characters(): + + string = "abafdcggfaabe" + answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2} + result = a.count_characters(string) + print(result) + assert result == answer + + +def test_invert_dictionary(): d = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2} result = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}} - self.assertEqual(a.invert_dictionary(d), result) - - -def test_word_count(self): - self.assertEqual(a.word_count('data/alice.txt'), (17, 1615, 8449)) - - -def test_matrix_multiplication(self): - A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]] - B = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]] - answer = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]] - self.assertEqual(a.matrix_multiplication(A, B), answer) - - -def test_array_work(self): - matrixA = np.array([[-4, -2], - [0, -3], - [-4, 
import assessment as a
import numpy as np
import pandas as pd
# FIX: dropped the unused `import pytest` — pytest discovers and runs
# these functions without being imported here.


def test_count_characters():
    # Frequency of each character; characters absent from the string
    # must not appear in the result.
    # FIX: removed a stray debug print(result) left over from development.
    string = "abafdcggfaabe"
    answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
    assert a.count_characters(string) == answer


def test_invert_dictionary():
    # Values become keys; keys sharing a value are grouped into a set.
    d = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
    result = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}}
    assert a.invert_dictionary(d) == result


def test_word_count():
    # (lines, whitespace-separated words, characters) of the sample file.
    # NOTE(review): expected char count was changed from 8449 to 8461 in
    # this revision — confirm against the checked-in data/alice.txt.
    assert a.word_count('data/alice.txt') == (17, 1615, 8461)


def test_matrix_multiplication():
    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]]
    B = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]]
    answer = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]]
    assert a.matrix_multiplication(A, B) == answer


def test_array_work():
    # Right-multiplication by a constant (rows x cols) matrix.
    matrixA = np.array([[-4, -2],
                        [0, -3],
                        [-4, -1],
                        [-1, 1],
                        [-3, 0]])
    answer1 = np.array([[-24, -24, -24],
                        [-12, -12, -12],
                        [-20, -20, -20],
                        [0, 0, 0],
                        [-12, -12, -12]])
    result1 = a.array_work(2, 3, 4, matrixA)
    assert np.all(answer1 == result1)

    answer2 = np.array([[-36, -36],
                        [-18, -18],
                        [-30, -30],
                        [0, 0],
                        [-18, -18]])
    result2 = a.array_work(2, 2, 6, matrixA)
    assert np.all(answer2 == result2)


def test_boolean_indexing():
    # Only elements >= minimum survive, flattened in row order.
    arr = np.array([[-4, -4, -3],
                    [-1, 16, -4],
                    [-3, 6, 4]])
    assert np.all(a.boolean_indexing(arr, 0) == np.array([16, 6, 4]))
    assert np.all(a.boolean_indexing(arr, 10) == np.array([16]))


def test_make_series():
    # Sequential integers from `start`, labelled by `index`.
    result = a.make_series(7, 4, ['a', 'b', 'c', 'd'])
    assert isinstance(result, pd.Series)
    assert result['a'] == 7
    assert result['d'] == 10

    result = a.make_series(22, 5, ['a', 'b', 'c', 'd', 'hi'])
    assert result['a'] == 22
    assert result['d'] == 25
    assert result['hi'] == 26


def test_data_frame_work():
    # colC is inserted in place as the elementwise sum of colA and colB.
    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    colA, colB, colC = ('a', 'b', 'c')
    a.data_frame_work(df, colA, colB, colC)
    assert colC in df.columns.tolist()
    assert df[colC].tolist() == [5, 7, 9]