diff --git a/.ipynb_checkpoints/Titanic-answer-checkpoint.ipynb b/.ipynb_checkpoints/Titanic-answer-checkpoint.ipynb
new file mode 100644
index 0000000..533e7db
--- /dev/null
+++ b/.ipynb_checkpoints/Titanic-answer-checkpoint.ipynb
@@ -0,0 +1,821 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 339,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import statsmodels.api as sm\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import r2_score\n",
+ "from pandas.plotting import scatter_matrix\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 340,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 340,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read train.csv into a DataFrame and preview its first five rows.\n",
+ "df = pd.read_csv(filepath_or_buffer='train.csv')\n",
+ "df.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 341,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 12 columns):\n",
+ "PassengerId 891 non-null int64\n",
+ "Survived 891 non-null int64\n",
+ "Pclass 891 non-null int64\n",
+ "Name 891 non-null object\n",
+ "Sex 891 non-null object\n",
+ "Age 714 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Ticket 891 non-null object\n",
+ "Fare 891 non-null float64\n",
+ "Cabin 204 non-null object\n",
+ "Embarked 889 non-null object\n",
+ "dtypes: float64(2), int64(5), object(5)\n",
+ "memory usage: 83.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 342,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 0\n",
+ "1 1\n",
+ "2 1\n",
+ "3 1\n",
+ "4 0\n",
+ "Name: Survived, dtype: int64"
+ ]
+ },
+ "execution_count": 342,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Target vector: the Survived column of the loaded frame.\n",
+ "y = df['Survived']\n",
+ "y.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 343,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 343,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A bare expression is only rendered when it is the last line of a cell,\n",
+ "# so show the rows with a missing Age explicitly before the general preview.\n",
+ "display(df[df.Age.isna()].head())\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 344,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Impute missing ages with the column mean.\n",
+ "# NOTE(review): the mean is taken over every row, including those later placed in the test split.\n",
+ "Age_mean = df['Age'].mean()\n",
+ "df['Age'] = df['Age'].fillna(value=Age_mean)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 345,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 12 columns):\n",
+ "PassengerId 891 non-null int64\n",
+ "Survived 891 non-null int64\n",
+ "Pclass 891 non-null int64\n",
+ "Name 891 non-null object\n",
+ "Sex 891 non-null object\n",
+ "Age 891 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Ticket 891 non-null object\n",
+ "Fare 891 non-null float64\n",
+ "Cabin 204 non-null object\n",
+ "Embarked 889 non-null object\n",
+ "dtypes: float64(2), int64(5), object(5)\n",
+ "memory usage: 83.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Verify the imputation left no missing ages, then re-inspect the frame.\n",
+ "assert df.Age.notna().all(), 'Age still contains missing values'\n",
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 361,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 6 columns):\n",
+ "Pclass 891 non-null int64\n",
+ "Sex 891 non-null object\n",
+ "Age 891 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Fare 891 non-null float64\n",
+ "dtypes: float64(2), int64(3), object(1)\n",
+ "memory usage: 41.8+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Feature frame: drop the target and the columns that are not used as predictors.\n",
+ "x = df.drop(columns=['Survived', 'Cabin', 'PassengerId', 'Name','Embarked', 'Ticket'])\n",
+ "# display() is needed here: a non-final bare expression would be discarded.\n",
+ "display(x.head())\n",
+ "x.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 362,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Pclass | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Fare | \n",
+ " Sex_female | \n",
+ " Sex_male | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 7.2500 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 71.2833 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 7.9250 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 53.1000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 8.0500 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Pclass Age SibSp Parch Fare Sex_female Sex_male\n",
+ "0 3 22.0 1 0 7.2500 0 1\n",
+ "1 1 38.0 1 0 71.2833 1 0\n",
+ "2 3 26.0 0 0 7.9250 1 0\n",
+ "3 1 35.0 1 0 53.1000 1 0\n",
+ "4 3 35.0 0 0 8.0500 0 1"
+ ]
+ },
+ "execution_count": 362,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# One-hot encode the non-numeric column(s); numeric columns pass through unchanged.\n",
+ "x = pd.get_dummies(data=x)\n",
+ "x.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 363,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sex_male is the complement of Sex_female, so one indicator column is enough.\n",
+ "# errors='ignore' keeps the cell re-runnable once the column is already gone.\n",
+ "x = x.drop(columns=['Sex_male'], errors='ignore')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 364,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((596, 6), (295, 6))"
+ ]
+ },
+ "execution_count": 364,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.\n",
+ "x_train, x_test, y_train, y_test = train_test_split(\n",
+ "    x,\n",
+ "    y,\n",
+ "    test_size=0.33,\n",
+ "    random_state=42,\n",
+ ")\n",
+ "(x_train.shape, x_test.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 365,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Pclass | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Fare | \n",
+ " Sex_female | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Pclass | \n",
+ " 1.000000 | \n",
+ " -0.331339 | \n",
+ " 0.083081 | \n",
+ " 0.018443 | \n",
+ " -0.549500 | \n",
+ " -0.131900 | \n",
+ "
\n",
+ " \n",
+ " Age | \n",
+ " -0.331339 | \n",
+ " 1.000000 | \n",
+ " -0.232625 | \n",
+ " -0.179191 | \n",
+ " 0.091566 | \n",
+ " -0.084153 | \n",
+ "
\n",
+ " \n",
+ " SibSp | \n",
+ " 0.083081 | \n",
+ " -0.232625 | \n",
+ " 1.000000 | \n",
+ " 0.414838 | \n",
+ " 0.159651 | \n",
+ " 0.114631 | \n",
+ "
\n",
+ " \n",
+ " Parch | \n",
+ " 0.018443 | \n",
+ " -0.179191 | \n",
+ " 0.414838 | \n",
+ " 1.000000 | \n",
+ " 0.216225 | \n",
+ " 0.245489 | \n",
+ "
\n",
+ " \n",
+ " Fare | \n",
+ " -0.549500 | \n",
+ " 0.091566 | \n",
+ " 0.159651 | \n",
+ " 0.216225 | \n",
+ " 1.000000 | \n",
+ " 0.182333 | \n",
+ "
\n",
+ " \n",
+ " Sex_female | \n",
+ " -0.131900 | \n",
+ " -0.084153 | \n",
+ " 0.114631 | \n",
+ " 0.245489 | \n",
+ " 0.182333 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Pclass Age SibSp Parch Fare Sex_female\n",
+ "Pclass 1.000000 -0.331339 0.083081 0.018443 -0.549500 -0.131900\n",
+ "Age -0.331339 1.000000 -0.232625 -0.179191 0.091566 -0.084153\n",
+ "SibSp 0.083081 -0.232625 1.000000 0.414838 0.159651 0.114631\n",
+ "Parch 0.018443 -0.179191 0.414838 1.000000 0.216225 0.245489\n",
+ "Fare -0.549500 0.091566 0.159651 0.216225 1.000000 0.182333\n",
+ "Sex_female -0.131900 -0.084153 0.114631 0.245489 0.182333 1.000000"
+ ]
+ },
+ "execution_count": 365,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pairwise Pearson correlations between the predictors.\n",
+ "x.corr(method='pearson')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 366,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+ " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+ " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+ " verbose=0, warm_start=False)"
+ ]
+ },
+ "execution_count": 366,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pin the solver: the recorded run used liblinear, but the library default differs between scikit-learn releases.\n",
+ "model = LogisticRegression(solver='liblinear')\n",
+ "model.fit(x_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 367,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
+ " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n",
+ " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n",
+ " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+ " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n",
+ " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n",
+ " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n",
+ " 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n",
+ " 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n",
+ " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n",
+ " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n",
+ " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+ " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)"
+ ]
+ },
+ "execution_count": 367,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep the hold-out predictions for later use and preview only the first few\n",
+ "# instead of dumping the whole array into the notebook output.\n",
+ "y_pred = model.predict(x_test)\n",
+ "y_pred[:10]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 368,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8101694915254237"
+ ]
+ },
+ "execution_count": 368,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean accuracy of the fitted classifier on the held-out rows.\n",
+ "model.score(X=x_test, y=y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Titanic-answer.ipynb b/Titanic-answer.ipynb
new file mode 100644
index 0000000..533e7db
--- /dev/null
+++ b/Titanic-answer.ipynb
@@ -0,0 +1,821 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 339,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import statsmodels.api as sm\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import r2_score\n",
+ "from pandas.plotting import scatter_matrix\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 340,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 340,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read train.csv into a DataFrame and preview its first five rows.\n",
+ "df = pd.read_csv(filepath_or_buffer='train.csv')\n",
+ "df.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 341,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 12 columns):\n",
+ "PassengerId 891 non-null int64\n",
+ "Survived 891 non-null int64\n",
+ "Pclass 891 non-null int64\n",
+ "Name 891 non-null object\n",
+ "Sex 891 non-null object\n",
+ "Age 714 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Ticket 891 non-null object\n",
+ "Fare 891 non-null float64\n",
+ "Cabin 204 non-null object\n",
+ "Embarked 889 non-null object\n",
+ "dtypes: float64(2), int64(5), object(5)\n",
+ "memory usage: 83.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 342,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 0\n",
+ "1 1\n",
+ "2 1\n",
+ "3 1\n",
+ "4 0\n",
+ "Name: Survived, dtype: int64"
+ ]
+ },
+ "execution_count": 342,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Target vector: the Survived column of the loaded frame.\n",
+ "y = df['Survived']\n",
+ "y.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 343,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 343,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A bare expression is only rendered when it is the last line of a cell,\n",
+ "# so show the rows with a missing Age explicitly before the general preview.\n",
+ "display(df[df.Age.isna()].head())\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 344,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Impute missing ages with the column mean.\n",
+ "# NOTE(review): the mean is taken over every row, including those later placed in the test split.\n",
+ "Age_mean = df['Age'].mean()\n",
+ "df['Age'] = df['Age'].fillna(value=Age_mean)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 345,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 12 columns):\n",
+ "PassengerId 891 non-null int64\n",
+ "Survived 891 non-null int64\n",
+ "Pclass 891 non-null int64\n",
+ "Name 891 non-null object\n",
+ "Sex 891 non-null object\n",
+ "Age 891 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Ticket 891 non-null object\n",
+ "Fare 891 non-null float64\n",
+ "Cabin 204 non-null object\n",
+ "Embarked 889 non-null object\n",
+ "dtypes: float64(2), int64(5), object(5)\n",
+ "memory usage: 83.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Verify the imputation left no missing ages, then re-inspect the frame.\n",
+ "assert df.Age.notna().all(), 'Age still contains missing values'\n",
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 361,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 6 columns):\n",
+ "Pclass 891 non-null int64\n",
+ "Sex 891 non-null object\n",
+ "Age 891 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Fare 891 non-null float64\n",
+ "dtypes: float64(2), int64(3), object(1)\n",
+ "memory usage: 41.8+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Feature frame: drop the target and the columns that are not used as predictors.\n",
+ "x = df.drop(columns=['Survived', 'Cabin', 'PassengerId', 'Name','Embarked', 'Ticket'])\n",
+ "# display() is needed here: a non-final bare expression would be discarded.\n",
+ "display(x.head())\n",
+ "x.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 362,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Pclass | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Fare | \n",
+ " Sex_female | \n",
+ " Sex_male | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 7.2500 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 71.2833 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 7.9250 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 53.1000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 8.0500 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Pclass Age SibSp Parch Fare Sex_female Sex_male\n",
+ "0 3 22.0 1 0 7.2500 0 1\n",
+ "1 1 38.0 1 0 71.2833 1 0\n",
+ "2 3 26.0 0 0 7.9250 1 0\n",
+ "3 1 35.0 1 0 53.1000 1 0\n",
+ "4 3 35.0 0 0 8.0500 0 1"
+ ]
+ },
+ "execution_count": 362,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# One-hot encode the non-numeric column(s); numeric columns pass through unchanged.\n",
+ "x = pd.get_dummies(data=x)\n",
+ "x.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 363,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sex_male is the complement of Sex_female, so one indicator column is enough.\n",
+ "# errors='ignore' keeps the cell re-runnable once the column is already gone.\n",
+ "x = x.drop(columns=['Sex_male'], errors='ignore')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 364,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((596, 6), (295, 6))"
+ ]
+ },
+ "execution_count": 364,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.\n",
+ "x_train, x_test, y_train, y_test = train_test_split(\n",
+ "    x,\n",
+ "    y,\n",
+ "    test_size=0.33,\n",
+ "    random_state=42,\n",
+ ")\n",
+ "(x_train.shape, x_test.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 365,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Pclass | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Fare | \n",
+ " Sex_female | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Pclass | \n",
+ " 1.000000 | \n",
+ " -0.331339 | \n",
+ " 0.083081 | \n",
+ " 0.018443 | \n",
+ " -0.549500 | \n",
+ " -0.131900 | \n",
+ "
\n",
+ " \n",
+ " Age | \n",
+ " -0.331339 | \n",
+ " 1.000000 | \n",
+ " -0.232625 | \n",
+ " -0.179191 | \n",
+ " 0.091566 | \n",
+ " -0.084153 | \n",
+ "
\n",
+ " \n",
+ " SibSp | \n",
+ " 0.083081 | \n",
+ " -0.232625 | \n",
+ " 1.000000 | \n",
+ " 0.414838 | \n",
+ " 0.159651 | \n",
+ " 0.114631 | \n",
+ "
\n",
+ " \n",
+ " Parch | \n",
+ " 0.018443 | \n",
+ " -0.179191 | \n",
+ " 0.414838 | \n",
+ " 1.000000 | \n",
+ " 0.216225 | \n",
+ " 0.245489 | \n",
+ "
\n",
+ " \n",
+ " Fare | \n",
+ " -0.549500 | \n",
+ " 0.091566 | \n",
+ " 0.159651 | \n",
+ " 0.216225 | \n",
+ " 1.000000 | \n",
+ " 0.182333 | \n",
+ "
\n",
+ " \n",
+ " Sex_female | \n",
+ " -0.131900 | \n",
+ " -0.084153 | \n",
+ " 0.114631 | \n",
+ " 0.245489 | \n",
+ " 0.182333 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Pclass Age SibSp Parch Fare Sex_female\n",
+ "Pclass 1.000000 -0.331339 0.083081 0.018443 -0.549500 -0.131900\n",
+ "Age -0.331339 1.000000 -0.232625 -0.179191 0.091566 -0.084153\n",
+ "SibSp 0.083081 -0.232625 1.000000 0.414838 0.159651 0.114631\n",
+ "Parch 0.018443 -0.179191 0.414838 1.000000 0.216225 0.245489\n",
+ "Fare -0.549500 0.091566 0.159651 0.216225 1.000000 0.182333\n",
+ "Sex_female -0.131900 -0.084153 0.114631 0.245489 0.182333 1.000000"
+ ]
+ },
+ "execution_count": 365,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pairwise Pearson correlations between the predictors.\n",
+ "x.corr(method='pearson')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 366,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+ " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+ " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+ " verbose=0, warm_start=False)"
+ ]
+ },
+ "execution_count": 366,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pin the solver: the recorded run used liblinear, but the library default differs between scikit-learn releases.\n",
+ "model = LogisticRegression(solver='liblinear')\n",
+ "model.fit(x_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 367,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
+ " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n",
+ " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n",
+ " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+ " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n",
+ " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n",
+ " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n",
+ " 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n",
+ " 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n",
+ " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n",
+ " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n",
+ " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+ " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)"
+ ]
+ },
+ "execution_count": 367,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep the hold-out predictions for later use and preview only the first few\n",
+ "# instead of dumping the whole array into the notebook output.\n",
+ "y_pred = model.predict(x_test)\n",
+ "y_pred[:10]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 368,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8101694915254237"
+ ]
+ },
+ "execution_count": 368,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean accuracy of the fitted classifier on the held-out rows.\n",
+ "model.score(X=x_test, y=y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}