{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Chapter 7: Problems with the predictors\n", "\n", "Load the packages:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'%.7f'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import scipy as sp\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "%precision 7\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Errors in the predictors\n", "\n", "Load in the data:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
speeddist
042
1410
274
3722
4816
\n", "
" ], "text/plain": [ " speed dist\n", "0 4 2\n", "1 4 10\n", "2 7 4\n", "3 7 22\n", "4 8 16" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cars = pd.read_csv(\"data/cars.csv\")\n", "cars.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set up the plot and the random seed" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGUJJREFUeJzt3X+UZGV95/H3JwNqi3oaMq0LPTMZzLKTsI5xsEXcSVyiJKOGI51JjBIxE0N2kj2QSHbDCupZzFlzmAQTTUzCZoKYMXFBojiQo3uQ8EPIbkB6GJbhhyMc5Mf8WKY5ZILILDDjd/+o29L03K6qrupbz33qfl7nzKmqp25VfefpOv3t+31+XEUEZmZmc/1Q6gDMzKyenCDMzKyUE4SZmZVygjAzs1JOEGZmVsoJwszMSjlBmJlZKScIMzMr5QRhZmaljkgdQD+WLl0aK1euTB2GmVlWtm3b9kREjHU6LusEsXLlSqamplKHYWaWFUmPdHOcS0xmZlbKCcLMzEo5QZiZWSknCDMzK+UEYWZmpSpLEJIul7RP0j2z2i6R9C1Jd0v6iqTRWc9dKOlBSTslrasqLjOz1LZu383aTTdy/AVfZe2mG9m6fXfqkEpVeQbx18A75rRdD7wuIl4PfBu4EEDSicD7gH9bvOYvJC2pMDYzsyS2bt/NhVfvYPf+AwSwe/8BLrx6Ry2TRGUJIiJuAZ6c0/b1iDhYPLwNWFbcPwO4MiKejYjvAA8CJ1cVm5lZKpdct5MDzx96UduB5w9xyXU7E0U0v5RjEL8G/M/i/jjw2KzndhVth5G0UdKUpKnp6emKQzQzW1x79h9YUHtKSRKEpI8CB4EvzDSVHBZlr42IzRExERETY2MdV4qbmdXKcaMjC2pPaeAJQtIG4HTg/RExkwR2ActnHbYM2DPo2MzMqnb+ulWMHPniIdaRI5dw/rpViSKa30AThKR3AB8G3h0Rz8x66lrgfZJeKul44ATgm4OMzcxsECbXjHPx+tWMj44gYHx0hIvXr2ZyTWlVPanKNuuTdAVwKrBU0i7gIlqzll4KXC8J4LaI+M2IuFfSVcB9tEpP50TEofJ3NjPL2+Sa8VomhLn0QpUnPxMTE+HdXM3MFkbStoiY6HScV1KbmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMDOzUk4QZmZWygnCzMxKOUGYmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMDOzUk4QZmZWygnCzMxKOUGYmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMDOzUk4QZmZWygnCzMxKVZYgJF0uaZ+ke2a1HSPpekkPFLdHF+2S9KeSHpR0t6STqorLzMy6U+UZxF8D75jTdgFwQ0ScANxQPAZ4J3BC8W8jcGmFcZmZWRcqSxARcQvw5JzmM4Atxf0twOSs9s9Hy23AqKRjq4rNzMw6G/QYxGsiYi9Acfvqon0ceGzWcbuKNjMzS6Qug9QqaYvSA6WNkqYkTU1PT1cclplZcw06QTw+UzoqbvcV7buA5bOOWwbsKXuDiNgcERMRMTE2NlZpsGZmTTboBHEtsKG4vwG4Zlb7rxSzmU4B/mWmFGVmZmkcUdUbS7oCOBVYKmkXcBGwCbhK0tnAo8B7isO/BrwLeBB4BvhgVXGZmVl3KksQEXHmPE+9veTYAM6pKhYzM1u4ugxSm5lZzThBmJlZKScIMzMrVdkYhJmZldu6fTeXXLeTPfsPcNzoCOevW8XkmvqtDXaCMDMboK3bd3Ph1Ts48PwhAHbvP8CFV+8AqF2ScInJzGyALrlu5w+Sw4wDzx/ikut2Jopofk4QZmYDtHv/gQW1p+QEYWY2QEtUtvXc/O0pOUGYmQ3QoSjdh3Te9pScIMzMBmh8dGRB7Sk5QZiZDdD561YxcuSSF7WNHLmE89etShTR/DzN1cxsgGamsnodhJmZHWZyzXgtE8JcLjGZmVkpJwgzMyvlBGFmZqWcIMzMrJQHqc3MMjLInWCdIMzMMjHonWCdIMysVC7XLGiSdjvBOkGY2UDkdM2CJtkzz46v87X3y4PUZnaYnK5Z0CTHzbNf03zt/XKCMLPDDPovVevOoPdxcoIws8MM+i9V687kmnEuXr+a8dERRGsH2IvXr/YsJjMbnPPXrXrRGATUd8fRphnkPk5JEoSk3wF+HQhgB/BB4FjgSuAY4E7gAxHxXIr4zNppwuyenHYcteooBnwVI0njwD8CJ0bEAUlXAV8D3gVcHRFXSvrvwP+JiEvbvdfExERMTU1VH7RZYe7sHmj9ZV3lab7ZYpO0LSImOh2XagziCGBE0hHAy4G9wNuALxXPbwEmE8VmNi/P7rEmGXiCiIjdwCeBR2klhn8BtgH7I+JgcdguoPTPMUkbJU1Jmpqenh5EyGY/4Nk91iQDTxCSjgbOAI4HjgOOAt5Zcmhp7SsiNkfERERMjI2NVReoWQnP7rEmSVFiOg34TkRMR8TzwNXAvwNGi5ITwDJgT4LYzNrK6XrCZv1KkSAeBU6R9HJJAt4O3AfcBPxiccwG4JoEsZm1Neh56GYpDXwWE4Ck3wPeCxwEttOa8jrOC9NctwNnRcSz7d7Hs5jMqtOE6bxN1e0spiTrICLiIuCiOc0PAScnCMfM5vBmfQbeasPMSng6r4EThJmV8HReAycIMyvh6bwGThBmVsLTeQ28m6uZlfBmfQZOEGY2j0FuK2311HWJSdKPSDqtuD8i6ZXVhWVmZql1lSAk/QdaO63+ZdG0DNhaVVBmZpZetyWmc2gtYrsdICIekPTqyqIyMxtiuaxS7zZBPBsRz7W2ToJiU73B79FhZpa5nFapdzsG8Q1JH6F1kZ+fAf4O+PvqwjIzG045rVLv9gziAuBsWteP/g1alwi9rKqgzCxvuZRQUshplXq3CWIEuDwi/gpA0pKi7ZmqAjOzPOVUQknhuNERdpckgzquUu+2xHQDrYQwYwT4h8UPx8xyl1MJJYWcVql3ewbxsoh4euZBRDwt6eUVxWRmGcuphJJCTqvUu00Q35N0UkTcCSDpjYB/2mZ2mJxKKJBmvCSXVerdlpjOA/5O0q2SbgW+CJxbXVhmlqucSigz4yW79x8geGG8ZOv23alDq4WuziAi4g5JPwasAgR8KyKerzQyM8tSTiWUduMldYx30BayWd+bgJXFa9ZIIiI+X0lUZpa1Xksogy73eLykva4ShKS/AX4UuAuYSbcBOEGY2aJIMT02t/GSQev2DGICODEivL2GmVUiRbnn/HWrXpSUoL7jJSl0myDuAf4VsLfCWMyswVKUe3IaL0mh2wSxFLhP0jeBZ2caI+LdlURlZo2TqtyTy5TTFLpNEB+vMggzM5d76qfbaa7fWMwPlTRKa7O/19Ea7P41YCet9RUrgYeBX4qIf17MzzWz+nK5p37UzbizpFOAzwA/DrwEWAJ8LyJe1dOHSluAWyPiMkkvAV4OfAR4MiI2SboAODoiPtzufSYmJmJqaqqXEMx65p1KLXeStkXERKfjul1J/WfAmcADtDbq+/WirZfAXgW8FfgsQEQ8FxH7gTOALcVhW4DJXt7frEpeeWtN0m2CICIeBJZExKGI+Bxwao+f+VpgGvicpO2SLpN0FPCaiNhbfNZewJc0tdrxTqXWJN0miGeKUtBdkv5Q0u8AR/X4mUcAJwGXRsQa4Hu0LkjUFUkbJU1Jmpqenu4xBLPeeOWtNUm3CeIDxbHn0vqFvhxY3+Nn7gJ2RcTtxeMv0UoYj0s6FqC43Vf24ojYHBETETExNjbWYwhmvZlvyqVX3tow6jZBTEbE/4uIpyLi9yLiPwGn9/KBEfF/gcckzcxdeztwH3AtsKFo2wBc08v7m1Upp51KzfrV7TqIDcCfzGn71ZK2bv0W8IWibPUQ8EFayeoqSWcDjwLv6fG9zSrTpKmYKWZreYZYvbSd5irpTOCXgZ8Ebp311KuAgxFxWrXhtedprmbVmLtxHrTOlC5ev7qyX9gpPrOpup3m2ukM4n/T2n9pKfBHs9q/C9zde3hmVmcpNs7ztRnqp22CiIhHgEcknQYciIjvS/o3wI8BOwYRoJkNXorZWp4hVj/djkHcAvyUpKOBG4Ap4L3A+6sKzGwY5VJjT7FxXo7XZsjl59mrbmcxKSKeoTW19TMR8fPAidWFZTZ8clqFnWK2Vm4zxHL6efaq6wQh6S20zhi+WrQt5HKlZo2X0yrsyTXjXLx+NeOjIwgYHx2pfLA4xWf2I6efZ6+6/SV/HnAh8JWIuFfSa4GbqgvLbPjkVmP3dRLay+3n2YuFbPf9jVmPHwJ+u6qgzIZRjjX2QUpxTep+NOHn2bbEJOnTxe3fS7p27r/BhGg2HHKrsQ9abiWbJvw8O51B/E1x+8mqAzEbdk1ahd2L3Eo2Tfh5dloHsa24/YakseK+t1A161GKun4uUzFzLNkM+zhNpxKTJH1c0hPAt4BvS5qW9F8HE56Z9SOnqZhNKNnkptM01/OAtcCbIuKHI+Jo4M3A2uKaEGZWYznV9XOb5toEnRLErwBnRsR3ZhqKGUxnFc+ZWY3lVte3eumUII6MiCfmNhbjEEdWE5KZLZacLnCUUzmsKToliOd6fM7MaiCnun5O5bCm6DTN9SckPVXSLuBlFcRjNjC5zO7pR6qpmL30rcth9dNpmuuSds+b5Sq3Vbv9GPRUzF77NsdprsOu2836zIaKyxnV6bVvcyqHNYV3ZLVGcjmjOr32bRNWJufGCcIayeWM6vTTt8O+Mjk3LjFZI7mcUR337fDwGYQ1kssZ1XHfDg9FROoYejYxMRFTU1OpwzAbSk2YBtxUkrZFxESn43wGYWaHadI0YJufxyDM7DCeBmyQ8AxC0hJgCtgdEadLOh64EjgGuBP4QER4Ow8z+iv3eFWz9SrlGcSHgPtnPf4D4FMRcQLwz8DZSaIyq5l+NrHr9bU5bfJn1UmSICQtA34OuKx4LOBtwJeKQ7YAkyliM6ubfso9XtVs/UhVYvo08F+AVxaPfxjYHxEHi8e7gNJzYEkbgY0AK1asqDhMs/T6Kfd4VbP1Y+AJQtLpwL6I2Cbp1JnmkkNL599GxGZgM7SmuVYSpFmN9LMy2auarR8pSkxrgXdLepjWoPTbaJ1RjEqaSVjLgD0JYjOrnX7KPS4VWT8GniAi4sKIWBYRK4H3ATdGxPuBm4BfLA7bAFwz6NjM6qifazX7Os/Wj6QrqYsS0+8W01xfywvTXLcDZ0XEs+1e75XUloJXGHfmPqq3LFZSR8TNwM3F/YeAk1PGY9aJVxh35j4aHl5JbbYAXmHcmftoeHgvJls0uZUVPrZ1B1fc/hiHIlgiceabl/OJydVtX+MVxp25j4aHzyBsUfSz2jeFj23dwd/e9iiHijG4QxH87W2P8rGtO9q+ziuMO3MfDQ8nCFsUuZUVrrj9sQW1z/C00c7cR8PDJSZbFLmVFQ7NM3tvvvYZXmHcmftoeDhB2KLI7RrPS6TSZLBEZYv6XyzFCuNB7+bar177KLdxrGHnEpMtitzKCme+efmC2lNKsZtrCjnF2hROELYoclux+4nJ1Zx1yoofnDEskTjrlBUdZzGlkGI31xRyirUpXGKyRZPb5m6fmFxdy4QwV4rdXFPIKdam8BmEWc31M200pymnOcXaFE4QZjXXlN1cc4q1KVxislrodfZKE2a99DNtNKcppznF2hRJd3Ptl3dzHQ5zN3eD1l+OnQa5e32dWdN1u5urS0yWXK+zVzzrxaxaThCWXK+zVzzrxaxaHoOwRdPreECvq7D7Xb2dYtyjCWMmNjx8BmGLop9VsL3OXuln1kuv8TZlVbMZOEHYIulnPKDXVdj9rN5OMe7hMRPLjUtMtij6HQ8Y9CrsFOMeHjOx3PgMwhZFilWw/ZRseo23KauazcAJwhZJilWw/ZRsUox7eKWw5cYlpprLZdZLilWw/ZRseo23KauazcArqWvNK4XbW7vpxtJpruOjI/yvC96WICKzPHgl9RDwrJf2XLIxq9bAE4Sk5ZJuknS/pHslfahoP0bS9ZIeKG6PHnRsdeNZL+3ldpEis9ykOIM4CPzniPhx4BTgHEknAhcAN0TECcANxeNG86yX4bN1+27WbrqR4y/4Kms33ehFclZrA08QEbE3Iu4s7n8XuB8YB84AthSHbQEmBx1b3biE0l5uK5Nzi9cs6RiEpJXAGuB24DURsRdaSQR4dbrI6sEllPZyG6PJLV6zZNNcJb0C+DJwXkQ8peLi8V28biOwEWDFihXVBVgTOV3nedBTcnMbo8ktXrMkZxCSjqSVHL4QEVcXzY9LOrZ4/lhgX9lrI2JzRExExMTY2NhgAraOUpRPchujyS1esxSzmAR8Frg/Iv541lPXAhuK+xuAawYdm/Wu3/JJL4O3uY3R5BavWYoS01rgA8AOSXcVbR8BNgFXSTobeBR4T4LYhkZO5Z65CwJnzj6AtjHntjI5t3jNBp4gIuIfgfkGHN4+yFiGVa+/cPvRz8V72p19dLP1RU6/YHOL15rNK6mHUIrZMv2UTzx4a1ZPThBDKMUv3H6m5Hrw1qyevJvrEOr3Ws296rV8cv66VaWbEnZ76VDX9M2q4TOIIZTbbJlezz68MtmsWj6DGEI5zpbp5eyjn8FtM+vMCWJINWG2jAe3zarlEpNly4PbZtVygrBs5TbWYpabRpaYPPOlvVz6J8exFrOcNC5BpFhlnJPc+qcJYy1mqTSuxOQ9+dtz/5jZjMYlCM98ac/9Y2YzGpcgPPOlPfePmc1oXILwzJf23D9mNqNxg9Se+dKe+8fMZigiUsfQs4mJiZiamkodhplZViRti4iJTsc1rsRkZmbdcYIwM7NSThBmZlaqcYPUucll2wszGz5OEDWW27YXZjZcXGKqMW97YWYpNfIMIpeyjbe9MLOUGncGkdN1jL3thZml1LgEkVPZxttemFlKtUsQkt4haaekByVdsNjvn1PZZnLNOBevX8346AgCxkdHuHj96lqWw8xs+NRqDELSEuDPgZ8BdgF3SLo2Iu5brM84bnSE3SXJoK5lG18Qx8xSqdsZxMnAgxHxUEQ8B1wJnLGYH+CyjZlZd+qWIMaBx2Y93lW0/YCkjZKmJE1NT08v+ANctjEz606tSkyAStpetN1sRGwGNkNrN9dePsRlGzOzzup2BrELWD7r8TJgT6JYzMwarW4J4g7gBEnHS3oJ8D7g2sQxmZk1Uq1KTBFxUNK5wHXAEuDyiLg3cVhmZo1UqwQBEBFfA76WOg4zs6arW4nJzMxqIutrUkuaBh5J8NFLgScSfG4u3D+duY/ac/901k8f/UhEjHU6KOsEkYqkqW4u+N1U7p/O3EftuX86G0QfucRkZmalnCDMzKyUE0RvNqcOoObcP525j9pz/3RWeR95DMLMzEr5DMLMzEo5QSyQpIcl7ZB0l6Sp1PGkJulySfsk3TOr7RhJ10t6oLg9OmWMqc3TRx+XtLv4Ht0l6V0pY0xJ0nJJN0m6X9K9kj5UtPt7RNv+qfw75BLTAkl6GJiICM/RBiS9FXga+HxEvK5o+0PgyYjYVFwV8OiI+HDKOFOap48+DjwdEZ9MGVsdSDoWODYi7pT0SmAbMAn8Kv4eteufX6Li75DPIKwvEXEL8OSc5jOALcX9LbS+zI01Tx9ZISL2RsSdxf3vAvfTug6Mv0e07Z/KOUEsXABfl7RN0sbUwdTUayJiL7S+3MCrE8dTV+dKursoQTWyfDKXpJXAGuB2/D06zJz+gYq/Q04QC7c2Ik4C3gmcU5QPzBbqUuBHgTcAe4E/ShtOepJeAXwZOC8inkodT92U9E/l3yEniAWKiD3F7T7gK7Suo20v9nhRN52pn+5LHE/tRMTjEXEoIr4P/BUN/x5JOpLWL78vRMTVRbO/R4Wy/hnEd8gJYgEkHVUMEiHpKOBngXvav6qRrgU2FPc3ANckjKWWZn7xFX6eBn+PJAn4LHB/RPzxrKf8PWL+/hnEd8izmBZA0mtpnTVA61oa/yMifj9hSMlJugI4ldbOko8DFwFbgauAFcCjwHsiorGDtPP00am0SgMBPAz8xky9vWkk/SRwK7AD+H7R/BFadfbGf4/a9M+ZVPwdcoIwM7NSLjGZmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMOtA0keLXTTvLnbNfHOFn3WzJF+L2WrhiNQBmNWZpLcApwMnRcSzkpYCL0kcltlA+AzCrL1jgSci4lmAiHgiIvYU1wX5A0nfLP79awBJY5K+LOmO4t/aov2oYkO1OyRtl3RG0T4i6cri7OSLwEiq/6jZXE4QZu19HVgu6duS/kLSv5/13FMRcTLwZ8Cni7Y/AT4VEW8CfgG4rGj/KHBj0f7TwCXFdi3/EXgmIl4P/D7wxur/S2bdcYnJrI2IeFrSG4GfovWL/YvFxWsArph1+6ni/mnAia3tcwB4VbF/188C75b0u0X7y2htIfFW4E+Lz7pb0t1V/n/MFsIJwqyDiDgE3AzcLGkHL2wgN3ufmpn7PwS8JSIOzH6PYsO1X4iInXPa576PWW24xGTWhqRVkk6Y1fQG4JHi/ntn3f5Tcf/rwLmzXv+G4u51wG8ViQJJa4r2W4D3F22vA16/2P8Hs175DMKsvVcAn5E0ChwEHgQ20prZ9FJJt9P6Q+vM4vjfBv68KBUdQSsB/Cbw32iNU9xdJImHi/e4FPhccfxdwDcH9P8y68i7uZr1QNLDwEREPJE6FrOquMRkZmalfAZhZmalfAZhZmalnCDMzKyUE4SZmZVygjAzs1JOEGZmVsoJwszMSv1/XbxYJ4mT8HMAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots()\n", "ax.scatter(cars.speed, cars.dist)\n", "plt.xlabel(\"Speed\")\n", "plt.ylabel(\"Distance\")\n", "xr = np.array(ax.get_xlim())\n", "np.random.seed(123)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Do the four fits with increasing amounts of error. Since we only need univariate regression np.polyfit is adequate." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "\n", "est = np.polyfit(cars.speed, cars.dist, 1)\n", "ax.plot(xr, est[1] + est[0] * xr)\n", "est1 = np.polyfit(cars.speed + np.random.normal(size=50), cars.dist, 1)\n", "ax.plot(xr, est1[1] + est1[0] * xr, c='red')\n", "est2 = np.polyfit(cars.speed + np.random.normal(scale=2,size=50), cars.dist, 1)\n", "ax.plot(xr, est2[1] + est2[0] * xr, c='green')\n", "est5 = np.polyfit(cars.speed + np.random.normal(scale=5,size=50), cars.dist, 1)\n", "ax.plot(xr, est5[1] + est5[0] * xr, c='cyan')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 3.9324088, -17.5790949]),\n", " array([ 3.7603449, -14.9792162]),\n", " array([ 3.47154 , -10.7660126]),\n", " array([2.0647813, 9.9521318]))" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "est, est1, est2, est5" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "vv = np.repeat(np.array([0.1, 0.2, 0.3, 0.4, 0.5]), [1000, 1000, 1000, 1000, 1000])\n", "slopes = np.zeros(5000)\n", "for i in range(5000):\n", " slopes[i] = np.polyfit(cars.speed+np.random.normal(scale=np.sqrt(vv[i]), size=50), cars.dist, 1)[0]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-0.1275732, 3.994792 ])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "betas = np.reshape(slopes, (5, 1000)).mean(axis=1)\n", "betas = np.append(betas,est[0])\n", "variances = np.array([0.6, 0.7, 0.8, 0.9, 1.0, 0.5])\n", "gv = np.polyfit(variances, betas,1)\n", "gv" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xd4VVX6/v/3kwIJEHpATJBeBWkREYGEgFJGUaw4oqIooyMWBEYZZ8Y+fj8CKnZRUVHHAhYUFaSFJi1IryK9SJMOAZKs3x/n4A9jYk7gJDvJuV/XlcudtdfZeZbAnX3W3mcvc84hIiKhIczrAkREpOAo9EVEQohCX0QkhCj0RURCiEJfRCSEKPRFREKIQl9EJIQo9EVEQkjAoW9m4Wa2yMzGZ7OvpJl9YmbrzGyemdU8bd8Qf/saM+sSnLJFRORMROSh7/3AKqBsNvv6Avucc3XNrBfwf8ANZtYY6AWcD5wLTDaz+s65jJx+SOXKlV3NmjXzUJaIiCxcuHCPcy42t34Bhb6ZxQN/AZ4GHsymy5XAY/7tscDLZmb+9o+dc8eBDWa2DmgNzMnpZ9WsWZPU1NRAyhIRET8z2xRIv0Cnd14A/gFk5rA/DtgC4JxLBw4AlU5v99vqbxMREQ/kGvpmdjmwyzm38M+6ZdPm/qQ968/oZ2apZpa6e/fu3EoSEZEzFMiZ/iVADzPbCHwMJJvZB1n6bAWqA5hZBFAO+PX0dr94YHvWH+CcG+mcS3DOJcTG5jolJSIiZyjX0HfODXHOxTvnauK7KDvVOdc7S7evgFv929f6+zh/ey//3T21gHrA/KBVLyIieZKXu3d+x8yeAFKdc18BbwPv+y/U/orvlwPOuRVm9imwEkgH7vmzO3dERCR/WWFbRCUhIcHl+e6dWS9AXEuo1eH/b9swA7b9CO0eCG6BIiKFkJktdM4l5NaveHwiN64ljOnjC3rw/XdMH1+7iIj85oyndwqVWh1w177DsQ9vJq35rVRc+QFc9+7vz/xFRKSYnOkDG8smMDq9ExVTRzCr/JUcPret1yWJiBQ6xSb0ax1aSL/oFCbH3kqjbWMY/OxLjFu8jcJ2zUJExEvFI/T9c/hh179L53teZF/3kfy/zOf4+NMPueGNuazacdDrCkVECoXiEfrbfvzdHH7di7oT0/sDHmp6lJ92HeLyl2bx2FcrOHDspLd1ioh4rHjcsvkn9h89wfDv1/LhvE1UKFWCh7o15NqW8YSFZfeECBGRoim0btn8E+VLleDJq5rwVf921Kxcmn+MXco1r//Asq0HvC5NRKTAFfvQP6VJXDnG3nUxw69rxpZfj9HjlVn884tl7DtywuvSREQKTMiEPoCZcU2reKYOSuT2S2rxyYItdByewgdzN5GRWbimuURE8kNIhf4pZaMi+ffljfnu/vY0PCeGf325nCtfmcXCTfu8Lk1EJF+FZOifUr9qDB/d2YaXbmzBnkMnuOa1Hxg0Zgm7Dx33ujQRkXwR0qEPvimfK5qdy5SBidydVIdxi7eRPDyFd2ZvID0jp4XCRESKppAP/VNKl4zgoa4NmfBAB5pXL8/jX6/kLy/OYu76vV6XJiISNAr9LOrElmH07a154+ZWHD6eTq+Rc7nvo0X8ciDN69JERM6aQj8bZkaX889h8oOJ3N+pHhNW/EKn4Sm8Mf1nTqRrykdEii6F/p+ILhHOgEvrM3lAIhfXqcwz362m64gZzPxJi7eLSNGk0A/AeZVK8datCbzT50IyMx03vz2fu95fyNZ9R70uTUQkTxT6edCxYRUmDujA4C4NSFm7i87PTeelKT+RdlLL/opI0ZBr6JtZlJnNN7MlZrbCzB7Ppk8NM5tiZkvNLMXM4k/b96z/davM7EUzK9JPOisZEc49HesyZWASnRpWZfiktVz2/AymrNrpdWkiIrkK5Ez/OJDsnGsGNAe6mlmbLH2GAaOdcxcATwDPAJhZW+AS4AKgCXAhkBik2j0VVz6aV25qyYd3XESJiDD6vpdK33cXsGnvEa9LExHJUa6h73wO+7+N9H9lfVBNY2CKf3sacOWplwNRQAmgpP+1xeqU+JK6lfn2vvY80r0Rc9fv5dLnZjD8+zUcO6EpHxEpfAKa0zezcDNbDOwCJjnn5mXpsgS4xr/dE4gxs0rOuTn4fgns8H9NdM6tCk7phUeJiDDu7FCbaYOS+MsF1Xhp6jo6PzedCct3aLlGESlUAgp951yGc645EA+0NrMmWboMAhLNbBG+6ZttQLqZ1QUa+V8XBySbWYesxzezfmaWamapu3cX3dshq5SN4vkbmvPp3y4mJiqCuz74kVtGzWfdrsO5v1hEpADkeeUsM3sUOOKcG5bD/jLAaudcvJkNBqKcc0/69/0HSHPOPZvT8YO9cpZX0jMy+XDeZob5p3r6tqvFvZ3qUaZkhNeliUgxFLSVs8ws1szK+7ejgc7A6ix9KpvZqWMNAUb5tzfjewcQYWaR+N4FFLvpnexEhIdxa9uaTBuUxNUt43hjxno6DU9h3OJtmvIREc8EMr1TDZhmZkuBBfjm9Meb2RNm1sPfJwlYY2ZrgarA0/72scDPwDJ88/5LnHNfB3MAhV3lMiV59tpmfPH3tlSJieL+jxdzw8i5rP7loNeliUgIKvYLoxcmGZmOTxZs4dmJqzmUls7NbWow4NL6lIuO9Lo0ESnitDB6IRQeZvz1ovOYNjCJG1tX5705G+k0PIUxqVvI1HKNIlIAFPoeqFC6BE9d1ZSv+7fjvIqlGDx2Kde8/gPLth7wujQRKeYU+h5qEleOsXe1Zdh1zdjy61F6vDKLR75Yxr4jJ7wuTUSKKYW+x8LCjGtbxTN1UBK3ta3Fxwu20HF4Ch/O20SGpnxEJMgU+oVE2ahI/nNFY769rz0NqsbwyBfLufKVWSzctM/r0kSkGFHoFzINzonh435tePHGFuw+dJxrXvuBwWOWsOfwca9LE5FiQKFfCJkZPZqdy9SBSdyVWIcvF2+j47AU3pm9gfQMLdcoImdOoV+IlS4ZwcPdGjLhgQ40r16ex79eyeUvzWLe+r1elyYiRZRCvwioE1uG0be35vXerTiUls4NI+dy/8eL2HkwzevSRKSIUegXEWZG1ybnMPnBRO7rVI/vlv9C8rAU3pj+MyfSNeUjIoFR6Bcx0SXCefDS+kwa0IGL61Time9W023EDGb+VHQfSS0iBUehX0TVqFSat269kFF9EkjPdNz89nzu/mAh2/Yf87o0ESnE9HD3Ii65YVXa1qnM27M28NLUn5i2Zhf9O9bljva1iYoM97o8ESlkdKZfDERFhnNPx7pMGZhEcsMqDPt+LV1emMHU1cVqOWIRCQKFfjESVz6aV29qxQd9LyIizLj93VT6vruATXuPeF2aiBQSCv1iqF29ynx3fwf+2b0hc9fv5dLnZ/Ccf9lGEQltCv1iqkREGP061GHqoCS6NzmHF6euo/Nz05mwfIeWaxQJYQr9Yq5q2She6NWCT/q1ISYqgrs++JFbRs3n592HvS5NRDyg0A8RF9WuxPh72/HYFY1ZvGU/XV+YwTPfreLw8XSvSxORAqTQDyER4WH0uaQW0wYl0bNFHG9MX0+n4SmMW7xNUz4iISLX0DezKDObb2ZLzGyFmT2eTZ8aZjbFzJaaWYqZxZ+27zwz+97MVpnZSjOrGdwhSF5VLlOSZ69txud/b0uVmCju/3gxvUbOZfUvB70uTUTyWSBn+seBZOdcM6A50NXM2mTpMwwY7Zy7AHgCeOa0faOBoc65RkBrYNfZly3B0PK8Cnx5zyX8t2dT1uw8xF9enMXjX6/gwLGTXpcmIvkk19B3Pqeu+kX6v7LOBTQGpvi3pwFXAphZYyDCOTfJf6zDzrmjwShcgiM8zPjrRecxbWASN7auzrs/bKTT8BTGpG4hU8s1ihQ7Ac3pm1m4mS3Gd5Y+yTk3L0uXJcA1/u2eQIyZVQLqA/vN7HMzW2RmQ83sD88GMLN+ZpZqZqm7d+vBYV6oULoET13VlK/7t6N6xVIMHruUa1//geXbDnhdmogEUUCh75zLcM41B+KB1mbWJEuXQUCimS0CEoFtQDq+Z/u09++/EKgN9Mnm+COdcwnOuYTY2NgzHYsEQZO4cnx2V1uGXdeMzb8e5YqXZ/HIF8vYd+SE16WJSBDk6e4d59x+IAXomqV9u3PuaudcC+ARf9sBYCuwyDm33jmXDnwJtAxG4ZJ/wsKMa1vFM3VQEn3a1uTjBVvoODyFD+dtIkNTPiJFWiB378SaWXn/djTQGVidpU9lMzt1rCHAKP/2AqCCmZ06fU8GVgajcMl/ZaMiefSK8/nmvnY0qBrDI18s56pXZvPj5n1elyYiZyiQM/1qwDQzW4ovxCc558ab2RNm1sPfJwlYY2ZrgarA0+CbFsI3tTPFzJYBBrwZ5DFIPmt4Tlk+7teGF29swa5DaVz96g8MHrOEPYePe12aiOSRFbYP5SQkJLjU1FSvy5AcHD6ezktTf2LUrA1ERYYz8NL69G5Tg4hwfc5PxEtmttA5l5BbP/1LlTwpUzKCId0a8d39HWhevTyPfb2Sy1+axbz1e70uTUQCoNCXM1K3ShlG396a13u34lBaOjeMnMsDHy9i58E0r0sTkT+h0JczZmZ0bXIOkx9M5L7kuny7/BeSh6UwcsbPnEjP9Lo8EcmGQl/OWnSJcB68rAGTBnTg4jqV+O+3q+k2YgazftrjdWkikoVCX4KmRqXSvHXrhYzqk0B6pqP32/O4+4OFbNt/zOvSRMQvwusCpPhJbliVtnUq89bM9bw8bR3T1uyif8e63NG+NlGRf3gKh4gUIJ3pS76Iigynf3I9pgxMomODKgz7fi1dXpjB1NU7vS5NJKQp9CVfxZWP5rXerXi/b2siwozb303ljvcWsGnvEa9LEwlJCn0pEO3rxfLd/R34Z/eGzPl5L5c+P4Pnvl/DsRMZXpcmElIU+lJgSkSE0a9DHaYMTKJbk3N4ceo6Oj83nQnLf9FyjSIFRKEvBe6cclGM6NWCT/q1ISYqgrs+WMgto+bz8+7Dub9YRM6KQl88c1HtSoy/tx2PXdGYxVv20/WFGTzz3SqOHE/3ujSRYkuhL56KCA+jzyW1mDowiauax/HG9PUkD0/hqyXbNeUjkg8U+lIoxMaUZOh1zfj8722JjSnJfR8totfIuaz55ZDXpYkUKwp9KVRanleBcfe04+meTViz8xDdX5zJ41+v4GDaSa9LEykWFPpS6ISHGTddVINpA5PodWF13v1hI8nDUhi7cCuZWq5R5Kwo9KXQqlC6BE/3bMrX/dtRvWIpBo1ZwrWv/8DybQe8Lk2kyFLoS6HXJK4cn93VlqHXXsDmX49yxcuz+NeXy9h/9ITXpYkUOQp9KRLCwozrEqozZWASfdrW5KP5W+g4LIX/zdtMhqZ8RAKWa+ibWZSZzTezJWa2wswez6ZPDTObYmZLzSzFzOKz7C9rZtvM7OVgFi+hp1x0JI9ecT7j721Hvaox/POLZfR8dTaLNu/zujSRIiGQM/3jQLJzrhnQHOhqZm2y9BkGjHbOXQA8ATyTZf+TwPSzLVbklEbVyvJJvzaM6NWcnQfT6PnqD/xj7BL2HD7udWkihVquoe98Tn0+PtL/lfX9dGNgin97GnDlqR1m1gqoCnx/1tWKnMbMuLJ5HFMGJvG3xNp8/uM2Og5L4d3ZG0jP0HKNItkJaE7fzMLNbDGwC5jknJuXpcsS4Br/dk8gxswqmVkYMBwYnMvx+5lZqpml7t69O28jkJBXpmQEQ7o1YsIDHWgWX57Hvl7J5S/NYv6GX70uTaTQCSj0nXMZzrnmQDzQ2syaZOkyCEg0s0VAIrANSAf+DnzrnNuSy/FHOucSnHMJsbGxeR6ECEDdKmV4v29rXu/dkkNp6Vz/xhwe+HgROw+meV2aSKFheX2+iZk9Chxxzg3LYX8ZYLVzLt7MPgTaA5lAGaAE8Kpz7uGcjp+QkOBSU1PzVJNIVsdOZPBqyjremL6eyHDj/s71uO2SWkSG64Y1KZ7MbKFzLiG3foHcvRNrZuX929FAZ2B1lj6V/VM5AEOAUQDOuZucc+c552riezcw+s8CXyRYokuEM/CyBnw/oANtalfiv9+uptuImcz6aY/XpYl4KpDTnmrANDNbCizAN6c/3syeMLMe/j5JwBozW4vvou3T+VKtSB7VrFyat/tcyNu3JnAiPZPeb8/j7x8uZNv+Y16XJuKJPE/v5DdN70h+STuZwZsz1vNKyjoMo39yXe5oX4uSEeFelyZy1oI2vSNSXERFhnNvp3pMfjCRpAaxDJ24hi7Pz2Da6l1elyZSYBT6EnLiK5Titd6teL9va8LCjNveXcAd7y1g896jXpcmku80vSMh7UR6Ju/M3sCIKT+Rnum4K7EOdyfWIbqEb8rny0XbGDpxDdv3H+Pc8tEM7tKAq1rEeVy1yB8FOr2j0BcBfjmQxjPfrWLc4u3ElY/m35c35tiJdP75xXKOncz4rV90ZDjPXN1UwS+FjkJf5AzMXb+XR8etYM3OQ5SMCON4+h8f5xBXPprZDyd7UJ1IznQhV+QMtKldiW/ua8ejVzTONvABtut2TynCFPoiWUSEh3HbJbU4p2xUtvvPLR9dwBWJBI9CXyQHD3drSHTk7+/hDzPofdF5HlUkcvYivC5ApLA6dbF26MQ1bNt/jPLRkZzIyGTYpLXsPnyCBy6tR9moSI+rFMkbXcgVyYN9R04w9Ps1fDR/M5VKl2RIt4b0bBFHWJh5XZqEOF3IFckHFUqX4L89m/LVPe2IrxDNwDFLuO6NOSzfdsDr0kQCotAXOQNN48vx+d1tGXrtBWzcc4QeL8/iX18uY//RE16XJvKnFPoiZygszLguoTpTByVxy8U1+d+8zXQclsJH8zeTkVm4pk1FTlHoi5ylctGRPNbjfL65rz31qsYw5PNl9Hx1Nos27/O6NJE/UOiLBEmjamX5pF8bRvRqzi8H0uj56g/8Y+wS9h4+7nVpIr9R6IsEkZlxZfM4pg5K4m8davP5j9voOCyF937YSHpG9p/wFSlICn2RfFCmZARDujdiwgMduCC+PI9+tYLLX5rF/A2/el2ahDiFvkg+qlulDO/3bc1rN7Xk4LGTXP/GHAZ8sphdB9O8Lk1ClEJfJJ+ZGd2aVmPKwCTuTa7LN0t30HFYCm/OWM9JTflIAcs19M0syszmm9kSM1thZo9n06eGmU0xs6VmlmJm8f725mY2x/+6pWZ2Q34MQqQoiC4RzsDLGvD9gA5cVLsST3+7im4jZjJ73R6vS5MQEsiZ/nEg2TnXDGgOdDWzNln6DANGO+cuAJ4AnvG3HwVucc6dD3QFXjCz8sEpXaRoqlm5NKP6XMjbtyZwIj2Tm96axz0f/qhHNkuByPWBa873cJ7D/m8j/V9ZP3nSGBjg354GfOl/7drTjrPdzHYBscD+sytbpOjr1Kgql9StzJsz1vNKyjqmrt5F/+S63NG+FiUjwrVUo+SLgB64ZmbhwEKgLvCKc+6hLPv/B8xzzo0ws6uBz4DKzrm9p/VpDbwHnO+cy8zy+n5AP4Dzzjuv1aZNm85uVCJFzNZ9R3lq/ComrPiFmpVKcdn55/D+nE1aqlECFtQHrjnnMpxzzYF4oLWZNcnSZRCQaGaLgERgG5B+WjHVgPeB27IGvv/4I51zCc65hNjY2EBKEilW4iuU4vWbWzH69taEhRkjZ6z/XeADHDuZwdCJazyqUIqLPN2945zbD6Tgm58/vX27c+5q51wL4BF/2wEAMysLfAP8yzk3NxhFixRXHerHMuH+Djnu17y/nK1A7t6JPXXx1cyigc7A6ix9KpvZqWMNAUb520sAX+C7yDsmmIWLFFclIsKIy2FJxmrlsl/CUSRQgZzpVwOmmdlSYAEwyTk33syeMLMe/j5JwBozWwtUBZ72t18PdAD6mNli/1fz4A5BpPgZ3KXBH5ZqBCgbHcn63YezeYVIYLRylkghdfrdO9XKRXFR7UpMXrmTtPQM7mhfm/4d61K6pFY8FZ9AL+Qq9EWKkN2HjvN/E1YzduFWzikbxSN/acTlF1TDTMs1hjotlyhSDMXGlGTYdc347O62VI4pwb0fLeLGN+ey5pdDXpcmRYRCX6QIalWjAuPuacdTVzVh1Y5DdH9xJk+OX8nBtJNelyaFnEJfpIgKDzN6t6nBtEFJ3HBhdUbN3kDysOl8tnArmVquUXKg0Bcp4iqWLsF/ezZl3D2XEF8hmoFjlnDdG3NYsf2A16VJIaTQFykmLogvz+d3t+XZay9g454jXPHSLP795XL2Hz3hdWlSiCj0RYqRsDDj+oTqTB2UxC0X1+TDeZvoOCyFj+Zv1pSPAAp9kWKpXHQkj/U4n2/ua0+9KjEM+XwZPV+dzeItesBtqFPoixRjjaqV5ZO/tWFEr+bsOJDGVa/M5qGxS9l7+LjXpYlHFPoixZyZcWXzOKYOSqJfh9p89uNWOg5LYfScjaRrucaQo9AXCRFlSkbwz+6NmPBAe5rGl+M/41ZwxcuzWbDxV69LkwKk0BcJMXWrxPBB34t47aaWHDh6guten8OATxaz62Ca16VJAVDoi4QgM6Nb02pMHphI/451+WbpDpKHT+etmes5qSmfYk2hLxLCSpWIYFCXBnw/oAMX1qzAU9+sotuImcxet8fr0iSf6CmbIvKbySt38tBnS9l7xPeBrqplSzKkWyOty1sE6CmbIpJnh4+nc+T4b8tbs/PgcQaNWcLY1C0eViXBpNAXkd8MnbiGtPTfz+mnZzoe+nwZ09bs8qgqCSaFvoj8JqeF1zMyHbe9s4A7R6ey5dejBVyVBJNCX0R+c24OC7KfWy6Kh7s1ZPa6PXR6bjrPT1pL2smMAq5OgiHX0DezKDObb2ZLzGyFmT2eTZ8aZjbFzJaaWYqZxZ+271Yz+8n/dWuwByAiwZPdguzRkeH8o2tD7kqsw9SBSXQ9/xxGTPmJzs9NZ+KKXyhsN4PIn8v17h3zLb5Z2jl32MwigVnA/c65uaf1GQOMd869Z2bJwG3OuZvNrCKQCiQADlgItHLO7cvp5+nuHRFvnb4g+7nloxncpcEf7t6Z8/NeHv1qOWt3HiaxfiyPXtGY2rFlPKpYIJ8WRjezUvhC/27n3LzT2lcAXZxzW/2/JA4458qa2Y1AknPub/5+bwApzrmPcvoZCn2RouFkRibvz9nkm+pJz+CO9rXp37EupUtGeF1aSArqLZtmFm5mi4FdwKTTA99vCXCNf7snEGNmlYA44PR7vbb620SkiIsMD+P2drWYOiiJHs3ieC3lZzo/N53xS7dryqcQCyj0nXMZzrnmQDzQ2syaZOkyCEg0s0VAIrANSAcsu8NlbTCzfmaWamapu3fvztMARMRbsTElGX59Mz67+2Iqli5B//8t4q9vzmPtzkNelybZyNPdO865/UAK0DVL+3bn3NXOuRbAI/62A/jO7Kuf1jUe2J7NcUc65xKccwmxsbF5G4GIFAqtalTkq/7teOqqJqzccZBuI2by5PiVHEo76XVpcppA7t6JNbPy/u1ooDOwOkufymZ26lhDgFH+7YnAZWZWwcwqAJf520SkGAoPM3q3qcG0QUlcn1CdUbM30HHYdD7/caumfAqJQM70qwHTzGwpsADfnP54M3vCzHr4+yQBa8xsLVAVeBrAOfcr8KT/dQuAJ/xtIlKMVSxdgmeubsq4ey4hvkI0D366hOten8OK7Qe8Li3k6YFrIpKvMjMdYxdu5f8mrGbf0RP0blODBy+tT/lSJbwurVjRA9dEpFAICzOuv7A6UwcmccvFNflg7iaSh0/n4/mbycwsXCedoUChLyIFolypSB7rcT7j721P3dgyPPz5Mnq+OpvFW/Z7XVpIUeiLSIFqfG5ZPvlbG164oTnbD6TR89XZPPzZUvYePu51aSFBoS8iBc7MuKpFHFMHJnJn+9qMXbiVjsNSGD1nI+larjFfKfRFxDMxUZH8s3sjJjzQnqbx5fjPuBVc8fJsUjfqJr/8otAXEc/VrRLDB30v4tWbWnLg6AmufX0OD36ymF0H07wurdhR6ItIoTBu8Xae/mYV2w+kEVMygnFLtpM8fDpvzVzPSU35BI1CX0Q89+WibQz5fBnb/Ct3HTqeTkSYUb1iNE99s4ruI2byw7o9HldZPCj0RcRzQyeu4ViWlbiOp2dy4OhJ3rolgbT0DP761jzu+d+POS7pKIFR6IuI53IK8h0H0ujcuCqTBiTy4KX1mbxyJ52GT+eVaes4nq7lGs+EQl9EPJfj2rz+9qjIcO7rVI/JDybSoX5lhk5cQ9cXZpKyZldBllksKPRFxHM5rc07uEuD37VVr1iKN25O4L3bW2NAn3cWcOfoVLb8erQAqy3aFPoi4rmrWsTxzNVNiSsfjQFx5aN55uqmf1ib95TE+rFMeKADD3VtyOx1e+j83HTfso0nNeWTGz1lU0SKtB0HjvHfb1fz9ZLtxFeI5j+XN+bSxlXxLdcdOvSUTREJCdXKRfPSjS346M42lCoRTr/3F9LnnQVs2HPE69IKJYW+iBQLF9epxDf3tefflzfmx0376PL8DJ6dsJqjJ9K9Lq1QUeiLSLERGR5G33a1mDIokSuancurKT/Tafh0xi/druUa/TSnLyLFVurGX/nPuBWs3HGQtnUq8XiP86lXNeZ3fb5ctI2hE9ewff8xzi0fzeAuDXK8gFyYaU5fREJeQs2KfH1vO568qgkrth+k24iZPDV+JYfSTgK/f/yDA7btP8aQz5fx5aJt3haej3INfTOLMrP5ZrbEzFaY2ePZ9DnPzKaZ2SIzW2pm3f3tkWb2npktM7NVZjYkPwYhIpKT8DDj5jY1mDYoiesSqvP27A0kD5/OF4u28uyE1X94/MOxkxkMnbjGo2rzXyBn+seBZOdcM6A50NXM2mTp8y/gU+dcC6AX8Kq//TqgpHOuKdAK+JuZ1QxG4SIieVGxdAmeubopX/79Es4tH82AT5aw/UD2j24uzs/3yTX0nc9h/7eR/q+sFwIcUNa/XQ7Yflp7aTOLAKKBE8DBsy1aRORMNateni/ubsuz11xAWA638uf0WIjiIKA5fTMLN7PFwC5gknNuXpaUArRCAAAJgUlEQVQujwG9zWwr8C1wr799LHAE2AFsBoY557Qkjoh4KizMuP7C6jx1VRPCsyR/VETYHx7/UJwEFPrOuQznXHMgHmhtZk2ydLkReNc5Fw90B943szCgNZABnAvUAgaaWe2sxzezfmaWamapu3fvPovhiIgE7q8X1WD4dc2oElPyt7YqZaOoVbm0h1XlrzzfsmlmjwJHnHPDTmtbAXR1zm3xf78eaAM8Csx1zr3vbx8FTHDOfZrT8XXLpoh4wTnnW73r21XsOXycXhdWZ3CXhlQsXcLr0gIStFs2zSzWzMr7t6OBzsDqLN02A538fRoBUcBuf3uy+ZTG94sg62tFRDxnZlzVIo6pAxO5o10txqRupeOwFN6fs5GMzML1eaazEcj0TjVgmpktBRbgm9Mfb2ZPmFkPf5+BwJ1mtgT4COjjfG8hXgHKAMv9r33HObc06KMQEQmSmKhIHvlLY767vz3nn1uWf49bwRUvzSJ1Y/G4HKlP5IqI5MA5x3fLf+Gp8SvZfiCNq1vG8XC3hlSJifK6tD/QJ3JFRM6SmdG9aTUmD0zkno51GL9kB8nDpvPWzPWczMj0urwzotAXEclFqRIRDO7SkIkDOpBQswJPfbOKv7w4kx9+3uN1aXmm0BcRCVCtyqV5p8+FvHlLAsdOZvDXN+fR/38/suNA0fkEr0JfRCQPzIxLG1dl0oBEBnSuz6SVO0keNp1XU9ZxPL3wL9eo0BcROQNRkeHc37kekx9MpH29yjw7YQ3dXpjJ9LWF+wOmCn0RkbNQvWIpRt6SwLu3XYgDbh01n36jU9ny61GvS8uWQl9EJAiSGlRhwgPteahrQ2at20Pn56YzYvJPpJ0sXFM+Cn0RkSApGRHO3Ul1mDIwkUsbV+X5yWu59PnpTFq5s9As16jQFxEJsmrlonn5ry35350XERURzp2jU7nt3QVs2HPE69L0iVwRkfx0MiOT937YyAuTf+JEeiZ3dqjFPR3r8v2KnUFdmzfQT+Qq9EVECsCuQ2n8v+9W8/mP2ygfHcnRExmcOO1TvdGR4TxzddMzDn49hkFEpBCpEhPFc9c3Z+xdF3PkRPrvAh8Kbm1ehb6ISAFKqFmRkxnZz7AUxNq8Cn0RkQIWl8MavAWxNq9CX0SkgA3u0oDoyPDftUVHhhfI2rwR+f4TRETkd05drA3m3TuBUuiLiHjgqhZxBRLyWWl6R0QkhCj0RURCSK6hb2ZRZjbfzJaY2QozezybPueZ2TQzW2RmS82s+2n7LjCzOf7XLjOzwre4pIhIiAhkTv84kOycO2xmkcAsM/vOOTf3tD7/Aj51zr1mZo2Bb4GaZhYBfADc7JxbYmaVgJPBHoSIiAQm19B3vuc0HPZ/G+n/yvrJAgeU9W+XA7b7ty8DljrnlviPtfdsCxYRkTMX0Jy+mYWb2WJgFzDJOTcvS5fHgN5mthXfWf69/vb6gDOziWb2o5n9I0h1i4jIGQgo9J1zGc655kA80NrMmmTpciPwrnMuHugOvG9mYfjeSbQDbvL/t6eZdcp6fDPrZ2apZpa6e3fhXmpMRKQoy9PdO865/UAK0DXLrr7Ap/4+c4AooDKwFZjunNvjnDuK711Ay2yOO9I5l+CcS4iNjc3zIEREJDCB3L0Ta2bl/dvRQGdgdZZum4FO/j6N8IX+bmAicIGZlfJf1E0EVgavfBERyYtA7t6pBrxnZuH4fkl86pwbb2ZPAKnOua+AgcCbZjYA30XdPv4LwPvM7Dlggb/9W+fcN/kyEhERyZUWURERKQa0iIqIiPyBQl9EJIQo9EVEQohCX0QkhCj0RURCiEJfRCSEKPRFREKIQl9EJIQo9EVEQohCX0QkhCj0RURCiEJfRCSEKPRFREJIoXvKppntBjadxSEqA3uCVE5RoPEWf6E2Zo33zNRwzuW6ClWhC/2zZWapgTxetLjQeIu/UBuzxpu/NL0jIhJCFPoiIiGkOIb+SK8LKGAab/EXamPWePNRsZvTFxGRnBXHM30REclBkQx9M+tqZmvMbJ2ZPZzN/pJm9ol//zwzq1nwVQZXAGN+0MxWmtlSM5tiZjW8qDNYchvvaf2uNTNnZkX6bo9Axmtm1/v/jFeY2f8KusZgC+Dv9HlmNs3MFvn/Xnf3os5gMbNRZrbLzJbnsN/M7EX//4+lZtYyXwpxzhWpLyAc+BmoDZQAlgCNs/T5O/C6f7sX8InXdRfAmDsCpfzbdxflMQcyXn+/GGAGMBdI8LrufP7zrQcsAir4v6/idd0FMOaRwN3+7cbARq/rPssxdwBaAstz2N8d+A4woA0wLz/qKIpn+q2Bdc659c65E8DHwJVZ+lwJvOffHgt0MjMrwBqDLdcxO+emOeeO+r+dC8QXcI3BFMifMcCTwLNAWkEWlw8CGe+dwCvOuX0AzrldBVxjsAUyZgeU9W+XA7YXYH1B55ybAfz6J12uBEY7n7lAeTOrFuw6imLoxwFbTvt+q78t2z7OuXTgAFCpQKrLH4GM+XR98Z0xFFW5jtfMWgDVnXPjC7KwfBLIn299oL6ZzTazuWbWtcCqyx+BjPkxoLeZbQW+Be4tmNI8k9d/52ckItgHLADZnbFnvQUpkD5FScDjMbPeQAKQmK8V5a8/Ha+ZhQHPA30KqqB8FsifbwS+KZ4kfO/iZppZE+fc/nyuLb8EMuYbgXedc8PN7GLgff+YM/O/PE8USG4VxTP9rUD1076P549v+37rY2YR+N4a/tnbqsIukDFjZp2BR4AezrnjBVRbfshtvDFAEyDFzDbim//8qghfzA307/Q459xJ59wGYA2+XwJFVSBj7gt8CuCcmwNE4XtOTXEV0L/zs1UUQ38BUM/MaplZCXwXar/K0ucr4Fb/9rXAVOe/UlJE5Tpm/3THG/gCv6jP9/7peJ1zB5xzlZ1zNZ1zNfFdw+jhnEv1ptyzFsjf6S/xXazHzCrjm+5ZX6BVBlcgY94MdAIws0b4Qn93gVZZsL4CbvHfxdMGOOCc2xHsH1Lkpnecc+lm1h+YiO8OgFHOuRVm9gSQ6pz7Cngb31vBdfjO8Ht5V/HZC3DMQ4EywBj/NevNzrkenhV9FgIcb7ER4HgnApeZ2UogAxjsnNvrXdVnJ8AxDwTeNLMB+KY5+hTlkzcz+wjf9Fxl/3WKR4FIAOfc6/iuW3QH1gFHgdvypY4i/P9QRETyqChO74iIyBlS6IuIhBCFvohICFHoi4iEEIW+iEgIUeiLiIQQhb6ISAhR6IuIhJD/D7yjfi12Dc7hAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.scatter(variances, betas)\n", "xr = np.array([0,1])\n", "plt.plot(xr, np.array(gv[1] + gv[0]*xr))\n", "plt.plot([0], [gv[1]], marker='x', markersize=6)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Do not believe there is a `simex` package for Python.\n", "\n", "## Changes of Scale\n", "\n", "Read in the data and fit the model:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: sr R-squared: 0.338
Model: OLS Adj. R-squared: 0.280
Method: Least Squares F-statistic: 5.756
Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790
Time: 15:57:32 Log-Likelihood: -135.10
No. Observations: 50 AIC: 280.2
Df Residuals: 45 BIC: 289.8
Df Model: 4
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 28.5661 7.355 3.884 0.000 13.753 43.379
pop15 -0.4612 0.145 -3.189 0.003 -0.753 -0.170
pop75 -1.6915 1.084 -1.561 0.126 -3.874 0.491
dpi -0.0003 0.001 -0.362 0.719 -0.002 0.002
ddpi 0.4097 0.196 2.088 0.042 0.015 0.805
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 0.866 Durbin-Watson: 1.934
Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493
Skew: 0.241 Prob(JB): 0.782
Kurtosis: 3.064 Cond. No. 2.04e+04


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.04e+04. This might indicate that there are
strong multicollinearity or other numerical problems." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: sr R-squared: 0.338\n", "Model: OLS Adj. R-squared: 0.280\n", "Method: Least Squares F-statistic: 5.756\n", "Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790\n", "Time: 15:57:32 Log-Likelihood: -135.10\n", "No. Observations: 50 AIC: 280.2\n", "Df Residuals: 45 BIC: 289.8\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 28.5661 7.355 3.884 0.000 13.753 43.379\n", "pop15 -0.4612 0.145 -3.189 0.003 -0.753 -0.170\n", "pop75 -1.6915 1.084 -1.561 0.126 -3.874 0.491\n", "dpi -0.0003 0.001 -0.362 0.719 -0.002 0.002\n", "ddpi 0.4097 0.196 2.088 0.042 0.015 0.805\n", "==============================================================================\n", "Omnibus: 0.866 Durbin-Watson: 1.934\n", "Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493\n", "Skew: 0.241 Prob(JB): 0.782\n", "Kurtosis: 3.064 Cond. No. 2.04e+04\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 2.04e+04. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "savings = pd.read_csv(\"data/savings.csv\",index_col=0)\n", "lmod = smf.ols(formula = 'sr ~ pop15 + pop75 + dpi + ddpi', data=savings).fit()\n", "lmod.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rescale one of the predictors:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: sr R-squared: 0.338
Model: OLS Adj. R-squared: 0.280
Method: Least Squares F-statistic: 5.756
Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790
Time: 15:57:32 Log-Likelihood: -135.10
No. Observations: 50 AIC: 280.2
Df Residuals: 45 BIC: 289.8
Df Model: 4
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 28.5661 7.355 3.884 0.000 13.753 43.379
pop15 -0.4612 0.145 -3.189 0.003 -0.753 -0.170
pop75 -1.6915 1.084 -1.561 0.126 -3.874 0.491
I(dpi / 1000) -0.3369 0.931 -0.362 0.719 -2.212 1.538
ddpi 0.4097 0.196 2.088 0.042 0.015 0.805
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 0.866 Durbin-Watson: 1.934
Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493
Skew: 0.241 Prob(JB): 0.782
Kurtosis: 3.064 Cond. No. 503.


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: sr R-squared: 0.338\n", "Model: OLS Adj. R-squared: 0.280\n", "Method: Least Squares F-statistic: 5.756\n", "Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790\n", "Time: 15:57:32 Log-Likelihood: -135.10\n", "No. Observations: 50 AIC: 280.2\n", "Df Residuals: 45 BIC: 289.8\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "=================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "---------------------------------------------------------------------------------\n", "Intercept 28.5661 7.355 3.884 0.000 13.753 43.379\n", "pop15 -0.4612 0.145 -3.189 0.003 -0.753 -0.170\n", "pop75 -1.6915 1.084 -1.561 0.126 -3.874 0.491\n", "I(dpi / 1000) -0.3369 0.931 -0.362 0.719 -2.212 1.538\n", "ddpi 0.4097 0.196 2.088 0.042 0.015 0.805\n", "==============================================================================\n", "Omnibus: 0.866 Durbin-Watson: 1.934\n", "Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493\n", "Skew: 0.241 Prob(JB): 0.782\n", "Kurtosis: 3.064 Cond. No. 503.\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lmod = smf.ols(formula = 'sr ~ pop15 + pop75 + I(dpi/1000) + ddpi', data=savings).fit()\n", "lmod.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Standardize all the variables:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: sr R-squared: 0.338
Model: OLS Adj. R-squared: 0.280
Method: Least Squares F-statistic: 5.756
Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790
Time: 15:57:32 Log-Likelihood: -60.617
No. Observations: 50 AIC: 131.2
Df Residuals: 45 BIC: 140.8
Df Model: 4
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 1.665e-16 0.121 1.37e-15 1.000 -0.244 0.244
pop15 -0.9420 0.295 -3.189 0.003 -1.537 -0.347
pop75 -0.4873 0.312 -1.561 0.126 -1.116 0.141
dpi -0.0745 0.206 -0.362 0.719 -0.489 0.340
ddpi 0.2624 0.126 2.088 0.042 0.009 0.516
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 0.866 Durbin-Watson: 1.934
Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493
Skew: 0.241 Prob(JB): 0.782
Kurtosis: 3.064 Cond. No. 5.42


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: sr R-squared: 0.338\n", "Model: OLS Adj. R-squared: 0.280\n", "Method: Least Squares F-statistic: 5.756\n", "Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790\n", "Time: 15:57:32 Log-Likelihood: -60.617\n", "No. Observations: 50 AIC: 131.2\n", "Df Residuals: 45 BIC: 140.8\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 1.665e-16 0.121 1.37e-15 1.000 -0.244 0.244\n", "pop15 -0.9420 0.295 -3.189 0.003 -1.537 -0.347\n", "pop75 -0.4873 0.312 -1.561 0.126 -1.116 0.141\n", "dpi -0.0745 0.206 -0.362 0.719 -0.489 0.340\n", "ddpi 0.2624 0.126 2.088 0.042 0.009 0.516\n", "==============================================================================\n", "Omnibus: 0.866 Durbin-Watson: 1.934\n", "Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493\n", "Skew: 0.241 Prob(JB): 0.782\n", "Kurtosis: 3.064 Cond. No. 5.42\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scsav = savings.apply(sp.stats.zscore)\n", "lmod = smf.ols(formula = 'sr ~ pop15 + pop75 + dpi + ddpi', data=scsav).fit()\n", "lmod.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set up data frame and make a plot of the estimates and confidence intervals:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEChJREFUeJzt3X1sXfV9x/HPp0kIhsEcGhjgLhhKZaCgYdWLBGnZSrqFTjwE1gnCxNOYAtVaVeualZQ/FqYhgphU/kFrsio8rBpFZMFNtQcGZEAbHoKDKS4P4akk4LTgFgxqsXhIvvvDx/R+HSc+Jr73XMfvl2Sd49/53d/9+qfjfO7vnnNjR4QAABjxsaoLAAA0F4IBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAACSmVUX8FHMnTs32tvbqy4DwF56aeA3kqRjDj2w4kqmh82bN/8yIg4dr9+UDIb29nb19PRUXQaAvXT+qoclSXdccUrFlUwPtreW6cdbSQCAhGAAACQEAwAgIRgAAAnBAABI6hIMtlfY/saotnbbPy3x2IfqURMAoJymu101Ik6tugYA01t3b79uuHuLtg8O6cjWFi1b1KHFnW1Vl9UwkxYMtq+WdLGkVyQNSNps+zOS1kh6R9KPa/peKulcSbMlHS3p3yPimuLYryPidyarLgCYiO7efi1f16eh93dIkvoHh7R8XZ8kTZtwmJRgKALgAkmdxZiPS9os6WZJX42IB2zfMOph8yWdqOHQeMz2f0YEn1oDppn3tm3T1ov+peoyfmvboFZ8sGPX9gdnaOu81sbXM8rs44/T4d/6Vl2fY7KuMXxO0l0R8U5EvC1pvaQDJbVGxANFn38b9Zh7IuJXETEkaZ2kz+7pCWwvtd1ju2dgYGCSygaA7N2xQmEP7fuiybzGEKO+/80YbXvqv6e+iojVklZLUldX1x77Apg69ps3T0dde1vVZXzowpUb1D84tEt7W2uLzr/q9AoqarzJWjE8KOlc2y22D5J0VtH+lu2RlcBfjnrMn9g+xHaLpMWSNk5SLQDwkS1b1KGWWTNSW8usGVq2qKOiihpvUlYMEfG47TskPSFpq6QfFYcuk7TG9juS7h71sB9r+O2lYzV88ZnrCwAqN3KBmbuSJkFEXCvp2jEO/UHN/oqa/dcj4itjjMMdSQAqtbizbVoFwWh88hkAkFTyAbeIuEXSLVU8NwBgz1gxAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEhmNvLJbK+Q9OuI+OfdHD9b0gkRsbKRdQGYHN29/brh7i3aPjikI1tbtGxRhxZ3tlVdFiaoocEwnohYL2l91XUAmLju3n4tX9enofd3SJL6B4e0fF2fJBEOU0zdg8H21ZIulvSKpAFJm23fL+kJSfMlHSzpryJik+1LJXVFxFfqXRdQb9dvul7PvvFs1WU0TO+2QemIHWoZ1b7isRn6wWutYz5myxt/JEm67H9W17m6+jvukOP0zfnfrLqMSVHXYLD9GUkXSOosnutxSZuLwwdGxKm2T5O0RtKJ44y1VNJSSZo3b17dagbw0bz3wY4JtaN51XvF8DlJd0XEO5Jku/ZtotslKSIetH2w7bFfUhQiYrWk1ZLU1dUVdaoXmDT7yqvHshas3KD+waFd2ttaW3TzGaeP+Zjztz4sSbr5jEvrWRomqBF3Je3uH/HR7fxjD0xhyxZ1qGXWjNTWMmuGli3qqKgifFT1DoYHJZ1ru8X2QZLOqjl2viTZ/qyktyLirTrXAqCOFne26brzTlJba4us4ZXCdeedxIXnKaiubyVFxOO279Dwheatkn5Uc/hN2w+puPhczzoANMbizjaCYB9Q97uSIuJaSdfWttk+U9J/RMTyUX1vkXRLvWsCAOwen3wGACSVfMAtIv64iucFAIyPFQMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJDMbNQT2f62pM8X3x4g6bCIaC2O7ZDUVxzbFhFnN6ouAEDWsGCIiL8d2bf9VUmdNYeHIuLkRtUC1Ft3b79uuHuLtg8O6cjWFi1b1KHFnW1VlwWUMu5bSbbbbT9r+1bbT9pea/sA2wtt99rus73G9uyi/8u2r7e9qfg6doxhl0i6fbJ/GKAZdPf2a/m6PvUPDikk9Q8Oafm6PnX39lddGlBK2RVDh6TLI2Kj7TWSvi7pCkkLI+I527dJ+rKkG4v+b0fEfNsXF21njgxk+yhJR0vaUDP+/rZ7JH0gaWVEdO/VT4Vd/fdV0i/6xu+HvTZv25u62Tul/XL7fus/Jj0xp5qimtUvzhne3vxPkzvu4SdJX1w5uWNOI2UvPr8SERuL/e9JWijpZxHxXNF2q6TTavrfXrM9ZdRYF0haGxE7atrmRUSXpAsl3Wj7k6MLsL3Udo/tnoGBgZJlA4333o6dE2oHmk3ZFUNMcNzYzb40HAx/kzpHbC+2L9m+X8PXH14c1We1pNWS1NXVNdF6wKunhvm7lRvUPzi0S3tba4s2XnZ6BRU1sVUPD28vW1ptHUjKrhjm2R555b9E0r2S2muuH1wk6YGa/ufXbB8eabTdIWnOqLY5Ndcn5kpaIOnpCf4cQNNYtqhDLbNmpLaWWTO0bFFHRRUBE1N2xfCMpEtsr5L0vKSvSXpE0p22Z0p6TNJ3avrPtv2ohoNnSU37Eknfj4jaV/zHS1ple2fRf2VEEAyYskbuPuKuJExVZYNhZ0RcOartPuVbTmvdFBHXjG6MiBVjtD0k6aSSdQBTwuLONoIAUxaffAYAJOOuGCLiZUknlh0wItr3oh4AQMVYMQAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAEnDgsH2abYft/2B7S+NOrbD9hPF1/pG1QQA2NXMBj7XNkmXSvrGGMeGIuLkBtaCOuju7dcNd2/R9sEhHdnaomWLOrS4s63qsgBM0LgrBtvttp+1favtJ22vtX2A7YW2e2332V5je3bR/2Xb19veVHwdK0kR8XJEPClpZ51/JlSgu7dfy9f1qX9wSCGpf3BIy9f1qbu3v+rSAExQ2RVDh6TLI2Kj7TWSvi7pCkkLI+I527dJ+rKkG4v+b0fEfNsXF21njjP+/rZ7JH0gaWVEdE/4Jynpmh8+pae3v12v4aet3m2Dem9Hzvyh93fo79c+qds3bauoqn3TCUcerH8469NVl4F9WNlrDK9ExMZi/3uSFkr6WUQ8V7TdKum0mv6312xPKTH+vIjoknShpBttf3J0B9tLbffY7hkYGChZNhpldCiM1w6geZVdMcQEx43d7I/dOWJ7sX3J9v2SOiW9OKrPakmrJamrq2ui9XyIV1r1sWDlBvUPDu3S3tbaojuuKPPaAECzKLtimGd75Ld7iaR7JbWPXD+QdJGkB2r6n1+zfXhPA9ueU3N9Yq6kBZKeLlkXmsSyRR1qmTUjtbXMmqFlizoqqgjAR1V2xfCMpEtsr5L0vKSvSXpE0p22Z0p6TNJ3avrPtv2ohoNniSTZ/kNJd0maI+ks29dExKclHS9ple2dRf+VEUEwTDEjdx9xVxIw9ZUNhp0RceWotvs0/JbPWG6KiGtqGyLiMUmfGN0xIh6SdFLJOtDEFne2EQTAPoBPPgMAknFXDBHxsqQTyw4YEe17UQ8AoGKsGAAACcEAAEgIBgBAQjAAABJHfOQPEVfG9oCkrZM87FxJv5zkMfdFzFM5zFN5zFU5kzFPR0XEoeN1mpLBUA+2e4r/rwl7wDyVwzyVx1yV08h54q0kAEBCMAAAEoLht1ZXXcAUwTyVwzyVx1yV07B54hoDACBhxQAASKZtMNj+C9tP2d5pe7dX+ou/Yd1n+4niz49OKxOYpzNsb7H9gu2rGlljM7B9iO17bD9fbOfspt+O4lx6wvb6RtdZlfHOD9uzbd9RHH/Udnvjq6xeiXm61PZAzTn01/WoY9oGg6SfSjpP0oMl+n4+Ik6eprfUjTtPtmdIuknSFyWdIGmJ7RMaU17TuErSfRHxKQ3/l/S7C8eh4lw6OSLOblx51Sl5flwu6c2IOFbStyVd39gqqzeB36M7as6h79ajlmkbDBHxTERsqbqOZldynuZLeiEiXoqI9yR9X9I59a+uqZyj4b99rmK7uMJamk2Z86N2/tZKWmjbDayxGTTN79G0DYYJCEn/a3uz7aVVF9Ok2iS9UvP9q0XbdPJ7EfFzSSq2h+2m3/62e2w/Ynu6hEeZ8+PDPhHxgaS3JH28IdU1j7K/R39u+0nba23/fj0KKfsX3KYk2/dKOnyMQ1dHxA9KDrMgIrbbPkzSPbafjYgybz9NGZMwT2O9stvnbnfb0zxNYJh5xfl0jKQNtvsi4sXJqbBplTk/psU5NI4yc/BDSbdHxLu2r9TwKuv0yS5knw6GiPjCJIyxvdi+bvsuDS/39qlgmIR5elVS7SuXT0javpdjNp09zZPt12wfERE/t32EpNd3M8bI+fSS7fs1/Odx9/VgKHN+jPR5tfg78r8r6Y3GlNc0xp2niPhVzbf/qjpdi+GtpD2wfaDtg0b2Jf2phi/GIntM0qdsH217P0kXSJo2d9wU1ku6pNi/RNIuKy3bc2zPLvbnSlog6emGVVidMudH7fx9SdKGmH4fshp3nooXHSPOlvRMXSqJiGn5JelcDSf0u5Jek3R30X6kpP8q9o+R9JPi6ykNv7VSee3NNk/F938m6TkNv/qdjvP0cQ3fjfR8sT2kaO+S9N1i/1RJfcX51Cfp8qrrbuD87HJ+SPpHSWcX+/tLulPSC5I2STqm6pqbdJ6uK/4t+omk/5N0XD3q4JPPAICEt5IAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACD5f0BsJFew6IE6AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "edf = pd.concat([lmod.params, lmod.conf_int()],axis=1).iloc[1:,]\n", "edf.columns = ['estimate','lb','ub']\n", "npreds = edf.shape[0]\n", "fig, ax = plt.subplots()\n", "ax.scatter(edf.estimate,np.arange(npreds))\n", "for i in range(npreds):\n", " ax.plot([edf.lb[i], edf.ub[i]], [i, i])\n", "ax.set_yticks(np.arange(npreds))\n", "ax.set_yticklabels(edf.index)\n", "ax.axvline(0)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Do Gelman's rescaling:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: sr R-squared: 0.325
Model: OLS Adj. R-squared: 0.281
Method: Least Squares F-statistic: 7.374
Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000391
Time: 15:57:32 Log-Likelihood: -135.61
No. Observations: 50 AIC: 279.2
Df Residuals: 46 BIC: 286.9
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 6.8176 1.011 6.746 0.000 4.783 8.852
age 5.2841 1.585 3.334 0.002 2.094 8.474
dpis -1.5485 1.593 -0.972 0.336 -4.755 1.658
ddpis 2.4433 1.097 2.227 0.031 0.235 4.651
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 2.962 Durbin-Watson: 2.035
Prob(Omnibus): 0.227 Jarque-Bera (JB): 1.987
Skew: 0.433 Prob(JB): 0.370
Kurtosis: 3.451 Cond. No. 4.92


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: sr R-squared: 0.325\n", "Model: OLS Adj. R-squared: 0.281\n", "Method: Least Squares F-statistic: 7.374\n", "Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000391\n", "Time: 15:57:32 Log-Likelihood: -135.61\n", "No. Observations: 50 AIC: 279.2\n", "Df Residuals: 46 BIC: 286.9\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 6.8176 1.011 6.746 0.000 4.783 8.852\n", "age 5.2841 1.585 3.334 0.002 2.094 8.474\n", "dpis -1.5485 1.593 -0.972 0.336 -4.755 1.658\n", "ddpis 2.4433 1.097 2.227 0.031 0.235 4.651\n", "==============================================================================\n", "Omnibus: 2.962 Durbin-Watson: 2.035\n", "Prob(Omnibus): 0.227 Jarque-Bera (JB): 1.987\n", "Skew: 0.433 Prob(JB): 0.370\n", "Kurtosis: 3.451 Cond. No. 4.92\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "savings['age'] = np.where(savings.pop15 > 35, 0, 1)\n", "savings['dpis'] = sp.stats.zscore(savings.dpi)/2\n", "savings['ddpis'] = sp.stats.zscore(savings.ddpi)/2\n", "smf.ols(formula='sr ~ age + dpis + ddpis', data=savings).fit().summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Collinearity\n", "\n", "Read in the data:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeWeightHtShoesHtSeatedArmThighLeghipcenter
046180187.2184.995.236.145.341.3-206.300
131175167.5165.583.832.936.535.9-178.210
223100153.6152.282.926.036.631.0-71.673
319185190.3187.497.337.444.141.0-257.720
423159178.0174.193.929.540.136.9-173.230
\n", "
" ], "text/plain": [ " Age Weight HtShoes Ht Seated Arm Thigh Leg hipcenter\n", "0 46 180 187.2 184.9 95.2 36.1 45.3 41.3 -206.300\n", "1 31 175 167.5 165.5 83.8 32.9 36.5 35.9 -178.210\n", "2 23 100 153.6 152.2 82.9 26.0 36.6 31.0 -71.673\n", "3 19 185 190.3 187.4 97.3 37.4 44.1 41.0 -257.720\n", "4 23 159 178.0 174.1 93.9 29.5 40.1 36.9 -173.230" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seatpos = pd.read_csv(\"data/seatpos.csv\")\n", "seatpos.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Fit a model with all the predictors:" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: hipcenter R-squared: 0.687
Model: OLS Adj. R-squared: 0.600
Method: Least Squares F-statistic: 7.940
Date: Tue, 25 Sep 2018 Prob (F-statistic): 1.31e-05
Time: 15:57:32 Log-Likelihood: -186.73
No. Observations: 38 AIC: 391.5
Df Residuals: 29 BIC: 406.2
Df Model: 8
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 436.4321 166.572 2.620 0.014 95.755 777.109
Age 0.7757 0.570 1.360 0.184 -0.391 1.942
Weight 0.0263 0.331 0.080 0.937 -0.651 0.703
HtShoes -2.6924 9.753 -0.276 0.784 -22.640 17.255
Ht 0.6013 10.130 0.059 0.953 -20.117 21.319
Seated 0.5338 3.762 0.142 0.888 -7.160 8.228
Arm -1.3281 3.900 -0.341 0.736 -9.305 6.649
Thigh -1.1431 2.660 -0.430 0.671 -6.583 4.297
Leg -6.4390 4.714 -1.366 0.182 -16.080 3.202
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 0.543 Durbin-Watson: 1.769
Prob(Omnibus): 0.762 Jarque-Bera (JB): 0.664
Skew: 0.157 Prob(JB): 0.717
Kurtosis: 2.434 Cond. No. 8.44e+03


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.44e+03. This might indicate that there are
strong multicollinearity or other numerical problems." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: hipcenter R-squared: 0.687\n", "Model: OLS Adj. R-squared: 0.600\n", "Method: Least Squares F-statistic: 7.940\n", "Date: Tue, 25 Sep 2018 Prob (F-statistic): 1.31e-05\n", "Time: 15:57:32 Log-Likelihood: -186.73\n", "No. Observations: 38 AIC: 391.5\n", "Df Residuals: 29 BIC: 406.2\n", "Df Model: 8 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 436.4321 166.572 2.620 0.014 95.755 777.109\n", "Age 0.7757 0.570 1.360 0.184 -0.391 1.942\n", "Weight 0.0263 0.331 0.080 0.937 -0.651 0.703\n", "HtShoes -2.6924 9.753 -0.276 0.784 -22.640 17.255\n", "Ht 0.6013 10.130 0.059 0.953 -20.117 21.319\n", "Seated 0.5338 3.762 0.142 0.888 -7.160 8.228\n", "Arm -1.3281 3.900 -0.341 0.736 -9.305 6.649\n", "Thigh -1.1431 2.660 -0.430 0.671 -6.583 4.297\n", "Leg -6.4390 4.714 -1.366 0.182 -16.080 3.202\n", "==============================================================================\n", "Omnibus: 0.543 Durbin-Watson: 1.769\n", "Prob(Omnibus): 0.762 Jarque-Bera (JB): 0.664\n", "Skew: 0.157 Prob(JB): 0.717\n", "Kurtosis: 2.434 Cond. No. 8.44e+03\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 8.44e+03. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lmod = smf.ols(formula = 'hipcenter ~ Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg', data=seatpos).fit()\n", "lmod.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Look at the correlations of the predictors:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeWeightHtShoesHtSeatedArmThighLeg
Age1.0000.081-0.079-0.090-0.1700.3600.091-0.042
Weight0.0811.0000.8280.8290.7760.6980.5730.784
HtShoes-0.0790.8281.0000.9980.9300.7520.7250.908
Ht-0.0900.8290.9981.0000.9280.7520.7350.910
Seated-0.1700.7760.9300.9281.0000.6250.6070.812
Arm0.3600.6980.7520.7520.6251.0000.6710.754
Thigh0.0910.5730.7250.7350.6070.6711.0000.650
Leg-0.0420.7840.9080.9100.8120.7540.6501.000
\n", "
" ], "text/plain": [ " Age Weight HtShoes Ht Seated Arm Thigh Leg\n", "Age 1.000 0.081 -0.079 -0.090 -0.170 0.360 0.091 -0.042\n", "Weight 0.081 1.000 0.828 0.829 0.776 0.698 0.573 0.784\n", "HtShoes -0.079 0.828 1.000 0.998 0.930 0.752 0.725 0.908\n", "Ht -0.090 0.829 0.998 1.000 0.928 0.752 0.735 0.910\n", "Seated -0.170 0.776 0.930 0.928 1.000 0.625 0.607 0.812\n", "Arm 0.360 0.698 0.752 0.752 0.625 1.000 0.671 0.754\n", "Thigh 0.091 0.573 0.725 0.735 0.607 0.671 1.000 0.650\n", "Leg -0.042 0.784 0.908 0.910 0.812 0.754 0.650 1.000" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seatpos.iloc[:,:-1].corr().round(3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Do the eigendecomposition. Gives similar but not identical results to R calculation." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3.6536714e+06, 2.1479480e+04, 9.0432253e+03, 2.9895260e+02,\n", " 1.4839482e+02, 7.2982092e+00, 8.1173974e+01, 5.3361943e+01])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = seatpos.iloc[:,:-1]\n", "XTX = np.dot(X.T,X)\n", "evals, evecs = np.linalg.eig(XTX)\n", "evals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Calculate the condition numbers;" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1.0000000e+00, 1.7010055e+02, 4.0402304e+02, 1.2221574e+04,\n", " 2.4621286e+04, 5.0062574e+05, 4.5010379e+04, 6.8469608e+04])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evals[0]/evals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compute the variance inflation factors:" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.49948233386392404" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from patsy import dmatrix\n", "X = dmatrix(\"Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg\", data=seatpos, return_type='dataframe')\n", "lmod = sm.OLS(X['Age'],X.drop('Age',axis=1)).fit()\n", "lmod.rsquared" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.9979314770642473" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1/(1-lmod.rsquared)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get them all:" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Intercept 741.029693\n", "Age 1.997931\n", "Weight 3.647030\n", "HtShoes 307.429378\n", "Ht 333.137832\n", "Seated 8.951054\n", "Arm 4.496368\n", "Thigh 2.762886\n", "Leg 6.694291\n", "dtype: float64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", "vif = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n", "pd.Series(vif, X.columns)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Change in coefficients due to perturbing the response" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
InterceptAgeWeightHtShoesHtSeatedArmThighLeg
0436.4321280.7757160.026313-2.6924080.6013450.533752-1.328069-1.143119-6.439046
1389.1463300.751927-0.001767-2.7792681.629064-0.167238-0.757036-1.740402-7.524041
\n", "
" ], "text/plain": [ " Intercept Age Weight HtShoes Ht Seated Arm \\\n", "0 436.432128 0.775716 0.026313 -2.692408 0.601345 0.533752 -1.328069 \n", "1 389.146330 0.751927 -0.001767 -2.779268 1.629064 -0.167238 -0.757036 \n", "\n", " Thigh Leg \n", "0 -1.143119 -6.439046 \n", "1 -1.740402 -7.524041 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seatpos['hiperb'] = seatpos.hipcenter+np.random.normal(scale=10,size=38)\n", "lmod = smf.ols(formula = 'hipcenter ~ Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg', data=seatpos).fit()\n", "lmodp = smf.ols(formula = 'hiperb ~ Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg', data=seatpos).fit()\n", "pd.DataFrame([lmod.params, lmodp.params])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Change in R-squared" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.6865534760253379, 0.6520350104273814)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lmod.rsquared, lmodp.rsquared" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Correlations of the length variables" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HtShoesHtSeatedArmThighLeg
HtShoes1.0000.9980.9300.7220.7100.896
Ht0.9981.0000.9290.7240.7200.898
Seated0.9300.9291.0000.6030.5760.803
Arm0.7220.7240.6031.0000.6700.723
Thigh0.7100.7200.5760.6701.0000.626
Leg0.8960.8980.8030.7230.6261.000
\n", "
" ], "text/plain": [ " HtShoes Ht Seated Arm Thigh Leg\n", "HtShoes 1.000 0.998 0.930 0.722 0.710 0.896\n", "Ht 0.998 1.000 0.929 0.724 0.720 0.898\n", "Seated 0.930 0.929 1.000 0.603 0.576 0.803\n", "Arm 0.722 0.724 0.603 1.000 0.670 0.723\n", "Thigh 0.710 0.720 0.576 0.670 1.000 0.626\n", "Leg 0.896 0.898 0.803 0.723 0.626 1.000" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame.corr(X.iloc[3:,3:]).round(3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Recommended smaller model:" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: hipcenter R-squared: 0.656
Model: OLS Adj. R-squared: 0.626
Method: Least Squares F-statistic: 21.63
Date: Tue, 25 Sep 2018 Prob (F-statistic): 5.13e-08
Time: 15:57:32 Log-Likelihood: -188.49
No. Observations: 38 AIC: 385.0
Df Residuals: 34 BIC: 391.5
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 528.2977 135.313 3.904 0.000 253.309 803.287
Age 0.5195 0.408 1.273 0.212 -0.310 1.349
Weight 0.0043 0.312 0.014 0.989 -0.629 0.638
Ht -4.2119 0.999 -4.216 0.000 -6.242 -2.182
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 0.932 Durbin-Watson: 1.742
Prob(Omnibus): 0.628 Jarque-Bera (JB): 0.868
Skew: -0.341 Prob(JB): 0.648
Kurtosis: 2.713 Cond. No. 5.36e+03


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.36e+03. This might indicate that there are
strong multicollinearity or other numerical problems." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: hipcenter R-squared: 0.656\n", "Model: OLS Adj. R-squared: 0.626\n", "Method: Least Squares F-statistic: 21.63\n", "Date: Tue, 25 Sep 2018 Prob (F-statistic): 5.13e-08\n", "Time: 15:57:32 Log-Likelihood: -188.49\n", "No. Observations: 38 AIC: 385.0\n", "Df Residuals: 34 BIC: 391.5\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 528.2977 135.313 3.904 0.000 253.309 803.287\n", "Age 0.5195 0.408 1.273 0.212 -0.310 1.349\n", "Weight 0.0043 0.312 0.014 0.989 -0.629 0.638\n", "Ht -4.2119 0.999 -4.216 0.000 -6.242 -2.182\n", "==============================================================================\n", "Omnibus: 0.932 Durbin-Watson: 1.742\n", "Prob(Omnibus): 0.628 Jarque-Bera (JB): 0.868\n", "Skew: -0.341 Prob(JB): 0.648\n", "Kurtosis: 2.713 Cond. No. 5.36e+03\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 5.36e+03. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "smf.ols(formula = 'hipcenter ~ Age+Weight+Ht', data=seatpos).fit().summary()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "application/json": { "Software versions": [ { "module": "Python", "version": "3.7.0 64bit [Clang 4.0.1 (tags/RELEASE_401/final)]" }, { "module": "IPython", "version": "6.5.0" }, { "module": "OS", "version": "Darwin 17.7.0 x86_64 i386 64bit" }, { "module": "pandas", "version": "0.23.4" }, { "module": "numpy", "version": "1.15.1" }, { "module": "matplotlib", "version": "2.2.3" }, { "module": "seaborn", "version": "0.9.0" }, { "module": "scipy", "version": "1.1.0" }, { "module": "patsy", "version": "0.5.0" }, { "module": "statsmodels", "version": "0.9.0" } ] }, "text/html": [ "
SoftwareVersion
Python3.7.0 64bit [Clang 4.0.1 (tags/RELEASE_401/final)]
IPython6.5.0
OSDarwin 17.7.0 x86_64 i386 64bit
pandas0.23.4
numpy1.15.1
matplotlib2.2.3
seaborn0.9.0
scipy1.1.0
patsy0.5.0
statsmodels0.9.0
Tue Sep 25 15:57:32 2018 BST
" ], "text/latex": [ "\\begin{tabular}{|l|l|}\\hline\n", "{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n", "Python & 3.7.0 64bit [Clang 4.0.1 (tags/RELEASE\\_401/final)] \\\\ \\hline\n", "IPython & 6.5.0 \\\\ \\hline\n", "OS & Darwin 17.7.0 x86\\_64 i386 64bit \\\\ \\hline\n", "pandas & 0.23.4 \\\\ \\hline\n", "numpy & 1.15.1 \\\\ \\hline\n", "matplotlib & 2.2.3 \\\\ \\hline\n", "seaborn & 0.9.0 \\\\ \\hline\n", "scipy & 1.1.0 \\\\ \\hline\n", "patsy & 0.5.0 \\\\ \\hline\n", "statsmodels & 0.9.0 \\\\ \\hline\n", "\\hline \\multicolumn{2}{|l|}{Tue Sep 25 15:57:32 2018 BST} \\\\ \\hline\n", "\\end{tabular}\n" ], "text/plain": [ "Software versions\n", "Python 3.7.0 64bit [Clang 4.0.1 (tags/RELEASE_401/final)]\n", "IPython 6.5.0\n", "OS Darwin 17.7.0 x86_64 i386 64bit\n", "pandas 0.23.4\n", "numpy 1.15.1\n", "matplotlib 2.2.3\n", "seaborn 0.9.0\n", "scipy 1.1.0\n", "patsy 0.5.0\n", "statsmodels 0.9.0\n", "Tue Sep 25 15:57:32 2018 BST" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%load_ext version_information\n", "%version_information pandas, numpy, matplotlib, seaborn, scipy, patsy, statsmodels" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }