{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# [`textstat`](https://pypi.python.org/pypi/textstat/)\n",
    "\n",
    "    pip install textstat\n",
    "    \n",
    "> Python package to calculate statistics from text, which helps to decide readability, complexity and grade level of a particular corpus."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "    # Wildcard imports: later cells use names that are not defined in this notebook\n",
    "    # (presumably `a` and `this` from articles; DataFrame, Series and concat from pandas).\n",
    "    from articles import *\n",
    "    from pandas import *\n",
    "    # `o` is True only when this notebook's code runs as `__main__`.\n",
    "    o = __name__ == '__main__'\n",
    "    # Load the literacy extension only in that case.\n",
    "    if o:\n",
    "        %load_ext literacy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "    from textstat.textstat import textstat\n",
       "    measures = a[{\n",
       "        k: f for k in dir(textstat) if not k.startswith('_') for f in [getattr(textstat, k)] if callable(f)\n",
       "    }]"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "    from textstat.textstat import textstat\n",
    "    # Map each public, callable attribute of `textstat` to its attribute name.\n",
    "    # NOTE(review): `a[...]` comes from the wildcard import above; it is assumed to wrap the\n",
    "    # mapping into a callable, since `measures` is later passed to `Series.apply`.\n",
    "    measures = a[{\n",
    "        name: getattr(textstat, name)\n",
    "        for name in dir(textstat)\n",
    "        if not name.startswith('_') and callable(getattr(textstat, name))\n",
    "    }]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Playing games has always been thought to be important to the development of well-balanced and creative children; however, what part, if any, they should play in the lives of adults has never been researched that deeply. I believe that playing games is every bit as important for adults as for children. Not only is taking time out to play games with our children and other adults valuable to building interpersonal relationships but is also a wonderful way to release built up tension."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cell_type</th>\n",
       "      <td>markdown</td>\n",
       "      <td>code</td>\n",
       "      <td>code</td>\n",
       "      <td>markdown</td>\n",
       "      <td>code</td>\n",
       "      <td>code</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>execution_count</th>\n",
       "      <td>NaN</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>metadata</th>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "      <td>{'collapsed': True}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>outputs</th>\n",
       "      <td>NaN</td>\n",
       "      <td>[{'metadata': {}, 'output_type': 'display_data...</td>\n",
       "      <td>[{'metadata': {}, 'output_type': 'display_data...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[{'execution_count': 10, 'metadata': {}, 'outp...</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>source</th>\n",
       "      <td># [`textstat`](https://pypi.python.org/pypi/te...</td>\n",
       "      <td>from articles import *; from pandas import...</td>\n",
       "      <td>from textstat.textstat import textstat\\n\\n...</td>\n",
       "      <td>Playing games has always been thought to be im...</td>\n",
       "      <td>\\n\\n    if o: \\n\\n        df = a.Path().re...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>automated_readability_index</th>\n",
       "      <td>29.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>avg_letter_per_word</th>\n",
       "      <td>8.17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.93</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>avg_sentence_length</th>\n",
       "      <td>24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>avg_sentence_per_word</th>\n",
       "      <td>0.04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>avg_syllables_per_word</th>\n",
       "      <td>2.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_count</th>\n",
       "      <td>196</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>404</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>coleman_liau_index</th>\n",
       "      <td>30.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.61</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dale_chall_readability_score</th>\n",
       "      <td>13.38</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.57</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>difficult_words</th>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>flesch_kincaid_grade</th>\n",
       "      <td>19.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>flesch_reading_ease</th>\n",
       "      <td>-3.64</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gunning_fog</th>\n",
       "      <td>33.2667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.0663</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lexicon_count</th>\n",
       "      <td>24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>linsear_write_formula</th>\n",
       "      <td>17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>polysyllabcount</th>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sentence_count</th>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>smog_index</th>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>syllable_count</th>\n",
       "      <td>53.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>122.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text_standard</th>\n",
       "      <td>29th and 30th grade</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9th and 10th grade</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                              0  \\\n",
       "cell_type                                                              markdown   \n",
       "execution_count                                                             NaN   \n",
       "metadata                                                                     {}   \n",
       "outputs                                                                     NaN   \n",
       "source                        # [`textstat`](https://pypi.python.org/pypi/te...   \n",
       "automated_readability_index                                                29.1   \n",
       "avg_letter_per_word                                                        8.17   \n",
       "avg_sentence_length                                                          24   \n",
       "avg_sentence_per_word                                                      0.04   \n",
       "avg_syllables_per_word                                                      2.2   \n",
       "char_count                                                                  196   \n",
       "coleman_liau_index                                                         30.4   \n",
       "dale_chall_readability_score                                              13.38   \n",
       "difficult_words                                                              13   \n",
       "flesch_kincaid_grade                                                       19.7   \n",
       "flesch_reading_ease                                                       -3.64   \n",
       "gunning_fog                                                             33.2667   \n",
       "lexicon_count                                                                24   \n",
       "linsear_write_formula                                                        17   \n",
       "polysyllabcount                                                               4   \n",
       "sentence_count                                                                1   \n",
       "smog_index                                                                    0   \n",
       "syllable_count                                                             53.1   \n",
       "text_standard                                               29th and 30th grade   \n",
       "\n",
       "                                                                              1  \\\n",
       "cell_type                                                                  code   \n",
       "execution_count                                                              11   \n",
       "metadata                                                                     {}   \n",
       "outputs                       [{'metadata': {}, 'output_type': 'display_data...   \n",
       "source                            from articles import *; from pandas import...   \n",
       "automated_readability_index                                                 NaN   \n",
       "avg_letter_per_word                                                         NaN   \n",
       "avg_sentence_length                                                         NaN   \n",
       "avg_sentence_per_word                                                       NaN   \n",
       "avg_syllables_per_word                                                      NaN   \n",
       "char_count                                                                  NaN   \n",
       "coleman_liau_index                                                          NaN   \n",
       "dale_chall_readability_score                                                NaN   \n",
       "difficult_words                                                             NaN   \n",
       "flesch_kincaid_grade                                                        NaN   \n",
       "flesch_reading_ease                                                         NaN   \n",
       "gunning_fog                                                                 NaN   \n",
       "lexicon_count                                                               NaN   \n",
       "linsear_write_formula                                                       NaN   \n",
       "polysyllabcount                                                             NaN   \n",
       "sentence_count                                                              NaN   \n",
       "smog_index                                                                  NaN   \n",
       "syllable_count                                                              NaN   \n",
       "text_standard                                                               NaN   \n",
       "\n",
       "                                                                              2  \\\n",
       "cell_type                                                                  code   \n",
       "execution_count                                                               2   \n",
       "metadata                                                                     {}   \n",
       "outputs                       [{'metadata': {}, 'output_type': 'display_data...   \n",
       "source                            from textstat.textstat import textstat\\n\\n...   \n",
       "automated_readability_index                                                 NaN   \n",
       "avg_letter_per_word                                                         NaN   \n",
       "avg_sentence_length                                                         NaN   \n",
       "avg_sentence_per_word                                                       NaN   \n",
       "avg_syllables_per_word                                                      NaN   \n",
       "char_count                                                                  NaN   \n",
       "coleman_liau_index                                                          NaN   \n",
       "dale_chall_readability_score                                                NaN   \n",
       "difficult_words                                                             NaN   \n",
       "flesch_kincaid_grade                                                        NaN   \n",
       "flesch_reading_ease                                                         NaN   \n",
       "gunning_fog                                                                 NaN   \n",
       "lexicon_count                                                               NaN   \n",
       "linsear_write_formula                                                       NaN   \n",
       "polysyllabcount                                                             NaN   \n",
       "sentence_count                                                              NaN   \n",
       "smog_index                                                                  NaN   \n",
       "syllable_count                                                              NaN   \n",
       "text_standard                                                               NaN   \n",
       "\n",
       "                                                                              3  \\\n",
       "cell_type                                                              markdown   \n",
       "execution_count                                                             NaN   \n",
       "metadata                                                                     {}   \n",
       "outputs                                                                     NaN   \n",
       "source                        Playing games has always been thought to be im...   \n",
       "automated_readability_index                                                15.5   \n",
       "avg_letter_per_word                                                        4.93   \n",
       "avg_sentence_length                                                        27.3   \n",
       "avg_sentence_per_word                                                      0.04   \n",
       "avg_syllables_per_word                                                      1.5   \n",
       "char_count                                                                  404   \n",
       "coleman_liau_index                                                        11.61   \n",
       "dale_chall_readability_score                                              10.57   \n",
       "difficult_words                                                              29   \n",
       "flesch_kincaid_grade                                                       12.8   \n",
       "flesch_reading_ease                                                       52.23   \n",
       "gunning_fog                                                             27.0663   \n",
       "lexicon_count                                                                82   \n",
       "linsear_write_formula                                                        15   \n",
       "polysyllabcount                                                               4   \n",
       "sentence_count                                                                3   \n",
       "smog_index                                                                  9.7   \n",
       "syllable_count                                                            122.4   \n",
       "text_standard                                                9th and 10th grade   \n",
       "\n",
       "                                                                              4  \\\n",
       "cell_type                                                                  code   \n",
       "execution_count                                                              10   \n",
       "metadata                                                                     {}   \n",
       "outputs                       [{'execution_count': 10, 'metadata': {}, 'outp...   \n",
       "source                            \\n\\n    if o: \\n\\n        df = a.Path().re...   \n",
       "automated_readability_index                                                 NaN   \n",
       "avg_letter_per_word                                                         NaN   \n",
       "avg_sentence_length                                                         NaN   \n",
       "avg_sentence_per_word                                                       NaN   \n",
       "avg_syllables_per_word                                                      NaN   \n",
       "char_count                                                                  NaN   \n",
       "coleman_liau_index                                                          NaN   \n",
       "dale_chall_readability_score                                                NaN   \n",
       "difficult_words                                                             NaN   \n",
       "flesch_kincaid_grade                                                        NaN   \n",
       "flesch_reading_ease                                                         NaN   \n",
       "gunning_fog                                                                 NaN   \n",
       "lexicon_count                                                               NaN   \n",
       "linsear_write_formula                                                       NaN   \n",
       "polysyllabcount                                                             NaN   \n",
       "sentence_count                                                              NaN   \n",
       "smog_index                                                                  NaN   \n",
       "syllable_count                                                              NaN   \n",
       "text_standard                                                               NaN   \n",
       "\n",
       "                                                5  \n",
       "cell_type                                    code  \n",
       "execution_count                               NaN  \n",
       "metadata                      {'collapsed': True}  \n",
       "outputs                                        []  \n",
       "source                                             \n",
       "automated_readability_index                   NaN  \n",
       "avg_letter_per_word                           NaN  \n",
       "avg_sentence_length                           NaN  \n",
       "avg_sentence_per_word                         NaN  \n",
       "avg_syllables_per_word                        NaN  \n",
       "char_count                                    NaN  \n",
       "coleman_liau_index                            NaN  \n",
       "dale_chall_readability_score                  NaN  \n",
       "difficult_words                               NaN  \n",
       "flesch_kincaid_grade                          NaN  \n",
       "flesch_reading_ease                           NaN  \n",
       "gunning_fog                                   NaN  \n",
       "lexicon_count                                 NaN  \n",
       "linsear_write_formula                         NaN  \n",
       "polysyllabcount                               NaN  \n",
       "sentence_count                                NaN  \n",
       "smog_index                                    NaN  \n",
       "syllable_count                                NaN  \n",
       "text_standard                                 NaN  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "    \n",
    "    if o: \n",
    "        # Read a notebook file as JSON and build a DataFrame with one row per cell.\n",
    "        # NOTE(review): the module name 'Untitled183' is hard-coded; confirm it still matches\n",
    "        # this notebook's file name, otherwise the import (and the path lookup) will fail.\n",
    "        df = a.Path().read_text().loads()[this()['cells']][DataFrame](\n",
    "            __import__('Untitled183').__file__)\n",
    "        # Each `source` line already carries its trailing newline, so concatenate with ''\n",
    "        # (joining with a newline would double every line break in the measured text).\n",
    "        df.source = df.source.apply(''.join)\n",
    "        # Score only the markdown cells, expand each result into one column per measure, and\n",
    "        # append those columns to the cell table; non-markdown rows are left as NaN.\n",
    "        df = df[df.cell_type == 'markdown'].source.apply(measures).apply(Series).pipe(lambda df_: concat([df, df_], axis=1))\n",
    "    # Display the transposed table only when `o` is true (`df` is undefined otherwise).\n",
    "    o and df.T"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}