diff --git a/notebooks/q29_cleaning.ipynb b/notebooks/q29_cleaning.ipynb index fb484d9..cb6ce57 100644 --- a/notebooks/q29_cleaning.ipynb +++ b/notebooks/q29_cleaning.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "0b6dc5f5-e2df-47e2-ab43-3c49600290b1", "metadata": {}, "outputs": [], @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "250889a9-f63c-45c1-9bf6-eb0bd1073a89", "metadata": {}, "outputs": [], @@ -87,7 +87,9 @@ "cell_type": "code", "execution_count": null, "id": "27fe45d8-cfbd-47e2-b295-939cc8f4372e", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# Full Model Pipeline\n", @@ -120,8 +122,10 @@ "print('Filtering to new IDs...', end = '')\n", "# grab only the IDs of current interest\n", "completed_ids = df_done.id.unique()\n", - "#df = df_open[~df_open.id.isin(completed_ids)].reset_index(drop=True)\n", - "df = df_open.copy()\n", + "df = df_open[~df_open.id.isin(completed_ids)].reset_index(drop=True)\n", + "\n", + "# remove empty responses\n", + "df = df[~ (pd.isnull(df.aq29lang) | (df.aq29lang.str.len()==0))].reset_index(drop=True)\n", "\n", "df['response'] = df['aq29lang']\n", "print('Done.')\n",