diff --git a/content/01-python_v_stata/.ipynb_checkpoints/syntax-checkpoint.ipynb b/content/01-python_v_stata/.ipynb_checkpoints/syntax-checkpoint.ipynb index 629b4fe..a704b86 100644 --- a/content/01-python_v_stata/.ipynb_checkpoints/syntax-checkpoint.ipynb +++ b/content/01-python_v_stata/.ipynb_checkpoints/syntax-checkpoint.ipynb @@ -506,105 +506,6 @@ "Below, we discuss all the features currently supported by `Stata2Python`, along with providing example usages." ] }, - { - "cell_type": "markdown", - "id": "d9ace3e5-b5e7-473e-8bdb-cf6559c2fcc1", - "metadata": {}, - "source": [ - "#### T-tests\n", - "\n", - "This function helps users determine the code for running [t-tests](https://www.jmp.com/en_us/statistics-knowledge-portal/t-test.html) in Python. Examples include:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b48ee104-aa0a-4461-a1bc-9f40c6c90996", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy import stats\n", - "### First, we must filter the DataFrame to obtain the right values\n", - "catvar_vals = np.unique(df['guard'])\n", - "df_1 = df[df['guard'] == catvar_vals[0]]\n", - "df_2 = df[df['guard'] == catvar_vals[1]]\n", - "### Then, we can run our t-test\n", - "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=True, nan_policy='propagate')\n" - ] - } - ], - "source": [ - "stata2python(\"ttest wage, by(guard)\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "87876a2e-0b2c-40d5-a64a-c8ec25941aa1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy import stats\n", - "### First, we must filter the DataFrame to obtain the right values\n", - "catvar_vals = np.unique(nba['guard'])\n", - "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", - "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", - "### Then, we can run our t-test\n", - "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')\n" - ] - } - ], - "source": [ - "stata2python(\"ttest wage, by(guard) unequal\", \"nba\")" - ] - }, - { - "cell_type": "markdown", - "id": "89b477e9-82df-43d2-bc97-dc466c1bcb92", - "metadata": {}, - "source": [ - "Assuming you have all the correct packages installed, you can directly copy paste this code to see the output. For example, " - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "a485ec9f-25f6-4c24-9ee8-380b751ef6a1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "TtestResult(statistic=2.1432820571177977, pvalue=0.03299634994484977, df=266.3682612357414)" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy import stats\n", - "### First, we must filter the DataFrame to obtain the right values\n", - "catvar_vals = np.unique(nba['guard'])\n", - "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", - "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", - "### Then, we can run our t-test\n", - "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')" - ] - }, { "cell_type": "markdown", "id": "bcac0225-7131-4a7d-9997-47958c10c172", @@ -658,7 +559,7 @@ "id": "9357b734-d25a-4008-b0c9-86240daecff2", "metadata": {}, "source": [ - "Copy-pasting the output into Python to verify it works." + "Assuming you have all the correct packages installed, you can directly copy paste this code to see the output. For example, " ] }, { @@ -1049,6 +950,14 @@ "stata2python(\"describe\",\"nba\")" ] }, + { + "cell_type": "markdown", + "id": "6b4f8ca6-2d2f-4cdd-8bb4-89cef1d52631", + "metadata": {}, + "source": [ + "Verifying that the output works:" + ] + }, { "cell_type": "code", "execution_count": 30, @@ -1326,7 +1235,7 @@ } ], "source": [ - "import pandas as pd # Verifying that the output works\n", + "import pandas as pd \n", "nba.describe()" ] }, @@ -1541,6 +1450,105 @@ "pollution.hist(column='co2pc',bins=80);" ] }, + { + "cell_type": "markdown", + "id": "d9ace3e5-b5e7-473e-8bdb-cf6559c2fcc1", + "metadata": {}, + "source": [ + "#### T-tests\n", + "\n", + "This function helps users determine the code for running [t-tests](https://www.jmp.com/en_us/statistics-knowledge-portal/t-test.html) in Python. Examples include:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b48ee104-aa0a-4461-a1bc-9f40c6c90996", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "### First, we must filter the DataFrame to obtain the right values\n", + "catvar_vals = np.unique(df['guard'])\n", + "df_1 = df[df['guard'] == catvar_vals[0]]\n", + "df_2 = df[df['guard'] == catvar_vals[1]]\n", + "### Then, we can run our t-test\n", + "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=True, nan_policy='propagate')\n" + ] + } + ], + "source": [ + "stata2python(\"ttest wage, by(guard)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "87876a2e-0b2c-40d5-a64a-c8ec25941aa1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "### First, we must filter the DataFrame to obtain the right values\n", + "catvar_vals = np.unique(nba['guard'])\n", + "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", + "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", + "### Then, we can run our t-test\n", + "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')\n" + ] + } + ], + "source": [ + "stata2python(\"ttest wage, by(guard) unequal\", \"nba\")" + ] + }, + { + "cell_type": "markdown", + "id": "89b477e9-82df-43d2-bc97-dc466c1bcb92", + "metadata": {}, + "source": [ + "Verifying that the output works:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a485ec9f-25f6-4c24-9ee8-380b751ef6a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=2.1432820571177977, pvalue=0.03299634994484977, df=266.3682612357414)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "### First, we must filter the DataFrame to obtain the right values\n", + "catvar_vals = np.unique(nba['guard'])\n", + "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", + "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", + "### Then, we can run our t-test\n", + "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')" + ] + }, { "cell_type": "markdown", "id": "14245a39-b06f-406b-a597-fc4ffcffead0", diff --git a/content/01-python_v_stata/syntax.ipynb b/content/01-python_v_stata/syntax.ipynb index 629b4fe..a704b86 100644 --- a/content/01-python_v_stata/syntax.ipynb +++ b/content/01-python_v_stata/syntax.ipynb @@ -506,105 +506,6 @@ "Below, we discuss all the features currently supported by `Stata2Python`, along with providing example usages." ] }, - { - "cell_type": "markdown", - "id": "d9ace3e5-b5e7-473e-8bdb-cf6559c2fcc1", - "metadata": {}, - "source": [ - "#### T-tests\n", - "\n", - "This function helps users determine the code for running [t-tests](https://www.jmp.com/en_us/statistics-knowledge-portal/t-test.html) in Python. Examples include:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b48ee104-aa0a-4461-a1bc-9f40c6c90996", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy import stats\n", - "### First, we must filter the DataFrame to obtain the right values\n", - "catvar_vals = np.unique(df['guard'])\n", - "df_1 = df[df['guard'] == catvar_vals[0]]\n", - "df_2 = df[df['guard'] == catvar_vals[1]]\n", - "### Then, we can run our t-test\n", - "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=True, nan_policy='propagate')\n" - ] - } - ], - "source": [ - "stata2python(\"ttest wage, by(guard)\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "87876a2e-0b2c-40d5-a64a-c8ec25941aa1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy import stats\n", - "### First, we must filter the DataFrame to obtain the right values\n", - "catvar_vals = np.unique(nba['guard'])\n", - "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", - "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", - "### Then, we can run our t-test\n", - "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')\n" - ] - } - ], - "source": [ - "stata2python(\"ttest wage, by(guard) unequal\", \"nba\")" - ] - }, - { - "cell_type": "markdown", - "id": "89b477e9-82df-43d2-bc97-dc466c1bcb92", - "metadata": {}, - "source": [ - "Assuming you have all the correct packages installed, you can directly copy paste this code to see the output. For example, " - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "a485ec9f-25f6-4c24-9ee8-380b751ef6a1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "TtestResult(statistic=2.1432820571177977, pvalue=0.03299634994484977, df=266.3682612357414)" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy import stats\n", - "### First, we must filter the DataFrame to obtain the right values\n", - "catvar_vals = np.unique(nba['guard'])\n", - "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", - "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", - "### Then, we can run our t-test\n", - "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')" - ] - }, { "cell_type": "markdown", "id": "bcac0225-7131-4a7d-9997-47958c10c172", @@ -658,7 +559,7 @@ "id": "9357b734-d25a-4008-b0c9-86240daecff2", "metadata": {}, "source": [ - "Copy-pasting the output into Python to verify it works." + "Assuming you have all the correct packages installed, you can directly copy paste this code to see the output. For example, " ] }, { @@ -1049,6 +950,14 @@ "stata2python(\"describe\",\"nba\")" ] }, + { + "cell_type": "markdown", + "id": "6b4f8ca6-2d2f-4cdd-8bb4-89cef1d52631", + "metadata": {}, + "source": [ + "Verifying that the output works:" + ] + }, { "cell_type": "code", "execution_count": 30, @@ -1326,7 +1235,7 @@ } ], "source": [ - "import pandas as pd # Verifying that the output works\n", + "import pandas as pd \n", "nba.describe()" ] }, @@ -1541,6 +1450,105 @@ "pollution.hist(column='co2pc',bins=80);" ] }, + { + "cell_type": "markdown", + "id": "d9ace3e5-b5e7-473e-8bdb-cf6559c2fcc1", + "metadata": {}, + "source": [ + "#### T-tests\n", + "\n", + "This function helps users determine the code for running [t-tests](https://www.jmp.com/en_us/statistics-knowledge-portal/t-test.html) in Python. Examples include:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b48ee104-aa0a-4461-a1bc-9f40c6c90996", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "### First, we must filter the DataFrame to obtain the right values\n", + "catvar_vals = np.unique(df['guard'])\n", + "df_1 = df[df['guard'] == catvar_vals[0]]\n", + "df_2 = df[df['guard'] == catvar_vals[1]]\n", + "### Then, we can run our t-test\n", + "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=True, nan_policy='propagate')\n" + ] + } + ], + "source": [ + "stata2python(\"ttest wage, by(guard)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "87876a2e-0b2c-40d5-a64a-c8ec25941aa1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "### First, we must filter the DataFrame to obtain the right values\n", + "catvar_vals = np.unique(nba['guard'])\n", + "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", + "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", + "### Then, we can run our t-test\n", + "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')\n" + ] + } + ], + "source": [ + "stata2python(\"ttest wage, by(guard) unequal\", \"nba\")" + ] + }, + { + "cell_type": "markdown", + "id": "89b477e9-82df-43d2-bc97-dc466c1bcb92", + "metadata": {}, + "source": [ + "Verifying that the output works:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a485ec9f-25f6-4c24-9ee8-380b751ef6a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=2.1432820571177977, pvalue=0.03299634994484977, df=266.3682612357414)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "### First, we must filter the DataFrame to obtain the right values\n", + "catvar_vals = np.unique(nba['guard'])\n", + "df_1 = nba[nba['guard'] == catvar_vals[0]]\n", + "df_2 = nba[nba['guard'] == catvar_vals[1]]\n", + "### Then, we can run our t-test\n", + "stats.ttest_ind(df_1['wage'], df_2['wage'], equal_var=False, nan_policy='propagate')" + ] + }, { "cell_type": "markdown", "id": "14245a39-b06f-406b-a597-fc4ffcffead0",