diff --git a/docs/assignment.md b/docs/assignment.md
index 96c76c1..fd687fe 100644
--- a/docs/assignment.md
+++ b/docs/assignment.md
@@ -1 +1,3 @@
 # Assignment
+<p><a href="_static/praca_domowa_2023.pdf">Praca domowa 2023 - opis zadania</a></p> 
+<p><a href="_static/dane.zip">Praca domowa 2023 - dane do pracy domowej</a></p> 
diff --git a/docs/lecture.md b/docs/lecture.md
index 6cd8e8d..1762cc3 100644
--- a/docs/lecture.md
+++ b/docs/lecture.md
@@ -6,3 +6,4 @@
 <p><a href="_static/aw_03.pdf">Lecture 3</a></p>
 <p><a href="_static/aw_04.pdf">Lecture 4</a></p>
 <p><a href="_static/aw_05.pdf">Lecture 5</a></p>
+<p><a href="_static/aw_06.pdf">Lecture 6</a></p>
\ No newline at end of file
diff --git a/docs/lectures/aw_06.pdf b/docs/lectures/aw_06.pdf
new file mode 100644
index 0000000..53140fa
Binary files /dev/null and b/docs/lectures/aw_06.pdf differ
diff --git a/docs/lectures/dane.zip b/docs/lectures/dane.zip
new file mode 100644
index 0000000..b01c0cc
Binary files /dev/null and b/docs/lectures/dane.zip differ
diff --git a/docs/lectures/praca_domowa_2023.pdf b/docs/lectures/praca_domowa_2023.pdf
new file mode 100644
index 0000000..7d53651
Binary files /dev/null and b/docs/lectures/praca_domowa_2023.pdf differ
diff --git a/src/multidim/notebooks/05_mds_outliers.ipynb b/src/multidim/notebooks/05_mds_outliers.ipynb
index 1daa095..574e3ac 100644
--- a/src/multidim/notebooks/05_mds_outliers.ipynb
+++ b/src/multidim/notebooks/05_mds_outliers.ipynb
@@ -40,7 +40,7 @@
    "outputs": [],
    "source": [
     "#load_stata(STATA_PATH, STATA_TYPE)\n",
-    "load_stata(\"D:\\Stata18\", \"se\")"
+    "load_stata(STATA_PATH, \"se\")"
    ]
   },
   {
diff --git a/src/multidim/notebooks/06_factor.ipynb b/src/multidim/notebooks/06_factor.ipynb
new file mode 100644
index 0000000..0f928de
--- /dev/null
+++ b/src/multidim/notebooks/06_factor.ipynb
@@ -0,0 +1,862 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2a48c727",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Analiza Wielowymiarowa - zajecia 6 - Analiza czynnikowa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32b52329",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.utils import resolve_stata, load_stata\n",
+    "\n",
+    "STATA_PATH, STATA_TYPE = resolve_stata(version = 18, stype = \"se\")\n",
+    "# make sure they are proper ones\n",
+    "STATA_PATH, STATA_TYPE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40123e5a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_stata(STATA_PATH, STATA_TYPE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1df4db13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Załadowanie bibliotek\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "#https://scikit-learn.org/stable/modules/decomposition.html#fa\n",
+    "#https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FactorAnalysis.html\n",
+    "from sklearn import decomposition\n",
+    "from sklearn.decomposition import FactorAnalysis\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "#https://factor-analyzer.readthedocs.io/en/latest/factor_analyzer.html\n",
+    "from factor_analyzer import FactorAnalyzer\n",
+    "from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity\n",
+    "from factor_analyzer.factor_analyzer import calculate_kmo\n",
+    "\n",
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.decomposition import TruncatedSVD \n",
+    "from scipy.linalg import svd\n",
+    "\n",
+    "from multidim.funs import corr_mat\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3d84d8e6-cda6-49dc-b436-02eca4735e97",
+   "metadata": {},
+   "source": [
+    "## Analiza Czynnikowa"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e91bc599",
+   "metadata": {},
+   "source": [
+    "### Przyklad 1 -- zaczerpniety z materialow pani Natalii Nehrebeckiej"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c97bb918",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Ponizej przedstawimy analize czynnikowa metoda najwiekszej wiarogodnosci. Zaleca\n",
+    "sie na wstepie analize skladowych glownych, aby ustalic przyblizona liczbe czynnikow*/\n",
+    "\n",
+    "/*Dane wejsciowe do analizy czynnikowej moga miec postac macierzy kowariancji lub korelacji.\n",
+    "Posluzymy sie danymi pochodzacymi z badania przeprowadzonego na 123 osobach cierpiacych\n",
+    "z powodu silnych napadow bolu. Poproszono ich o wydanie opinii na skali od 1 do 6\n",
+    "(1-calkowicie sie zgadzam, 6-nie zgadzam sie) na temat 9 oswiadczen na temat bolu.\n",
+    "\n",
+    "Ponizej lista zmiennych:\n",
+    "1. To, czy bede cierpial z powodu bolu w przyszlosci zalezy od lekarza.\n",
+    "2. To, czy bede cierpial z powodu bolu, zalezy zwykle od tego, czy cos zrobilem lub nie\n",
+    "   zrobilem.\n",
+    "3. To, czy bede cierpial z powodu bolu, zalezy od tego, co zrobi dla mnie lekarz.\n",
+    "4. Nie moge poradzic sobie z bolem, dopoki nie skorzystam z pomocy medycznej.\n",
+    "5. Jesli czuje bol, to jest to spowodowane tym, iz nie wykonywalem odpowiednich cwiczen lub\n",
+    "   nieprawidlowo sie odzywialem.\n",
+    "6. Bol jest wynikiem zaniedbania.\n",
+    "7. Jestem calkowicie odpowiedzialny za moj bol.\n",
+    "8. Pozbycie sie bolu jest kontrolowane przez doktora.\n",
+    "9. Ludzie, ktorzy nigdy nie cierpia z powodu bolu, sa szczesciarzami.*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf289888-8057-4819-bd98-29ad7de08189",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata -qui\n",
+    "matrix C = ( 1.0000, -0.0385, 0.6066, 0.4507, 0.0320, -0.2877, -0.2974, 0.4526, 0.2952 \\/*\n",
+    "*/ -0.0385, 1.0000, -0.0693, -0.1167, 0.4881, 0.4271, 0.3045, -0.3090, -0.1704 \\/*\n",
+    "*/ 0.6066, -0.0693, 1.000, 0.5916, 0.0317, -0.1336, -0.2404, 0.5886, 0.3165 \\ /*\n",
+    "*/0.4507, -0.1167, 0.5916, 1.0000, -0.0802,  -0.2073, -0.1850, 0.6286, 0.3680 \\ /*\n",
+    "*/0.0320, 0.4881, 0.0317, -0.0802, 1.0000, 0.4731, 0.4138, -0.1397, -0.2367 \\ /*\n",
+    "*/-0.2877, 0.4271, -0.1336, -0.2073, 0.4731, 1.0000, 0.6346, -0.1329, -0.1541 \\ /*\n",
+    "*/-0.2974, 0.3045, -0.2404, -0.1850, 0.4138, 0.6346, 1.0000, -0.2599, -0.2893 \\ /*\n",
+    "*/0.4526, -0.3090, 0.5886, 0.6286, -0.1397, -0.1329, -0.2599, 1.0000, 0.4047 \\ /*\n",
+    "*/0.2952, -0.1704, 0.3165, 0.3680, -0.2367, -0.1541, -0.2893, 0.4047, 1.0000 )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "725b97fd-8d47-439f-9260-b97bd403c342",
+   "metadata": {},
+   "source": [
+    "Test Ilorazu Wiarogodności (ang. LR test) https://www.jstor.org/stable/2287400"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4efab7f6-a854-4dc4-8456-a961b42a3d1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Nie musimy nigdzie okreslac, iz na wejsciu mamy dane w postaci macierzy\n",
+    "korelacji. Rozpoczniemy od 2 czynnikow*/\n",
+    "/*Jesli wykorzystujemy dane w postaci macierzy korelacji, musimy okreslic liczbe obserwacji*/\n",
+    "\n",
+    "factormat C, n(123) names(p1 p2 p3 p4 p5 p6 p7 p8 p9) fac(2) ml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8064bb2-0926-4d81-9936-81423d61f07e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C = np.array([[ 1.0000, -0.0385, 0.6066, 0.4507, 0.0320, -0.2877, -0.2974, 0.4526, 0.2952],\n",
+    "[-0.0385, 1.0000, -0.0693, -0.1167, 0.4881, 0.4271, 0.3045, -0.3090, -0.1704],\n",
+    "[0.6066, -0.0693, 1.000, 0.5916, 0.0317, -0.1336, -0.2404, 0.5886, 0.3165],\n",
+    "[0.4507, -0.1167, 0.5916, 1.0000, -0.0802,  -0.2073, -0.1850, 0.6286, 0.3680],\n",
+    "[0.0320, 0.4881, 0.0317, -0.0802, 1.0000, 0.4731, 0.4138, -0.1397, -0.2367],\n",
+    "[-0.2877, 0.4271, -0.1336, -0.2073, 0.4731, 1.0000, 0.6346, -0.1329, -0.1541],\n",
+    "[-0.2974, 0.3045, -0.2404, -0.1850, 0.4138, 0.6346, 1.0000, -0.2599, -0.2893],\n",
+    "[0.4526, -0.3090, 0.5886, 0.6286, -0.1397, -0.1329, -0.2599, 1.0000, 0.4047],\n",
+    "[0.2952, -0.1704, 0.3165, 0.3680, -0.2367, -0.1541, -0.2893, 0.4047, 1.0000]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c89669eb-a7bd-450e-9c6d-8fcba38342a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.linalg.eigvals(C) # macierz oddatnio okreslona"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0405f2b3-b008-4cdc-ad78-1b42910ae576",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa = FactorAnalyzer(rotation = None, is_corr_matrix = True, n_factors = 2, method = 'ml')\n",
+    "fa.fit(C)\n",
+    "\n",
+    "# GET EIGENVALUES\n",
+    "# Large values of the communalities will indicate that the fitting hyperplane (factors) is rather accurately reproducing the correlation matrix. \n",
+    "fa.get_uniquenesses(), fa.get_communalities()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b929250-f3aa-48ef-9f32-46f97962e6be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa.get_uniquenesses() + fa.get_communalities()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "930a859f-7cca-4352-8ca8-01d71be84a90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa.get_factor_variance()[0], fa.get_eigenvalues()[1][0:2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1d9273bf-7487-48c5-bf64-9165d87e3c39",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nams = [ \"p\" + i for i in list(\"123456789\")]\n",
+    "loadings = pd.DataFrame(np.column_stack((fa.loadings_, fa.get_uniquenesses())))\n",
+    "loadings.index = nams\n",
+    "loadings.columns = [\"Factor1\", \"Factor2\", \"uniquenesses\"] \n",
+    "loadings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5fc8b2c6",
+   "metadata": {},
+   "source": [
+    "Truncated SVD Directly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dea69757",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tsvd = TruncatedSVD(2)\n",
+    "tsvd.fit(C)\n",
+    "loadings_direct_svd = tsvd.components_.T * np.sqrt(tsvd.explained_variance_)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "001e9054",
+   "metadata": {},
+   "source": [
+    "Correlation between FA loadings and direct Truncated SVD loadings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0be0520",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.diag(\n",
+    "    corr_mat(\n",
+    "        loadings_direct_svd,\n",
+    "        fa.loadings_\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f099d12-a22b-427b-9057-2071180da802",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "\n",
+    "/*Wyniki testu -- okazuje sie ze 2 czyniki nie wystarcza (p-value = 0.0000<0.05)*/\n",
+    "//Probujemy z 3 czynnikami\n",
+    "\n",
+    "factormat C, n(123) names(p1 p2 p3 p4 p5 p6 p7 p8 p9) fac(3) ml\n",
+    "\n",
+    "/*Na poziomie istotnosci 0,05 brak podstaw do odrzucenia H0 zakladajacej, ze model\n",
+    "trzyczynnikowy jest adekwatny (wystarczajacy). p-value 0.1055>0.05*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fac579d2-b14b-4edf-8e35-12542bf71dad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa = FactorAnalyzer(rotation = None, is_corr_matrix = True, n_factors = 3, method = 'ml')\n",
+    "fa.fit(C)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "114e4ecf-9c86-4d8f-972e-91a3c94f7541",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa.get_factor_variance()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6cf19e06-3788-44c8-b866-7fd9f9364e44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa.get_eigenvalues()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2c19b8c7-f645-472a-b641-557fdbd8d086",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.Series(fa.get_eigenvalues()[1]).plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c946d1de-2f68-4cd0-8157-9abfe69801b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nams = [ \"p\" + i for i in list(\"123456789\")]\n",
+    "loadings = pd.DataFrame(np.column_stack((fa.loadings_, fa.get_uniquenesses())))\n",
+    "loadings.index = nams\n",
+    "loadings.columns = [\"Factor1\", \"Factor2\", \"Factor3\", \"uniquenesses\"] \n",
+    "loadings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e12fe4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Sprobujemy nadac czynnikom interpretacje. Przeprowadzamy rotacje czynnikow*/\n",
+    "rotate, varimax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c8b4750-515e-4de4-8af8-8b405feca4e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa = FactorAnalyzer(rotation='varimax', is_corr_matrix = True, n_factors = 3, method = 'ml')\n",
+    "fa.fit(C)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "022e0b74-ce17-47fb-9f2b-44d41937a461",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#GET EIGENVALUES\n",
+    "fa.get_uniquenesses(),fa.get_communalities()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d3f528e-996d-4b3c-a2c3-7fa7dc49e756",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nams = [ \"p\" + i for i in list(\"123456789\")]\n",
+    "loadings = pd.DataFrame(np.column_stack((fa.loadings_, fa.get_uniquenesses())))\n",
+    "loadings.index = nams\n",
+    "loadings.columns = [\"Factor1\", \"Factor2\", \"Factor3\", \"uniquenesses\"] \n",
+    "loadings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bdd798f-527f-48c7-9922-23490ca7a5bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa.rotation_matrix_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b166b19-77fc-43f5-85ff-365908907953",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.matmul(fa.rotation_matrix_.T, fa.rotation_matrix_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f4a7b9f2-885e-404f-bec8-4eb3000ba251",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# https://www.tandfonline.com/doi/abs/10.1080/10705510701301891?journalCode=hsem20"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0b147ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*pierwszy czynnik - stwierdzenia 1, 3, 4 i 8 - wszystkie zwiazane z lekarzami; mozemy  zinterpretowac jako \"kontrola lekarska bolu\"\n",
+    "  drugi czynnik - stwierdzenia 6 i 7 - bol jako wynik wlasnych dzialan\n",
+    "  trzeci czynnik - stwierdzenia 2 i 5 - znow bol jako wynik wlasnych dzialan.*/\n",
+    "\n",
+    "estat smc\n",
+    "/*oszacowanie czesci wspólnej ->\"communality\" (jaka czesc zmiennej Xi  jest zwiazana z pozostalymi zmiennymi X)\n",
+    "szacowna jako kwadrat wspolczynnika korelacji wielorakiej\n",
+    "danej zmiennej z pozostalymi (czyli R2 z regresji tej zmiennej na pozostale)*/\n",
+    "\n",
+    "estat kmo\n",
+    "\n",
+    "/*statystyka adekwatnosci proby Kaiser-Meyer-Olkin.\n",
+    "Metoda ta polega na porownaniu korelacji i czastkowych korelacji pomiedzy zmiennymi.\n",
+    "Gdy korelacja czastkowa jest relatywnie wysoka w stosunku do zwyklej korelacji to KMO jest male,\n",
+    "co oznacza ze uzyskanie adekwatnego rozwiazania w przestrzeni malego wymiaru jest niewykonalne.\n",
+    "\n",
+    "Wielkosci wspolczynnika:\n",
+    "0.00 to 0.49 nie do przyjecia\n",
+    "0.50 to 0.59 bardzo slaby\n",
+    "0.60 to 0.69 slaby\n",
+    "0.70 to 0.79 umiarkowany\n",
+    "0.80 to 0.89 dobry\n",
+    "0.90 to 1.00 znakomity*/\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4839275e-2092-4a5e-80e9-362cbce99fcf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calculate_bartlett_sphericity(C) not for correlation matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44d01513-be7c-4b08-b292-c19dad083990",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calculate_kmo(C) not for correlation matrix"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6078e46",
+   "metadata": {},
+   "source": [
+    "### Przyklad 2 -- Indeks kapitalu spolecznego i problemy z analiza czynnikowa"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "acaf4150-ae71-43d1-a509-30b589a372de",
+   "metadata": {},
+   "source": [
+    "#### Probujemy stworzyc indeks kapitalu spolecznego"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dd0aee94-451d-40ae-b534-fc4d53bd005a",
+   "metadata": {},
+   "source": [
+    "Dane oryginalnie pochodzily z badania World Values Survey"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "43507f16-99c6-469f-bcfb-50fc5db4c359",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.datasets import load_indeks_spol\n",
+    "F = load_indeks_spol()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35d74f92-5b9f-4ea5-94fd-076c1122cf86",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%mata -m F\n",
+    "st_matrix(\"F\", F)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e708caf-a0e8-407c-b4a4-61f6e057beba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "//METODA NAJWIEKSZEJ WIARYGODNOSCI\n",
+    "factormat F, n(35312) names(imp_family imp_friends imp_politics imp_church member_dis political_dis trust_family trust_ppers trust_neighbour trust_arel trust_firsttime trust_anation fair conf_church conf_forces conf_press conf_tv conf_labour conf_police conf_courts conf_govern conf_parties conf_parl religion_freq tradition help local) fac(7) ml\n",
+    "//za malo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1fcde1ec-1bd7-4b92-9e6d-34fb18e59a41",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nams = [\"imp_family\", \"imp_friends\", \"imp_politics\", \"imp_church\", \"member_dis\", \"political_dis\", \"trust_family\",\n",
+    "        \"trust_ppers\", \"trust_neighbour\", \"trust_arel\", \"trust_firsttime\", \"trust_anation\", \"fair\", \"conf_church\",\n",
+    "        \"conf_forces\", \"conf_press\", \"conf_tv\", \"conf_labour\", \"conf_police\", \"conf_courts\", \"conf_govern\", \"conf_parties\",\n",
+    "        \"conf_parl\", \"religion_freq\", \"tradition\", \"help\", \"local\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "60d7a783-7a42-467f-a657-e96aca25fe38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_factors = 7\n",
+    "fa = FactorAnalyzer(rotation=None, is_corr_matrix = True, n_factors = n_factors, method = 'ml')\n",
+    "fa.fit(F)\n",
+    "loadings = pd.DataFrame(np.column_stack((fa.loadings_, fa.get_uniquenesses())))\n",
+    "loadings.index = nams\n",
+    "loadings.columns = [ \"Factor\" + str(i + 1) for i in range(n_factors)] + [\"uniquenesses\"] \n",
+    "loadings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7643a085-d510-4b2c-a1f1-dfb706ab57d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.Series(fa.get_eigenvalues()[1]).plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e4363894-1a9d-4eb5-a5cd-a3bcb23ff77e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata \n",
+    "factormat F, n(35312) names(imp_family imp_friends imp_politics imp_church member_dis political_dis trust_family trust_ppers trust_neighbour trust_arel trust_firsttime trust_anation fair conf_church conf_forces conf_press conf_tv conf_labour conf_police conf_courts conf_govern conf_parties conf_parl religion_freq tradition help local) fac(9) ml\n",
+    "//HEYWOOD CASE -- negative variance estimate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1bf940f2-4e0f-4836-8a71-b638d8cb946d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_factors = 9\n",
+    "fa = FactorAnalyzer(rotation=None, is_corr_matrix = True, n_factors = n_factors, method = 'ml')\n",
+    "fa.fit(F)\n",
+    "loadings = pd.DataFrame(np.column_stack((fa.loadings_, fa.get_uniquenesses())))\n",
+    "loadings.index = nams\n",
+    "loadings.columns = [ \"Factor\" + str(i + 1) for i in range(n_factors)] + [\"uniquenesses\"] \n",
+    "loadings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3c002bcf-1fdb-4d6e-85eb-d53269117348",
+   "metadata": {},
+   "source": [
+    "### Analiza czynnikowa (FA) przy wykorzytaniu Analizy głównych składowych (PCA) (optymalizacja)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9529f72f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%%stata\n",
+    "#factormat F, n(35312) names(imp_family imp_friends imp_politics imp_church member_dis political_dis trust_family trust_ppers trust_neighbour trust_arel trust_firsttime trust_anation fair conf_church conf_forces conf_press conf_tv conf_labour conf_police conf_courts conf_govern conf_parties conf_parl religion_freq tradition help local) fac(4) pcf\n",
+    "#//za malo\n",
+    "#\n",
+    "#factormat F, n(35312) names(imp_family imp_friends imp_politics imp_church member_dis political_dis trust_family trust_ppers trust_neighbour trust_arel trust_firsttime trust_anation fair conf_church conf_forces conf_press conf_tv conf_labour conf_police conf_courts conf_govern conf_parties conf_parl religion_freq tradition help local) fac(27) pcf\n",
+    "#//tez nie"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a69f19ab-2eb3-4559-a12f-7068e4887f60",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# n_factors = 10\n",
+    "# fa = FactorAnalyzer(rotation=None, is_corr_matrix = True, n_factors = n_factors, method = 'principal')\n",
+    "# fa.fit(F)\n",
+    "# loadings = pd.DataFrame(np.column_stack((fa.loadings_, fa.get_uniquenesses())))\n",
+    "# loadings.index = nams\n",
+    "# loadings.columns = [ \"Factor\" + str(i + 1) for i in range(n_factors)] + [\"uniquenesses\"] \n",
+    "# loadings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f5549f40",
+   "metadata": {},
+   "source": [
+    "## Przyklad 3 - Scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "253695bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from sklearn.decomposition import FactorAnalysis\n",
+    "from sklearn.preprocessing import StandardScaler, scale\n",
+    "from scipy.stats import rankdata\n",
+    "\n",
+    "from factor_analyzer import FactorAnalyzer\n",
+    "from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity\n",
+    "from factor_analyzer.factor_analyzer import calculate_kmo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7dcf1d83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.datasets import load_seul1988\n",
+    "seul1988 = load_seul1988()\n",
+    "seul1988 = seul1988.sample(seul1988.shape[0])\n",
+    "seul1988 = seul1988.query(\"wynik >= 6000\")\n",
+    "seul1988[\"Subject\"] = list(range(1, seul1988.shape[0]+1, 1))\n",
+    "seul1988_copy = seul1988.copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d6c9a4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "seul1988 = seul1988[~np.isnan(seul1988).any(axis=1)]\n",
+    "seul1988_normal = scale(seul1988)\n",
+    "fa = FactorAnalysis(n_components = 3, tol = 0.001, svd_method = \"lapack\")\n",
+    "X = seul1988_normal[:, :-2]\n",
+    "fa.fit(X)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24b90c06",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "comps = fa.transform(X)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14d23f7a",
+   "metadata": {},
+   "source": [
+    "Interpretacja dla kazdego czynnika ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dce0cffe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cc = corr_mat(comps, seul1988.iloc[:, :-2]).T\n",
+    "cc"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "13871178",
+   "metadata": {},
+   "source": [
+    "Jest duzo roznych metod budowania rankingu."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa4ac1f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scores1a = comps[:,0]\n",
+    "order1a = rankdata(scores1a, \"max\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fbec394e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scores1b = comps.sum(axis = 1)\n",
+    "order1b = rankdata(scores1b, \"max\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a01aae5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scores2 = fa.score_samples(X)\n",
+    "order2 = rankdata(scores2, \"max\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a6e13b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fa2 = FactorAnalysis(n_components = 1, tol = 0.001, svd_method = \"lapack\")\n",
+    "fa2.fit(X)\n",
+    "comps = fa2.transform(X)\n",
+    "scores1c = comps.ravel()\n",
+    "order1c = rankdata(scores1c, \"max\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5440c686",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "res = pd.DataFrame({\n",
+    "    'subject': seul1988['Subject'],\n",
+    "    'wynik': seul1988['wynik'], \n",
+    "    'scores_one': scores1c,\n",
+    "    'rank_one': order1c,\n",
+    "    'scores_first': scores1a,\n",
+    "    'rank_first': order1a,\n",
+    "    'scores_sum': scores1b,\n",
+    "    'rank_sum': order1b,\n",
+    "    'scores_loglike': scores2,\n",
+    "    'rank_loglike': order2\n",
+    "})\n",
+    "res['rank_wynik'] = rankdata(res[\"wynik\"].values, \"max\")\n",
+    "res"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ffa88034",
+   "metadata": {},
+   "source": [
+    "Correlation between different rankings - ABS is needed as we do not know sense (zwrot)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3aa48fd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.funs import corr_mat\n",
+    "rank_cols = [\"rank_one\", \"rank_first\", \"rank_sum\", \"rank_loglike\", \"rank_wynik\"]\n",
+    "# ABS\n",
+    "np.abs(np.corrcoef(res.loc[:,rank_cols].T))[-1,:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a777eb20",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "aa9d594cf5340018b76a70eeac417329866dfddee009818da806e7b1b5f80883"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/multidim/notebooks/06_pca.ipynb b/src/multidim/notebooks/06_pca.ipynb
new file mode 100644
index 0000000..a3b26c8
--- /dev/null
+++ b/src/multidim/notebooks/06_pca.ipynb
@@ -0,0 +1,964 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2a48c727",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Analiza Wielowymiarowa - zajecia 6 - Analiza głównych składowych (PCA)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32b52329",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.utils import resolve_stata, load_stata\n",
+    "\n",
+    "STATA_PATH, STATA_TYPE = resolve_stata(version = 17, stype = \"se\")\n",
+    "# make sure they are proper ones\n",
+    "STATA_PATH, STATA_TYPE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40123e5a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_stata(STATA_PATH, STATA_TYPE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1df4db13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Załadowanie bibliotek\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from sklearn.decomposition import PCA\n",
+    "from scipy.linalg import svd\n",
+    "\n",
+    "from sklearn import datasets\n",
+    "\n",
+    "import statsmodels.api as sm\n",
+    "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
+    "\n",
+    "\n",
+    "from multidim.funs import corr_mat"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5417c75",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(1234)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9979aab1-fd4c-4c58-9f07-632efd3bff5c",
+   "metadata": {},
+   "source": [
+    "## Przykład 1 - Dekompozycja głównych składowych (SVD) a wartości własne (Eigenvalues) and Internals"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f8470d0-1e77-409a-a2e3-8578580f53a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris_set = datasets.load_iris()\n",
+    "iris_x = iris_set[\"data\"]\n",
+    "# CENTRING\n",
+    "iris_x_centred = iris_set[\"data\"] - np.mean(iris_set[\"data\"], axis = 0)\n",
+    "#SCALING\n",
+    "iris_x_scaled = (iris_set[\"data\"] - np.mean(iris_set[\"data\"], axis = 0))/np.std(iris_set[\"data\"], axis = 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e811df0-5797-460c-90db-c938d06d36ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# almost always centring\n",
+    "# in most scenerios full scaling\n",
+    "# correlation matrix = scaling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e126cc6a-d481-4add-b637-c51ad30c24dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# when X CENTRED -> X`X/n = COV(X)\n",
+    "np.allclose((iris_x_centred.T @ iris_x_centred) / (iris_x_centred.shape[0] - 1), np.cov(iris_x_centred.T, ddof = 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b06ff9c-8155-4a8f-8c68-37c5e60334e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# when X SCALED -> Corr(X) = COV(X)\n",
+    "np.allclose(np.corrcoef(iris_x_scaled.T), np.cov(iris_x_scaled.T, ddof=0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "debad822-40b7-424b-a40a-bb48e6cdabb1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# SVD -> X = U * S * Vt\n",
+    "u, s, vh = np.linalg.svd(iris_x_scaled, full_matrices = False)\n",
+    "np.allclose(iris_x_scaled, u @ np.diag(s) @ vh)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "99d7348e-76c1-4d53-ba40-52bdb0103fd8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Eigen -> C = V * S * Vt\n",
+    "s_eigen, v_eigen = np.linalg.eigh(np.corrcoef(iris_x.T))\n",
+    "np.allclose(np.corrcoef(iris_x.T), v_eigen @ np.diag(s_eigen) @ v_eigen.T)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac221e9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Unit disc\n",
+    "# https://en.wikipedia.org/wiki/Singular_value_decomposition#/media/File:Singular-Value-Decomposition.svg\n",
+    "# SVD could be run for square or long shape\n",
+    "corr_iris_x = np.corrcoef(iris_x.T)\n",
+    "u_s, s_s, vh_s = np.linalg.svd(corr_iris_x, full_matrices = False)\n",
+    "assert np.allclose(\n",
+    "    np.diag(np.ones((4))) @ corr_iris_x, \n",
+    "    corr_iris_x\n",
+    ")\n",
+    "assert np.allclose(\n",
+    "    np.diag(np.ones((4))) @ u_s @ np.diag(s_s) @ vh_s, \n",
+    "    corr_iris_x\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b3de809-2cd4-4406-a539-fc25a3c23128",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# principal components\n",
+    "pca_svd = pd.DataFrame(iris_x_scaled @ vh.T)\n",
+    "pca_svd_alt = pd.DataFrame(u @ np.diag(s))\n",
+    "assert np.allclose(pca_svd, pca_svd_alt)\n",
+    "pca_eigen = pd.DataFrame(iris_x_scaled @ v_eigen)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92a51609",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# direction is stable (kierunek) but not sense (zwrot)\n",
+    "# from sklearn.utils.extmath import svd_flip\n",
+    "# Sign correction to ensure deterministic output from SVD.\n",
+    "# Adjusts the columns of u and the rows of v such that the loadings in the\n",
+    "# columns in u that are largest in absolute value are always positive."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "126b29fa",
+   "metadata": {},
+   "source": [
+    "Ładunki czynnikowe (ang. Loadings )\n",
+    "\n",
+    "Korelacje między oryginalnymi zmiennymi a składowymi głównymi (correlations between the original variables and the principal components)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ece668ca-3ff2-4687-8533-ea53512909ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pca_nams = ['PC1', 'PC2', 'PC3', 'PC4']\n",
+    "feature_nams = iris_set[\"feature_names\"]\n",
+    "# loadings from sklearn\n",
+    "pca = PCA(n_components = 4)\n",
+    "X = pca.fit_transform(iris_x_scaled)\n",
+    "loadings1 = pd.DataFrame(pca.components_.T * np.sqrt(pca.explained_variance_), index = pca_nams, columns = feature_nams)\n",
+    "# Loadings from numpy svd decomposition\n",
+    "loadings2 = pd.DataFrame(vh.T * s/np.sqrt(iris_x_scaled.shape[0] - 1), index = pca_nams, columns = feature_nams)\n",
+    "# Loadings by directly getting correlations between variables and prinicpal components\n",
+    "loadings3 = pd.DataFrame(corr_mat(pca_svd, iris_x_scaled).values, columns = pca_nams, index = feature_nams)\n",
+    "loadings1.T, loadings2.T, loadings3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c148d158-2316-4537-af2c-b6ee0f031614",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# e.g. first PCA component == original data * first rotation\n",
+    "assert np.allclose(\n",
+    "    np.round(pca_svd.iloc[:,0].values, 2), \n",
+    "    np.round(vh.T[:, 0] @ iris_x_scaled.T, 2)\n",
+    ")\n",
+    "ev = pd.DataFrame(vh.T)\n",
+    "ev.index = iris_set.feature_names\n",
+    "ev"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ab7ae6bb",
+   "metadata": {},
+   "source": [
+    "Korelacje pomiędzy czynnikami głównymi (ang. correlation between principal components)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d866375c-ac54-4099-b77c-4535b4ac91e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pca_svd.corr(), pca_eigen.corr()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "46c2e1e7",
+   "metadata": {},
+   "source": [
+    "Wartości własne (ang. Eigenvalues)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02808a2d-3b65-462c-99f0-b56783822723",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "index_eigen = [i[0] for i in sorted(enumerate(s_eigen), reverse=True, key = lambda x: x[1])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0611c6f-ab43-4ffe-898e-c3d9e5c2e606",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(s**2 / (iris_x_scaled.shape[0]), s_eigen[index_eigen])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0da92302",
+   "metadata": {},
+   "source": [
+    "Pierwsze wiersze czynników głównych (ang. First rows of principal components)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "482ee345-da56-427b-b67d-e9bd706999a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pca_svd.head(), pca_eigen[index_eigen].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b80815e0-c5ba-44a6-ab03-2be923f3599b",
+   "metadata": {},
+   "source": [
+    "### PCA wykres na podstawie 2 pierwszych komponentow\n",
+    "\n",
+    "Prosze pamietac ze informacja o odmianach iris-ow (y) nie byla wykorzystana przy obliczaniu PCA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "48cbc312-366e-4946-8d91-c0e21ed7a437",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pca_svd.plot(\n",
+    "    x = 0,\n",
+    "    y = 1, \n",
+    "    color = pd.Series(iris_set[\"target\"]).map({0: \"b\", 1: \"r\", 2: \"y\"}), \n",
+    "    kind = \"scatter\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b58d0e58",
+   "metadata": {},
+   "source": [
+    "Iloraz wiariancji pokazuje udział kolejnych komponentow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5494a6fc-0bd5-4606-a775-3aefc6829df0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s_eigen[index_eigen] / np.sum(s_eigen),\\\n",
+    "s**2 / np.sum(s**2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5eda8fd6-30fd-426f-a102-341716622462",
+   "metadata": {},
+   "source": [
+    "## Przyklad 2 -- seul1988 -- wyniki dziesiecioboju mezczyzn w Seulu w 1988"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d48de20f-c1bf-42a4-9be4-25cb9ee922a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.datasets import load_seul1988\n",
+    "seul1988 = load_seul1988()\n",
+    "seul1988_copy = seul1988.copy()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ffce172b-2765-49ff-bb3a-9d24de7e8e9f",
+   "metadata": {},
+   "source": [
+    "Sprawdzamy czy w zbiorze danych sa obserwacje nietypowe - zmienna wynik"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5826a1e-ee1d-4a93-b523-7a22c229fef8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata -d seul1988_copy -force\n",
+    "des"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22c503b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seul1988.shape, seul1988.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d03dbf91",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "graph box wynik, title(\"Wykres pudelkowy\")\n",
+    "/*Istnieje jedna potencjalna obserwacja nietypowa.*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e6d72e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seul1988[\"wynik\"].plot(kind = \"box\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b1b6b789-daab-4eee-92ff-26c0a9b0c0f3",
+   "metadata": {},
+   "source": [
+    "Przygotowujemy zbior danych do analizy -- usuwamy obserwacje potencjalnie nietypowa oraz  przemnazamy wyniki uzyskane w konkurencjach biegowych przez -1, tak aby najnizsza wartosc oznaczala najgorszy wynik"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5607ab08-a701-445b-959c-9853da16d4d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata -qui\n",
+    "drop if wynik < 6000\n",
+    "gen bieg100_1 = bieg100 * (-1)\n",
+    "gen bieg400_1 = bieg400 * (-1)\n",
+    "gen plotki_1 = plotki * (-1)\n",
+    "gen bieg1500_1 = bieg1500 * (-1)\n",
+    "\n",
+    "replace bieg100 = bieg100_1\n",
+    "replace bieg400 = bieg400_1\n",
+    "replace bieg1500 = bieg1500_1\n",
+    "replace plotki = plotki_1\n",
+    "drop bieg100_1 bieg400_1 plotki_1 bieg1500_1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88b71fcb-5f8f-403e-af00-632ea6043e4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seul1988 = seul1988.query(\"wynik >= 6000\")\n",
+    "seul1988[\"bieg100\"] = seul1988.bieg100 * -1\n",
+    "seul1988[\"bieg400\"] = seul1988.bieg400 * -1\n",
+    "seul1988[\"bieg1500\"] = seul1988.bieg1500 * -1\n",
+    "seul1988[\"plotki\"] = seul1988.plotki * -1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7ffeb575-ebfe-4781-a0a6-98c04c51eb99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Przeprowadzamy analize glownych skladowych. Domyslnie wykorzystywana jest macierz korelacji*/\n",
+    "corr bieg100-bieg1500\n",
+    "summarize bieg100-bieg1500"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "be7d3bbf-bac3-4db7-b286-13c8e289465f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Przeprowadzamy analize glownych skladowych. Domyslnie wykorzystywana jest macierz korelacji*/\n",
+    "pca bieg100-bieg1500\n",
+    "\n",
+    "/*Tylko pierwsze dwie glowne skladowe maja wartosci wlasne wieksze od 1 (co jest rownoznaczne z wyjasniona wariancja wieksza niz srednia) i wyjasniaja ponad\n",
+    "60% calkowitej wariancji.*/\n",
+    "\n",
+    "/*Interpretacja dwoch pierwszych skladowych:\n",
+    "pierwsza -- mierzy osiagniety wynik (wszystkie wspolczynniki dodatnie)\n",
+    "druga -- ma wysokie wartosci dla konkurencji zwiazanych z rzucaniem i sila (kula, dysk, oszczep) i\n",
+    "duze ujemne dla wytrzymalosciowych (biegi na 400 i 1500 metrow)*/\n",
+    "\n",
+    "/*Okreslenie liczby skladowych glownych, ktore dobrze opisuja wariancje wyjsciowych zmiennych*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9db8f841-a5a9-4aad-a8ee-5b8ac169af18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cols_x = ['bieg100', 'skok_w_dal', 'rzut_kula', 'skok_wzwyz', \n",
+    "          'bieg400', 'plotki', 'rzut_dysk', 'tyczka', 'oszczep', 'bieg1500']\n",
+    "seul1988_x = seul1988[cols_x]\n",
+    "centred_seul1988_x = pd.DataFrame(seul1988_x - np.mean(seul1988_x), columns=cols_x)\n",
+    "scaled_seul1988_x = pd.DataFrame(seul1988_x - np.mean(seul1988_x), columns=cols_x) / np.std(seul1988_x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4adf576e-3d83-4be7-9a0e-ecb672605702",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# seul1988_x.corr()\n",
+    "# seul1988_x.describe().T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb55dd35-ef04-495f-9038-e3914b77f2e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/decomposition/_pca.py\n",
+    "pca = PCA(svd_solver = 'full')\n",
+    "# SVD\n",
+    "# X = U * S * Vt\n",
+    "U, s, Vt = np.linalg.svd(scaled_seul1988_x, full_matrices = False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bc4bec22",
+   "metadata": {},
+   "source": [
+    "Główne składowe (ang. Principal components)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c209621-5957-4c4e-b2e9-af9669bcaa90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# principal components\n",
+    "# X_principals = X * V = U * S * Vt * V = U * S\n",
+    "pca_components = pca.fit_transform(scaled_seul1988_x)\n",
+    "assert np.allclose(np.abs(np.round(pca_components, 3)), np.abs(np.round(np.matmul(scaled_seul1988_x,  Vt.T), 3)))\n",
+    "assert np.allclose(pca.explained_variance_ratio_, s**2 / np.sum(s**2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5a65e03-1690-4374-93a8-79b4b244b0ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# rotation matrix\n",
+    "# np.abs needed\n",
+    "assert np.allclose(np.abs(np.round(Vt.T, 3)), np.abs(np.round(pca.components_.T, 3)))\n",
+    "pd.DataFrame(Vt.T)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd5832e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%stata predict comp1-comp2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a9d56cd6-1d90-4135-8e3a-7e88dc82bef9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Wykres osypiska*/\n",
+    "screeplot, mean /*Wykres kolejnych wartosci wlasnych; \"mean\" - zostaje naniesiona prosta odpowiadajaca sredniej z wartosci wlasnych*/\n",
+    "screeplot, ci /*Dodatkowo zostaje nalozony przedzial ufnosci*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e62e5da-2cb7-4229-8198-a6eac4bba6ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.Series(s**2 / seul1988_x.shape[0]).plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba70d846-f71a-4cc1-b72a-c8884ea0dd27",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Wykres wspólczynników dla poszczegolnych zmiennych - przydatne przy interpretacji skladowych glownych*/\n",
+    "loadingplot /*Tylko dla dwóch pierwszych wektorów wlasnych*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac3a5367-7e37-4743-aab4-65de787a8d43",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loadings = pd.DataFrame(Vt.T * s / np.sqrt(seul1988_x.shape[0] - 1))\n",
+    "ax = loadings.plot.scatter(0,1)\n",
+    "cols = seul1988_x.columns.tolist()\n",
+    "for i in range(loadings.shape[0]):\n",
+    "    ax.text(x = loadings.iloc[i,0], y = loadings.iloc[i,1], s = cols[i])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "80cdc522",
+   "metadata": {},
+   "source": [
+    "Wykres dwoch pierwszych czynników głównych\n",
+    "First two prinicipal components vs not known during extraction \"wynik\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b088bb6-b516-4a1e-b39e-919949af027b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(pca_components).plot.scatter(0, 1, c = seul1988[\"wynik\"], colormap = \"winter\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8a31923-1ffb-4495-a0f2-fea751522a99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%%stata\n",
+    "#mvtest normality  bieg100-bieg1500, stats(all)\n",
+    "#pca bieg100-bieg1500, cov vce(normal)\n",
+    "#testparm bieg100-bieg1500, equal eq(Comp1)\n",
+    "#\n",
+    "#/*niestety, nie jest spelnione zalozenie o wielowymiarowym rozkladzie normalnym*/\n",
+    "#\n",
+    "#/*Dalej analizujemy tylko dwie pierwsze glowne skladowe. */"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "854e72f5-2e44-4458-b3fb-806850f0926d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%%stata\n",
+    "#/*Jeszcze wyznaczmy macierz korelacji*/\n",
+    "#pwcorr bieg100-bieg1500 wynik comp2 comp1\n",
+    "#sum bieg100-bieg1500 wynik comp2 comp1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6bdd4c8-e16f-4e32-be7d-cbcd4fa92ccc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "//Weryfikacja poprawności przeprowadzonej analizy głównych składowych (PCA)\n",
+    "\n",
+    "/*Aby sprawdzic, czy wystarczajaco dobrze odtworzylismy zmiennosc wykorzystamy \"reszty\" -\n",
+    "roznice pomiedzy zaobserwowanymi korelacjami a tymi odtworzonymi za pomoca tylko kilku pierwszych skladowych*/\n",
+    "\n",
+    "pca  bieg100 - bieg1500, components(3) /*Rozwiazanie skladajace sie z 3 pierwszych skladowych glownych*/\n",
+    "estat residual, fit /*Macierz \"resztowa\" oraz macierz korelacji odtworzona za pomoca 3 pierwszych skladowych*/\n",
+    "corr bieg100 - bieg1500"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dab5a5f5-c28a-465c-9775-8c7baf803566",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cor_original = np.corrcoef(scaled_seul1988_x.T)\n",
+    "# set to zero not used principal components\n",
+    "s3 = np.concatenate([s[:3], [0] * 7]) \n",
+    "# Eigen -> C = V * (S**2/ (n-1)) * Vt\n",
+    "cor_pca3 = Vt.T @ (np.diag(s3 ** 2) / (seul1988.shape[0] - 1)) @ Vt\n",
+    "pd.DataFrame(cor_original).round(3),\\\n",
+    "pd.DataFrame(cor_pca3).round(3),\\\n",
+    "pd.DataFrame(cor_original - cor_pca3).round(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "615b26da-f093-4c3e-aa84-2463d9ba7758",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*Sprawdzenie, czy zmienne wejsciowe sa ze soba skorelowane.\n",
+    "Czyli sprawdzamy czy ma sens w ogole przeprwowadzenie analizy skladowych glownych*/\n",
+    "\n",
+    "/*1. Pierwszy sposob to analiza R^2*/\n",
+    "estat smc\n",
+    "reg  bieg100 skok_w_dal - bieg1500  /*sprawdzenie, skad sie biora te wartosci*/\n",
+    "\n",
+    "//WNIOSEK: \"Skok wzwyz\" wykazuje najmniejsza zaleznosc z pozostalymi konkurencjami"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cdbfbfc4-d34f-42d9-a2a3-d07c3154b858",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*2. Drugi sposób:  \"anti-image correlation\" (minus korelacja czastkowa).*/\n",
+    "\n",
+    "/*Korelacja czastkowa pokazuje \"czysta\" zaleznosc miedzy dwoma zmiennymi, traktujac pozostale jako stale.\n",
+    "Dazymy do uzyskania malej korelacji czastkowej!\n",
+    "Jesli wiele tych korelacji jest relatywnie duzych, to zaleznosc pomiedzy niektorymi zmiennymi nie zalezy od poziomu pozostalych zmiennych.\n",
+    "Tym samym moze byc trudno uzyskac wlasciwe rozwiazanie malego wymiaru. */\n",
+    "\n",
+    "pca  bieg100 - bieg1500\n",
+    "estat anti, nocov /*\"anti-image correlation\" (minus korelacja czastkowa)*/\n",
+    "pcorr  bieg100  skok_w_dal - bieg1500 /*Korelacje czastkowe zmiennej bieg100 z pozostalymi*/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4abb002a-e035-466a-87fe-03c0cddac5d1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata\n",
+    "/*3. Trzeci sposób: statystyka adekwatnosci proby Kaiser-Meyer-Olkin.*/\n",
+    "estat kmo\n",
+    "\n",
+    "/*Bez  zmiennej \"skok_wzwyz\"*/\n",
+    "pca bieg100 - kula bieg400 - bieg1500\n",
+    "estat kmo /*Wielkosc statystyki ulegla nieznacznej poprawie*/\n",
+    "\n",
+    "/*Wykres wspolczynnikow dla poszczegolnych zmiennych -- przydatne przy interpretacji skladowych glownych*/\n",
+    "pca  bieg100 - bieg1500\n",
+    "loadingplot, comp(3) combined /*3 pierwsze skladowe glowne*/\n",
+    "\n",
+    "pca bieg100 - kula bieg400 - bieg1500\n",
+    "loadingplot, comp(3) combined /*bez \"skoku wzwyz\"*/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e6747ac-5d0f-4bbc-9512-768200f58c5a",
+   "metadata": {},
+   "source": [
+    "## Przykład 3 - Regresja Liniowa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8be9cde-e17d-472c-a025-f41f417ee8bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = seul1988_x.copy()\n",
+    "X[\"const\"] = 1\n",
+    "y = seul1988[\"wynik\"]\n",
+    "ols = sm.OLS(y, X)\n",
+    "ols_result = ols.fit()\n",
+    "print(ols_result.summary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af806163-9973-4d8a-9ec6-3c9ab4180980",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# VIF\n",
+    "pd.DataFrame({\n",
+    "    \"variable\": seul1988_x.columns, \n",
+    "    \"VIF\": [variance_inflation_factor(seul1988_x, i) for i in range(seul1988_x.shape[1])]\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "480db3f3-06db-4b32-9d18-b1bf06dd32d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_skok = centred_seul1988_x[['skok_w_dal', 'skok_wzwyz', 'tyczka']]\n",
+    "x_bieg = centred_seul1988_x[['bieg100','bieg400', 'bieg1500', 'plotki']]\n",
+    "x_atlet = centred_seul1988_x[['rzut_kula','rzut_dysk', 'oszczep']]\n",
+    "\n",
+    "svd_skok = np.linalg.svd(x_skok, full_matrices = False)\n",
+    "svd_bieg = np.linalg.svd(x_bieg, full_matrices = False)\n",
+    "svd_atlet = np.linalg.svd(x_atlet, full_matrices = False)\n",
+    "\n",
+    "print(svd_skok[1]**2/np.sum(svd_skok[1]**2))\n",
+    "print(svd_bieg[1]**2/np.sum(svd_bieg[1]**2))\n",
+    "print(svd_atlet[1]**2/np.sum(svd_atlet[1]**2))\n",
+    "\n",
+    "pca_skok = x_skok @ svd_skok[2].T\n",
+    "pca_bieg = x_bieg @ svd_bieg[2].T\n",
+    "pca_atlet = x_atlet @ svd_atlet[2].T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a0a39b2-2a54-4dde-804e-0ed4891dd874",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "svd_skok[2].T[:, 0],\\\n",
+    "svd_bieg[2].T[:, 0],\\\n",
+    "svd_atlet[2].T[:, 0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d09ffe8-8e26-4c78-b57c-b52a7ad34d1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We do not now the sense (zwrot)\n",
+    "X = pd.concat([pca_skok.iloc[:,0]*-1, pca_bieg.iloc[:,0], pca_atlet.iloc[:,0]*-1], axis = 1)\n",
+    "X.columns = [\"skok\", \"bieg\", \"atlet\"]\n",
+    "print(pd.DataFrame({\"variable\": X.columns, \"VIF\": [variance_inflation_factor(X, i) for i in range(X.shape[1])]}))\n",
+    "X[\"const\"] = 1\n",
+    "y = seul1988[\"wynik\"]\n",
+    "ols = sm.OLS(y, X)\n",
+    "ols_result = ols.fit()\n",
+    "print(ols_result.summary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "059094ba-81c0-46f0-baed-4b5b5a7d2607",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Interpretacja?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dc3dddb4",
+   "metadata": {},
+   "source": [
+    "## Przyklad 4 -- Objawy depresji (Dane z Diagnoza Spoleczna 2011) PCA"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c018614",
+   "metadata": {},
+   "source": [
+    "1. Wczytaj zbior danych \"depresja.dta\"\n",
+    "2. Przeprowadz analize glownych skladowych.\n",
+    "3. Utworz wykres osypiska Ile powinismy wyroznic glownych skladowych?\n",
+    "4. Przeprowadz jeszcze raz analize, ale tylko dla ustalonej w punkcie 3 liczby glownych skladowych. Zapisz glowne skladowe oraz wyjsciowe zmienne w pliku.\n",
+    "5. Zinterpretuj glowne skladowe. Interpretacje poprzyj macierza korelacji.\n",
+    "6. Sprawdz, czy zmienne wejsciowe sa ze soba skorelowane (zweryfikuj, czy ma sens w ogole przeprwowadzenie analizy skladowych glownych).\n",
+    "    Uzyj: R^2, \"anti-image correlation\" oraz statystyki adekwatnosci Kaisera-Meyera-Olkina.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4809038e-0403-4cec-a24e-5e94830b9eff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from multidim.datasets import load_depresja\n",
+    "depresja = load_depresja()\n",
+    "depresja_copy = depresja.copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f55bf0b-cbbe-448c-9148-4ebaf79d84d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%stata -d depresja_copy -force\n",
+    "des"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "363b60c2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "aa9d594cf5340018b76a70eeac417329866dfddee009818da806e7b1b5f80883"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}