Result of step1.py as of March 2023:
assets = ['/datasets/autos.csv', '/datasets/arrivals.xlsx', '/datasets/calls.csv', '/datasets/data.csv', '/datasets/data_final.csv', '/datasets/drive-data.csv', '/datasets/feedback.csv', '/datasets/food_market_stats.csv', '/datasets/ds_test', '/datasets/hogwarts_points.csv', '/datasets/ids.xlsx', '/datasets/internet.csv', '/datasets/logs.csv', '/datasets/messages.csv', '/datasets/metrica_data.csv', '/datasets/music-data-2.csv', '/datasets/music_log.csv', '/datasets/music_log_upd_col.csv', '/datasets/music_log_upd.csv', '/datasets/music_log_upd_nan.csv', '/datasets/music_project.csv', '/datasets/position.csv', '/datasets/rating.csv', '/datasets/real_estate_data.csv', '/datasets/returned.csv', '/datasets/revenue.csv', '/datasets/seo_data.xlsx', '/datasets/stats_by_age.csv', '/datasets/stats_by_age_employment.csv', '/datasets/stock_upd.csv', '/datasets/stock.xlsx', '/datasets/support.csv', '/datasets/support_log.csv', '/datasets/support_log_grouped.csv', '/datasets/support_upd.csv', '/datasets/tariffs.csv', '/datasets/users.csv', '/datasets/visits.csv', '/datasets/real_estate_data_old.csv', '/datasets/query_1.csv', '/datasets/query_3.csv', '/datasets/query_last.csv', '/datasets/real_estate_data_old', '/datasets/ad_data', '/datasets/site_data', '/datasets/funnel_prod_events.csv', '/datasets/pfunnel_demo.csv', '/datasets/events.csv', '/datasets/yandex_metrika_logs.csv', '/datasets/ad_data.csv', '/datasets/site_data.csv', '/datasets/funnel_cr_example.csv', '/datasets/funnel_daily.csv', '/datasets/Анализ бизнес-показателей, Тема 1, урок 5', '/datasets/ad_data_2.csv', '/datasets/site_data_2.csv', '/datasets/orders_data_for_cohort.csv', '/datasets/game_purchases.csv', '/datasets/revenue_pivot.csv', '/datasets/user_activity.csv', '/datasets/work_user_activity.csv', '/datasets/churn_rate.csv', '/datasets/behavioral_cohorts.csv', '/datasets/coffee_home.csv', '/datasets/ltv_costs_1.csv', '/datasets/ltv_orders_1.csv', '/datasets/ltv_costs_2.csv', '/datasets/ltv_orders_2.csv', '/datasets/users_data.csv', '/datasets/visits_log.csv', '/datasets/costs.csv', '/datasets/orders_log.csv', '/datasets/games_data.csv', '/datasets/languages.csv', '/datasets/music_log_upd_en.csv', '/datasets/lesson_data_3-3.csv', '/datasets/data_3-3.csv', '/datasets/hypothesis_4-1-1.csv', '/datasets/hypothesis_4-1-2.csv', '/datasets/data_for_tasks_3.csv', '/datasets/data_for_tasks_3_visitors.csv', '/datasets/orders.csv', '/datasets/undefined', '/datasets/hypothesis.csv', '/datasets/visitors.csv', '/datasets/slices.csv', '/datasets/auto.csv', '/datasets/rest.csv', '/datasets/restaurant_data.csv', '/datasets/rest_data.csv', '/datasets/games.csv', '/datasets/train_data.csv', '/datasets/logs_exp.csv', '/datasets/test_data.csv', '/datasets/test_data_full.csv', '/datasets/languages_rus.csv', '/datasets/movies.csv', '/datasets/urbanization.csv', '/datasets/users_behavior.csv', '/datasets/travel_insurance.csv', '/datasets/travel_insurance_preprocessed.csv', '/datasets/tripadvisor_review_case.csv', '/datasets/gym_churn.csv', '/datasets/flights.csv', '/datasets/flights_preprocessed.csv', '/datasets/flights_test_preprocessed.csv', '/datasets/Churn.csv', '/datasets/geo_data_0.csv', '/datasets/geo_data_1.csv', '/datasets/geo_data_2.csv', '/datasets/eng_target.csv', '/datasets/eng_probabilites.csv', '/datasets/heart_labeled.csv', '/datasets/heart.csv', '/datasets/cholera.csv', '/datasets/exoplanet.csv', '/datasets/gold_recovery_test.csv', '/datasets/gold_recovery_train.csv', '/datasets/gold_recovery_full.csv', '/datasets/insurance.csv', '/datasets/hogwarts_points_eng.csv', '/datasets/web_analytics_data.csv', '/datasets/stock_upd_eng.csv', '/datasets/support_eng.csv', '/datasets/support_upd_eng.csv', '/datasets/support_log_grouped_eng.csv', '/datasets/d', '/datasets/credit_scoring_eng.csv', '/datasets/seo_data_eng.xlsx', '/datasets/data_final_eng.csv', '/datasets/stock_eng.xlsx', '/datasets/dataset_facebook_cosmetics.csv', '/datasets/Electrical_Grid_Stability.csv', '/datasets/auto_cons.csv', '/datasets/feedback_eng.csv', '/datasets/train_data_n.csv', '/datasets/test_data_n.csv', '/datasets/games_full.csv', '/datasets/visits_eng.csv', '/datasets/energy_consumption.csv', '/datasets/taxi.csv', '/datasets/real_estate_data_eng.csv', '/datasets/ds_bert', '/datasets/tweets.csv', '/datasets/tweets_lemm.csv', '/datasets/tweets_lemm_test_labels.csv', '/datasets/tweets_lemm_test.csv', '/datasets/tweets_lemm_train.csv', '/datasets/toxic_comments.csv', '/datasets/megaline_tariffs.csv', '/datasets/megaline_users.csv', '/datasets/megaline_internet.csv', '/datasets/megaline_calls.csv', '/datasets/megaline_messages.csv', '/datasets/pickups_terminal_5.csv', '/datasets/query2.csv', '/datasets/emoji.csv', '/datasets/purchase_data', '/datasets/ad_costs.csv', '/datasets/ds_cv_images', '/datasets/imdb_reviews_small_lemm.tsv', '/datasets/weather.csv', '/datasets/fashion_mnist', '/datasets/fruits_small', '/datasets/purchase_data.csv', '/datasets/faces', '/datasets/ad_data_eng.csv', '/datasets/site_data_eng.csv', '/datasets/train_data_us.csv', '/datasets/test_data_us.csv', '/datasets/test_data_us1.csv', '/datasets/orders_log_us.csv', '/datasets/costs_us.csv', '/datasets/visits_log_us.csv', '/datasets/test_data_us2.csv', '/datasets/test_data_full_us.csv', '/datasets/keras_models', '/datasets/project_sql_result_01.csv', '/datasets/project_sql_result_02.csv', '/datasets/project_sql_result_03.csv', '/datasets/project_sql_result_04.csv', '/datasets/project_sql_result_05.csv', '/datasets/project_sql_result_06.csv', '/datasets/project_sql_result_07.csv', '/datasets/project_sql_result_01 new.csv', '/datasets/polomki.csv', '/datasets/segments.csv', '/datasets/cars.csv', '/datasets/cars_label.csv', '/datasets/sales.csv', '/datasets/orders_for_detecting_anomalies_visitors.csv', '/datasets/orders_for_detecting_anomalies.csv', '/datasets/travel_insurance_us.csv', '/datasets/travel_insurance_us_preprocessed.csv', '/datasets/megaline_plans.csv', '/datasets/megaline_users1.csv', '/datasets/final_steel', '/datasets/final_provider', '/datasets/music_log_old_en.csv', '/datasets/restaurant_data_us.csv', '/datasets/rest_data_us.csv', '/datasets/real_estate_data_us', '/datasets/real_estate_data_us.csv', '/datasets/hypotheses_us.csv', '/datasets/orders_us.csv', '/datasets/visitors_us.csv', '/datasets/products_stores', '/datasets/products_data_all', '/datasets/transactions', '/datasets/products', '/datasets/stores', '/datasets/visits_us.csv', '/datasets/rest_us.csv', '/datasets/problems.csv', '/datasets/insurance_us.csv', '/datasets/car_data.csv', '/datasets/dataset_facebook_cosmetics_us.csv', '/datasets/Electrical_Grid_Stability_us.csv', '/datasets/tripadvisor_review_case_us.csv', '/datasets/auto_cons_us.csv', '/datasets/game_data.csv', '/datasets/gym_churn_us.csv', '/datasets/oscar_data.csv', '/datasets/mobile_stats.csv', '/datasets/final_ab_events_us.csv', '/datasets/final_ab_new_users_us.csv', '/datasets/ab_project_marketing_events_us.csv', '/datasets/final_ab_participants_us.csv', '/datasets/urbanization_rot.csv', '/datasets/imdb_reviews_small.tsv', '/datasets/imdb_reviews.tsv', '/datasets/final_ab_new_users_upd_us.csv', '/datasets/final_ab_events_upd_us', '/datasets/final_ab_participants_upd_us.csv', '/datasets/schedules.csv', '/datasets/logs_exp_us.csv', '/datasets/imdb_reviews_small_lemm_test_labels.tsv', '/datasets/imdb_reviews_small_lemm_train.tsv', '/datasets/imdb_reviews_small_lemm_test.tsv', '/datasets/vehicle_sales_us.csv', '/datasets/urbanization_rot_us.csv', '/datasets/sales_managers_us.csv', '/datasets/tableau_schedules_us.csv', '/datasets/music.csv', '/datasets/morse_code.csv', '/datasets/philosophy_books.csv', '/datasets/produce_categories.csv', '/datasets/app_stats.csv', '/datasets/sdf978s9ad8f7sadf.csv', '/datasets/experimental_seo_eng_1.csv', '/datasets/experimental_seo_eng_2.csv', '/datasets/experimental_seo_eng_2.xlsx', '/datasets/exp_seo_data_eng.xlsx', '/datasets/imdb_reviews_200.csv', '/datasets/imdb_reviews_200.tsv', '/datasets/vehicles_brief_v3_us.csv', '/datasets/vehicles_us.csv', '/datasets/recent_visitors.csv', '/datasets/test_participants.csv', '/datasets/result_analysis_events.csv', '/datasets/result_analysis_participants.csv', '/datasets/ticket_data_new.csv', '/datasets/virus_patients_new.csv', '/datasets/l7_users_test.csv', '/datasets/l7_purchases_test.csv', '/datasets/l7_participants_test.csv', '/datasets/virus_patients.csv', '/datasets/ticket_data.csv', '/datasets/final_ab_participants.csv', '/datasets/final_ab_new_users.csv', '/datasets/final_ab_events.csv', '/datasets/ab_project_marketing_events.csv', '/datasets/yandex_realty_data.csv', '/datasets/game_data_us.csv', '/datasets/data_arc_en.csv', '/datasets/data_bulk_en.csv', '/datasets/data_bulk_time_en.csv', '/datasets/data_gas_en.csv', '/datasets/data_temp_en.csv', '/datasets/data_wire_en.csv', '/datasets/data_wire_time_en.csv', '/datasets/telecom_clients.csv', '/datasets/telecom_dataset.csv', '/datasets/product_codes.csv', '/datasets/retail_dataset.csv', '/datasets/ecommerce_dataset.csv', '/datasets/bank_dataset.csv', '/datasets/mobile_soures.csv', '/datasets/mobile_dataset.csv', '/datasets/user_source.csv', '/datasets/game_actions.csv', '/datasets/seo_data_category_ids.xlsx', '/datasets/seo_data_subcategory_ids.xlsx', '/datasets/crops_usa.csv', '/datasets/test.csv', '/datasets/DrinkPreferences.csv', '/datasets/PassengerTotals.xlsx', '/datasets/cheese.csv', '/datasets/bank_sales.csv', '/datasets/support_data.csv', '/datasets/DrinkPreferencesFilled.csv', '/datasets/PassengerCompare.xlsx', '/datasets/PassengerTotalsClean.xlsx', '/datasets/SideEffects.xlsx', '/datasets/SideEffectsCleaned.xlsx', '/datasets/FinalDengueTestingData.xlsx', '/datasets/TestingWith_Age_ExpDays.xlsx', '/datasets/TestResults.xlsx', '/datasets/ClientInfo.xlsx', '/datasets/MergedData.xlsx', '/datasets/ProcessedSurveyData.xlsx', '/datasets/SurveyFindings.xlsx', '/datasets/game_board.csv', '/datasets/yandex_music_project.csv', '/datasets/properties_us.csv', '/datasets/Borough_Boundaries.csv', '/datasets/ad_costs_us.csv', '/datasets/ecommerce_dataset_us.csv', '/datasets/events_us.csv', '/datasets/mobile_dataset_us.csv', '/datasets/mobile_sources_us.csv', '/datasets/product_codes_us.csv', '/datasets/retail_dataset_us.csv', '/datasets/telecom_clients_us.csv', '/datasets/telecom_dataset_us.csv', '/datasets/user_source_us.csv', '/datasets/test111.csv', '/datasets/support_data_usa.csv', '/datasets/visits_separated.csv', '/datasets/trending_by_time.csv', '/datasets/passenger_compare.xlsx', '/datasets/passenger_totals.xlsx', '/datasets/merged_data.xlsx', '/datasets/client_info.xlsx', '/datasets/drink_preferences.csv', '/datasets/drink_preferences_filled.csv', '/datasets/final_dengue_testing_data.xlsx', '/datasets/passenger_totals_clean.xlsx', '/datasets/processed_survey_data.xlsx', '/datasets/side_effects.xlsx', '/datasets/side_effects_cleaned.xlsx', '/datasets/survey_findings.xlsx', '/datasets/test_results.xlsx', '/datasets/testing_with_age_exp_days.xlsx', '/datasets/agent_call_times.csv', '/datasets/apple_counts.csv', '/datasets/apple_quality.csv', '/datasets/bike_injuries.csv', '/datasets/call_type_dictionary.csv', '/datasets/neighborhood_dict.csv', '/datasets/personal_data.xlsx', '/datasets/region_code_dict.csv', '/datasets/region_info.csv', '/datasets/standardized_test_results.xlsx', '/datasets/delete', '/datasets/delete.csv', '/datasets/удалить', '/datasets/data', '/datasets/letters', '/datasets/letters.csv', '/datasets/product_codes_us_test.csv', '/datasets/31231312t.csv', '/datasets/visittrrrt.csv', '/datasets/visittrrrt333.csv', '/datasets/visittrrrt4444.csv', '/datasets/vis555.csv', '/datasets/vis666555.csv', '/datasets/vis55588.csv', '/datasets/weather_data.csv', '/datasets/languages_websites.csv', '/datasets/users_behavior_upd.csv', '/datasets/letters_colors_decimals.csv', '/datasets/train_ml.csv', '/datasets/logs_us.csv', '/datasets/delete_this', '/datasets/funnel_cr_example2.csv', '/datasets/product_funnel_demo.csv', '/datasets/ch02_les03_costs.csv', '/datasets/ch02_les03_events.csv', '/datasets/ch02_les03_orders.csv', '/datasets/ch02_problems01_users.csv', '/datasets/ch02_problems02_events.csv', '/datasets/three_games.csv', '/datasets/three_more_games.csv', '/datasets/book_orders.csv', '/datasets/ch02_problems01_visits', '/datasets/profiles.csv', '/datasets/ch02_problems01_visits.csv', '/datasets/ch02_problems02_users.csv', '/datasets/ch02_problems02_visits.csv', '/datasets/ch02_problems03_visits.csv', '/datasets/sessions.csv', '/datasets/ch03_problems03_active_users.csv', '/datasets/ch03_vis_profiles.csv', '/datasets/ch03_vis_sessions.csv', '/datasets/ch04_final_visits.csv', '/datasets/ch02_problems02_orders.csv', '/datasets/ch02_problems02_costs.csv', '/datasets/ch03_vis_costs.csv', '/datasets/ch03_vis_events.csv', '/datasets/ch03_vis_orders.csv', '/datasets/ch04_final_costs.csv', '/datasets/ch04_final_events.csv', '/datasets/ch04_final_orders.csv', '/datasets/users_sessions_data.csv', '/datasets/ывс', '/datasets/ывсцвсысв', '/datasets/data1', '/datasets/orders_for_anomalies_detection.csv', '/datasets/csv_example_1.csv', '/datasets/csv_example_2.csv', '/datasets/power_plants_truncated.csv', '/datasets/costs_info.csv', '/datasets/orders_info.csv', '/datasets/visits_info.csv', '/datasets/visits_log_renamed.csv', '/datasets/final_ab_events_upd_us.csv', '/datasets/code_snippets.csv', '/datasets/mike', '/datasets/doggy_daycare.csv', '/datasets/gold_recovery_full_new.csv', '/datasets/gold_recovery_test_new.csv', '/datasets/gold_recovery_train_new.csv', '/datasets/cats_data.csv', '/datasets/borough_boundaries.csv', '/datasets/shows.csv', '/datasets/mkrf_shows.csv', '/datasets/mkrf_movies.csv', '/datasets/datasets.csv', '/datasets/simpsons.csv', '/datasets/user_time.csv', '/datasets/random_sample.csv', '/datasets/data1.csv', '/datasets/data2.csv', '/datasets/data3.csv', '/datasets/music_project_en.csv', '/datasets/cosmetics_sales.csv', '/datasets/profiles_raw.pickle', '/datasets/report_raw.pickle', '/datasets/result_raw.pickle', '/datasets/sessions_raw.pickle', '/datasets/result_backup.csv', '/datasets/profiles_backup.csv', '/datasets/sessions_backup.csv', '/datasets/drink_pref_cleaned.csv', '/datasets/ad_costs_new.csv', '/datasets/products_data_all_us.xlsx', '/datasets/telecomm_csi.db', '/datasets/memosky_test_00.csv', '/datasets/memosky_test_04.csv', '/datasets/memosky_test_05.csv', '/datasets/memosky_test_06.csv', '/datasets/memosky_test_07.csv', '/datasets/memosky_test_08.csv', '/datasets/memosky_test_09.csv', '/datasets/memosky_test_01.csv', '/datasets/memosky_test_02.csv', '/datasets/memosky_test_03.csv', '/datasets/hotel_train.csv', '/datasets/hotel_test.csv', '/datasets/be.csv', '/datasets/hotel_train_old.csv', '/datasets/hotel_test_old.csv', '/datasets/command_line.csv', '/datasets/Alcohol_Sales.csv', '/datasets/BeerWineLiquor.csv', '/datasets/visits_info_short.csv', '/datasets/orders_info_short.csv', '/datasets/costs_info_short.csv', '/datasets/titanic_train.csv', '/datasets/cal_housing_data.csv', '/datasets/cases.csv', '/datasets/time_province.csv', '/datasets/wiki_extract.txt', '/datasets/housing.csv', '/datasets/produce_sales_es.csv', '/datasets/produce_sales_ptbr.csv', '/datasets/orders_for_anomalies_detection_visitors.csv', '/datasets/logs_us_esp.csv', '/datasets/languages_websites_es.csv', '/datasets/languages_websites_ptbr.csv', '/datasets/projects.csv', '/datasets/web_analytics_data_esp.csv', '/datasets/support_eng_esp.csv', '/datasets/support_upd_esp.csv', '/datasets/support_log_grouped_esp.csv', '/datasets/support_esp.csv', '/datasets/stock_upd_esp.csv', '/datasets/drink_preferences_esp.csv', '/datasets/drink_pref_cleaned_esp.csv', '/datasets/seo_data_esp.xlsx', '/datasets/drive_data_eng.csv', '/datasets/SLNREG', '/datasets/sqlite_train.db', '/datasets/test_data_full_us_upd.csv', '/datasets/standardized_test_results_esp.xlsx', '/datasets/standardized_test_results_esp(1).xlsx', '/datasets/shipping.csv', '/datasets/heavy', '/datasets/languages_websites_idn.csv', '/datasets/mnist_database', '/datasets/MNIST', '/datasets/real_estate_data', '/datasets/6_class.csv', '/datasets/gpp_modified.csv', '/datasets/product_reviews.xlsx', '/datasets/visit_log.csv', '/datasets/phone_stock.csv', '/datasets/mouse_growth_rate.csv', '/datasets/unemployment_usa.csv', '/datasets/global_temp_dev.csv', '/datasets/height_weight.csv', '/datasets/sbux.csv', '/datasets/west_coast_pop.csv', '/datasets/us_state_symbols.csv', '/datasets/vg_sales.csv', '/datasets/OnlineRetail.csv', '/datasets/products.csv', '/datasets/instacart_orders.csv', '/datasets/Salaries.csv', '/datasets/order_products.csv', '/datasets/toxic_comments_old.csv', '/datasets/recent_orders.csv', '/datasets/new_members.csv', '/datasets/departments.csv', '/datasets/aisles.csv', '/datasets/moscow_places.csv', '/datasets/moscow_malls_info.csv', '/datasets/admin_level_geomap.geojson', '/datasets/X_test.csv', '/datasets/X.csv', '/datasets/y_test.csv', '/datasets/y.csv', '/datasets/mobile_sources.csv', '/datasets/Test.csv', '/datasets/Train.csv', '/datasets/zoo_ru.csv', '/datasets/autos_old.csv', '/datasets/zooru', '/datasets/support_data_ds.csv', '/datasets/eng_probabilities.csv', '/datasets/real_estate.csv', '/datasets/real_estate_clean.csv', '/datasets/real_estate_2.csv', '/datasets/car_loan.csv', '/datasets/car_loan_levels.csv', '/datasets/ds_test/X_test.csv', '/datasets/ds_test/X_test_small.csv', '/datasets/ds_test/X_train.csv', '/datasets/ds_test/X_train_small.csv', '/datasets/ds_test/y_test.csv', '/datasets/ds_test/y_test_small.csv', '/datasets/ds_test/y_train.csv', '/datasets/ds_test/y_train_small.csv', '/datasets/ds_bert/rubert_model.bin', '/datasets/ds_bert/vocab.txt', '/datasets/ds_bert/bert_config.json', '/datasets/ds_cv_images/cat.jpg', '/datasets/ds_cv_images/face.png', '/datasets/fashion_mnist/train_features.npy', '/datasets/fashion_mnist/train_target.npy', '/datasets/fashion_mnist/test_features.npy', '/datasets/fashion_mnist/test_target.npy', '/datasets/fruits_small/Banana', '/datasets/fruits_small/Carambola', '/datasets/fruits_small/Mango', '/datasets/fruits_small/muskmelon', '/datasets/fruits_small/Orange', '/datasets/fruits_small/Peach', '/datasets/fruits_small/Pear', '/datasets/fruits_small/Persimmon', '/datasets/fruits_small/Pitaya', '/datasets/fruits_small/Plum', '/datasets/fruits_small/Pomegranate', '/datasets/fruits_small/Tomatoes', '/datasets/faces/labels.csv', '/datasets/faces/final_files', '/datasets/keras_models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', '/datasets/final_steel/data_gas.csv', '/datasets/final_steel/data_bulk.csv', '/datasets/final_steel/data_bulk_time.csv', '/datasets/final_steel/data_wire_time.csv', '/datasets/final_steel/data_arc.csv', '/datasets/final_steel/data_temp.csv', '/datasets/final_steel/data_wire.csv', '/datasets/final_provider/internet.csv', '/datasets/final_provider/phone.csv', '/datasets/final_provider/personal.csv', '/datasets/final_provider/contract.csv', '/datasets/heavy/6_class.csv', '/datasets/mnist_database/t10k-images-idx3-ubyte', '/datasets/mnist_database/t10k-labels-idx1-ubyte', '/datasets/mnist_database/README.md', '/datasets/mnist_database/train-images-idx3-ubyte', '/datasets/mnist_database/train-labels-idx1-ubyte', '/datasets/MNIST/raw']