diff --git a/dtable_events/app/app.py b/dtable_events/app/app.py index f1c13e9d..520d9bff 100644 --- a/dtable_events/app/app.py +++ b/dtable_events/app/app.py @@ -21,7 +21,7 @@ from dtable_events.data_sync.data_syncer import DataSyncer from dtable_events.workflow.workflow_actions import WorkflowActionsHandler from dtable_events.workflow.workflow_schedules_scanner import WorkflowSchedulesScanner -from dtable_events.page_design.manager import conver_page_to_pdf_manager +from dtable_events.convert_page.manager import conver_page_to_pdf_manager from dtable_events.api_calls.api_calls_counter import APICallsCounter diff --git a/dtable_events/automations/actions.py b/dtable_events/automations/actions.py index 186b20b9..8c84bf76 100644 --- a/dtable_events/automations/actions.py +++ b/dtable_events/automations/actions.py @@ -21,7 +21,7 @@ from dtable_events.app.config import DTABLE_WEB_SERVICE_URL, DTABLE_PRIVATE_KEY, \ SEATABLE_FAAS_AUTH_TOKEN, SEATABLE_FAAS_URL, INNER_DTABLE_DB_URL from dtable_events.dtable_io import send_wechat_msg, send_email_msg, send_dingtalk_msg, batch_send_email_msg -from dtable_events.page_design.manager import conver_page_to_pdf_manager +from dtable_events.convert_page.manager import conver_page_to_pdf_manager from dtable_events.notification_rules.notification_rules_utils import send_notification, fill_msg_blanks_with_sql_row from dtable_events.utils import uuid_str_to_36_chars, is_valid_email, get_inner_dtable_server_url, \ normalize_file_path, gen_file_get_url, gen_random_option @@ -2851,7 +2851,8 @@ def do_action(self): 'workspace_id': self.workspace_id, 'file_names_dict': file_names_dict, 'target_column_key': self.target_column_key, - 'table_id': self.auto_rule.table_id + 'table_id': self.auto_rule.table_id, + 'plugin_type': 'page-design' }) except Full: self.auto_rule.append_warning({ diff --git a/dtable_events/page_design/__init__.py b/dtable_events/convert_page/__init__.py similarity index 100% rename from dtable_events/page_design/__init__.py rename to dtable_events/convert_page/__init__.py diff --git a/dtable_events/page_design/manager.py b/dtable_events/convert_page/manager.py similarity index 91% rename from dtable_events/page_design/manager.py rename to dtable_events/convert_page/manager.py index bd560a69..9552e8d2 100644 --- a/dtable_events/page_design/manager.py +++ b/dtable_events/convert_page/manager.py @@ -7,7 +7,7 @@ from seaserv import seafile_api from dtable_events.app.config import DTABLE_WEB_SERVICE_URL, INNER_DTABLE_DB_URL -from dtable_events.page_design.utils import get_driver, CHROME_DATA_DIR, open_page_view, wait_page_view +from dtable_events.convert_page.utils import get_chrome_data_dir, get_driver, open_page_view, wait_page_view from dtable_events.utils import get_inner_dtable_server_url, get_opt_from_conf_or_env from dtable_events.utils.dtable_server_api import DTableServerAPI, NotFoundException from dtable_events.utils.dtable_db_api import DTableDBAPI @@ -45,11 +45,11 @@ def init(self, config): def get_driver(self, index): driver = self.drivers.get(index) if not driver: - driver = get_driver(os.path.join(CHROME_DATA_DIR, f'convert-manager-{index}')) + driver = get_driver(get_chrome_data_dir(f'convert-manager-{index}')) self.drivers[index] = driver return driver - def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, page_id, table_name, target_column, step_row_ids, file_names_dict): + def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, plugin_type, page_id, table_name, target_column, step_row_ids, file_names_dict): dtable_server_api = DTableServerAPI('dtable-events', dtable_uuid, dtable_server_url, DTABLE_WEB_SERVICE_URL, repo_id, workspace_id) dtable_db_api = DTableDBAPI('dtable-events', dtable_uuid, INNER_DTABLE_DB_URL) rows_files_dict = {} @@ -57,14 +57,14 @@ def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, page_id # open rows for row_id in step_row_ids: - session_id = open_page_view(driver, dtable_uuid, page_id, row_id, dtable_server_api.internal_access_token) + session_id = open_page_view(driver, dtable_uuid, plugin_type, page_id, row_id, dtable_server_api.internal_access_token) row_session_dict[row_id] = session_id # wait for chrome windows rendering for row_id in step_row_ids: output = io.BytesIO() # receive pdf content session_id = row_session_dict[row_id] - wait_page_view(driver, session_id, row_id, output) + wait_page_view(driver, session_id, plugin_type, row_id, output) file_name = file_names_dict.get(row_id, f'{dtable_uuid}_{page_id}_{row_id}.pdf') if not file_name.endswith('.pdf'): file_name += '.pdf' @@ -92,7 +92,7 @@ def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, page_id }) dtable_server_api.batch_update_rows(table_name, updates) - def check_resources(self, dtable_uuid, page_id, table_id, target_column_key, row_ids): + def check_resources(self, dtable_uuid, plugin_type, page_id, table_id, target_column_key, row_ids): """ :return: resources -> dict or None, error_msg -> str or None """ @@ -115,7 +115,7 @@ def check_resources(self, dtable_uuid, page_id, table_id, target_column_key, row # plugin plugin_settings = metadata.get('plugin_settings') or {} - plugin = plugin_settings.get('page-design') or [] + plugin = plugin_settings.get(plugin_type) or [] if not plugin: return None, 'plugin not found' page = next(filter(lambda page: page.get('page_id') == page_id, plugin), None) @@ -150,6 +150,7 @@ def do_convert(self, index): logger.debug('do_convert task_info: %s', task_info) dtable_uuid = task_info.get('dtable_uuid') + plugin_type = task_info.get('plugin_type') page_id = task_info.get('page_id') row_ids = task_info.get('row_ids') target_column_key = task_info.get('target_column_key') @@ -164,7 +165,7 @@ def do_convert(self, index): # resource check # Rather than wait one minute to render a wrong page, a resources check is more effective try: - resources, error_msg = self.check_resources(dtable_uuid, page_id, table_id, target_column_key, row_ids) + resources, error_msg = self.check_resources(dtable_uuid, plugin_type, page_id, table_id, target_column_key, row_ids) if not resources: logger.warning('page design dtable: %s page: %s table: %s column: %s error: %s', dtable_uuid, page_id, table_id, target_column_key, error_msg) continue @@ -186,7 +187,7 @@ def do_convert(self, index): except Exception as e: logger.exception('get driver: %s error: %s', index, e) try: - self.batch_convert_rows(driver, repo_id, workspace_id, dtable_uuid, page_id, table['name'], target_column, step_row_ids, file_names_dict) + self.batch_convert_rows(driver, repo_id, workspace_id, dtable_uuid, plugin_type, page_id, table['name'], target_column, step_row_ids, file_names_dict) except Exception as e: logger.exception('convert task: %s error: %s', task_info, e) finally: diff --git a/dtable_events/page_design/utils.py b/dtable_events/convert_page/utils.py similarity index 79% rename from dtable_events/page_design/utils.py rename to dtable_events/convert_page/utils.py index c9f29c0c..5fb74ed6 100644 --- a/dtable_events/page_design/utils.py +++ b/dtable_events/convert_page/utils.py @@ -3,6 +3,7 @@ import json import logging import time +import os from selenium import webdriver from selenium.webdriver.chrome.options import Options @@ -16,6 +17,12 @@ CHROME_DATA_DIR = '/tmp/chrome-user-datas' +def get_chrome_data_dir(dir_name='tmp'): + if not os.path.isdir(CHROME_DATA_DIR): + os.makedirs(CHROME_DATA_DIR) + return os.path.join(CHROME_DATA_DIR, dir_name) + + def get_driver(user_data_path): webdriver_options = Options() @@ -29,10 +36,13 @@ def get_driver(user_data_path): return driver -def open_page_view(driver: webdriver.Chrome, dtable_uuid, page_id, row_id, access_token): - url = DTABLE_WEB_SERVICE_URL.strip('/') + '/dtable/%s/page-design/%s/' % (uuid_str_to_36_chars(dtable_uuid), page_id) - if row_id: - url = DTABLE_WEB_SERVICE_URL.strip('/') + '/dtable/%s/page-design/%s/row/%s/' % (uuid_str_to_36_chars(dtable_uuid), page_id, row_id) +def open_page_view(driver: webdriver.Chrome, dtable_uuid, plugin_type, page_id, row_id, access_token): + if plugin_type == 'page-design': + url = DTABLE_WEB_SERVICE_URL.strip('/') + '/dtable/%s/page-design/%s/' % (uuid_str_to_36_chars(dtable_uuid), page_id) + if row_id: + url = DTABLE_WEB_SERVICE_URL.strip('/') + '/dtable/%s/page-design/%s/row/%s/' % (uuid_str_to_36_chars(dtable_uuid), page_id, row_id) + elif plugin_type == 'document': + url = DTABLE_WEB_SERVICE_URL.strip('/') + '/dtable/%s/document/%s/row/%s/' % (uuid_str_to_36_chars(dtable_uuid), page_id, row_id) url += '?access-token=%s&need_convert=%s' % (access_token, 0) logger.debug('url: %s', url) @@ -40,7 +50,7 @@ def open_page_view(driver: webdriver.Chrome, dtable_uuid, page_id, row_id, acces return driver.window_handles[-1] -def wait_page_view(driver: webdriver.Chrome, session_id, row_id, output): +def wait_page_view(driver: webdriver.Chrome, session_id, plugin_type, row_id, output): def check_images_and_networks(driver, frequency=0.5): """ make sure all images complete @@ -82,9 +92,15 @@ def check_images_and_networks(driver, frequency=0.5): driver.switch_to.window(session_id) + monitor_dom_id = '' + if plugin_type == 'page-design': + monitor_dom_id = 'page-design-render-complete' + elif plugin_type == 'document': + monitor_dom_id = 'document-render-complete' + try: # make sure react is rendered, timeout await_react_render, rendering is not completed within 3 minutes, and rendering performance needs to be improved - WebDriverWait(driver, await_react_render).until(lambda driver: driver.find_element_by_id('page-design-render-complete') is not None, message='wait react timeout') + WebDriverWait(driver, await_react_render).until(lambda driver: driver.find_element_by_id(monitor_dom_id) is not None, message='wait react timeout') # make sure images from asset are rendered, timeout 120s WebDriverWait(driver, 120, poll_frequency=1).until(lambda driver: check_images_and_networks(driver), message='wait images and networks timeout') time.sleep(sleep_time) # wait for all rendering @@ -125,6 +141,6 @@ def check_images_and_networks(driver, frequency=0.5): # logger.debug('network logs end') -def convert_page_to_pdf(driver: webdriver.Chrome, dtable_uuid, page_id, row_id, access_token, output): - session_id = open_page_view(driver, dtable_uuid, page_id, row_id, access_token) - wait_page_view(driver, session_id, row_id, output) +def convert_page_to_pdf(driver: webdriver.Chrome, dtable_uuid, plugin_type, page_id, row_id, access_token, output): + session_id = open_page_view(driver, dtable_uuid, plugin_type, page_id, row_id, access_token) + wait_page_view(driver, session_id, plugin_type, row_id, output) diff --git a/dtable_events/dtable_io/__init__.py b/dtable_events/dtable_io/__init__.py index 4ff92b2e..9e0222da 100644 --- a/dtable_events/dtable_io/__init__.py +++ b/dtable_events/dtable_io/__init__.py @@ -33,7 +33,7 @@ import_excel_csv_add_table_by_dtable_server, update_parsed_file_by_dtable_server, \ parse_update_excel_upload_excel_to_json, parse_update_csv_upload_csv_to_json, parse_and_import_excel_csv_to_dtable, \ parse_and_import_excel_csv_to_table, parse_and_update_file_to_table, parse_and_append_excel_csv_to_table -from dtable_events.page_design.utils import CHROME_DATA_DIR, convert_page_to_pdf as _convert_page_to_pdf, get_driver +from dtable_events.convert_page.utils import get_chrome_data_dir, convert_page_to_pdf as _convert_page_to_pdf, get_driver from dtable_events.statistics.db import save_email_sending_records, batch_save_email_sending_records from dtable_events.data_sync.data_sync_utils import run_sync_emails from dtable_events.utils import get_inner_dtable_server_url, is_valid_email, uuid_str_to_36_chars @@ -823,7 +823,7 @@ def batch_send_email_msg(auth_info, send_info_list, username, config=None, db_se session.close() -def convert_page_to_pdf(dtable_uuid, page_id, row_id): +def convert_page_to_pdf(dtable_uuid, plugin_type, page_id, row_id): dtable_server_url = get_inner_dtable_server_url() access_token = DTableServerAPI('dtable-events', dtable_uuid, dtable_server_url).internal_access_token target_dir = '/tmp/dtable-io/convert-page-to-pdf' @@ -831,14 +831,15 @@ def convert_page_to_pdf(dtable_uuid, page_id, row_id): os.makedirs(target_dir) target_path = os.path.join(target_dir, '%s_%s_%s.pdf' % (dtable_uuid, page_id, row_id)) - if not os.path.isdir(CHROME_DATA_DIR): - os.makedirs(CHROME_DATA_DIR) - driver = get_driver(os.path.join(CHROME_DATA_DIR, 'dtable-io')) + chrome_data_dir_name = f'{dtable_uuid}-{page_id}-{row_id}' + driver = get_driver(get_chrome_data_dir(chrome_data_dir_name)) try: - _convert_page_to_pdf(driver, dtable_uuid, page_id, row_id, access_token, target_path) + _convert_page_to_pdf(driver, dtable_uuid, plugin_type, page_id, row_id, access_token, target_path) except Exception as e: dtable_io_logger.exception('convert dtable: %s page: %s row: %s error: %s', dtable_uuid, page_id, row_id, e) finally: + if os.path.exists(chrome_data_dir_name): + shutil.rmtree(chrome_data_dir_name) driver.quit() diff --git a/dtable_events/dtable_io/request_handler.py b/dtable_events/dtable_io/request_handler.py index 53c6bbd3..4346103a 100644 --- a/dtable_events/dtable_io/request_handler.py +++ b/dtable_events/dtable_io/request_handler.py @@ -410,12 +410,13 @@ def convert_page_to_pdf(): return make_response(('dtable io server busy.', 400)) dtable_uuid = request.args.get('dtable_uuid') + plugin_type = request.args.get('plugin_type') page_id = request.args.get('page_id') row_id = request.args.get('row_id') try: task_id = task_manager.convert_page_to_pdf( - dtable_uuid, page_id, row_id) + dtable_uuid, plugin_type, page_id, row_id) except Exception as e: logger.error(e) return make_response((e, 500)) diff --git a/dtable_events/dtable_io/task_manager.py b/dtable_events/dtable_io/task_manager.py index afa3fbcb..f6b42e51 100644 --- a/dtable_events/dtable_io/task_manager.py +++ b/dtable_events/dtable_io/task_manager.py @@ -256,12 +256,12 @@ def query_status(self, task_id): return True, task_result[6:] return False, None - def convert_page_to_pdf(self, dtable_uuid, page_id, row_id): + def convert_page_to_pdf(self, dtable_uuid, plugin_type, page_id, row_id): from dtable_events.dtable_io import convert_page_to_pdf task_id = str(uuid.uuid4()) task = (convert_page_to_pdf, - (dtable_uuid, page_id, row_id)) + (dtable_uuid, plugin_type, page_id, row_id)) self.tasks_queue.put(task_id) self.tasks_map[task_id] = task