Skip to content

Commit

Permalink
support export doc plugin to pdf
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexCXC committed Sep 10, 2024
1 parent b536df2 commit 2ec4280
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 30 deletions.
2 changes: 1 addition & 1 deletion dtable_events/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dtable_events.data_sync.data_syncer import DataSyncer
from dtable_events.workflow.workflow_actions import WorkflowActionsHandler
from dtable_events.workflow.workflow_schedules_scanner import WorkflowSchedulesScanner
from dtable_events.page_design.manager import conver_page_to_pdf_manager
from dtable_events.convert_page.manager import conver_page_to_pdf_manager
from dtable_events.api_calls.api_calls_counter import APICallsCounter


Expand Down
5 changes: 3 additions & 2 deletions dtable_events/automations/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dtable_events.app.config import DTABLE_WEB_SERVICE_URL, DTABLE_PRIVATE_KEY, \
SEATABLE_FAAS_AUTH_TOKEN, SEATABLE_FAAS_URL, INNER_DTABLE_DB_URL
from dtable_events.dtable_io import send_wechat_msg, send_email_msg, send_dingtalk_msg, batch_send_email_msg
from dtable_events.page_design.manager import conver_page_to_pdf_manager
from dtable_events.convert_page.manager import conver_page_to_pdf_manager
from dtable_events.notification_rules.notification_rules_utils import send_notification, fill_msg_blanks_with_sql_row
from dtable_events.utils import uuid_str_to_36_chars, is_valid_email, get_inner_dtable_server_url, \
normalize_file_path, gen_file_get_url, gen_random_option
Expand Down Expand Up @@ -2851,7 +2851,8 @@ def do_action(self):
'workspace_id': self.workspace_id,
'file_names_dict': file_names_dict,
'target_column_key': self.target_column_key,
'table_id': self.auto_rule.table_id
'table_id': self.auto_rule.table_id,
'plugin_type': 'page-design'
})
except Full:
self.auto_rule.append_warning({
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from seaserv import seafile_api

from dtable_events.app.config import DTABLE_WEB_SERVICE_URL, INNER_DTABLE_DB_URL
from dtable_events.page_design.utils import get_driver, CHROME_DATA_DIR, open_page_view, wait_page_view
from dtable_events.convert_page.utils import get_chrome_data_dir, get_driver, open_page_view, wait_page_view
from dtable_events.utils import get_inner_dtable_server_url, get_opt_from_conf_or_env
from dtable_events.utils.dtable_server_api import DTableServerAPI, NotFoundException
from dtable_events.utils.dtable_db_api import DTableDBAPI
Expand Down Expand Up @@ -45,26 +45,26 @@ def init(self, config):
def get_driver(self, index):
    """
    Return the driver cached for this index, creating and caching one on first use.

    :param index: key into self.drivers; also used in the chrome data dir name
    :return: the driver stored in self.drivers[index]
    """
    cached = self.drivers.get(index)
    if cached:
        return cached

    # each index gets its own chrome user-data directory
    created = get_driver(get_chrome_data_dir(f'convert-manager-{index}'))
    self.drivers[index] = created
    return created

def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, page_id, table_name, target_column, step_row_ids, file_names_dict):
def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, plugin_type, page_id, table_name, target_column, step_row_ids, file_names_dict):
dtable_server_api = DTableServerAPI('dtable-events', dtable_uuid, dtable_server_url, DTABLE_WEB_SERVICE_URL, repo_id, workspace_id)
dtable_db_api = DTableDBAPI('dtable-events', dtable_uuid, INNER_DTABLE_DB_URL)
rows_files_dict = {}
row_session_dict = {}

# open rows
for row_id in step_row_ids:
session_id = open_page_view(driver, dtable_uuid, page_id, row_id, dtable_server_api.internal_access_token)
session_id = open_page_view(driver, dtable_uuid, plugin_type, page_id, row_id, dtable_server_api.internal_access_token)
row_session_dict[row_id] = session_id

# wait for chrome windows rendering
for row_id in step_row_ids:
output = io.BytesIO() # receive pdf content
session_id = row_session_dict[row_id]
wait_page_view(driver, session_id, row_id, output)
wait_page_view(driver, session_id, plugin_type, row_id, output)
file_name = file_names_dict.get(row_id, f'{dtable_uuid}_{page_id}_{row_id}.pdf')
if not file_name.endswith('.pdf'):
file_name += '.pdf'
Expand Down Expand Up @@ -92,7 +92,7 @@ def batch_convert_rows(self, driver, repo_id, workspace_id, dtable_uuid, page_id
})
dtable_server_api.batch_update_rows(table_name, updates)

def check_resources(self, dtable_uuid, page_id, table_id, target_column_key, row_ids):
def check_resources(self, dtable_uuid, plugin_type, page_id, table_id, target_column_key, row_ids):
"""
:return: resources -> dict or None, error_msg -> str or None
"""
Expand All @@ -115,7 +115,7 @@ def check_resources(self, dtable_uuid, page_id, table_id, target_column_key, row

# plugin
plugin_settings = metadata.get('plugin_settings') or {}
plugin = plugin_settings.get('page-design') or []
plugin = plugin_settings.get(plugin_type) or []
if not plugin:
return None, 'plugin not found'
page = next(filter(lambda page: page.get('page_id') == page_id, plugin), None)
Expand Down Expand Up @@ -150,6 +150,7 @@ def do_convert(self, index):
logger.debug('do_convert task_info: %s', task_info)

dtable_uuid = task_info.get('dtable_uuid')
plugin_type = task_info.get('plugin_type')
page_id = task_info.get('page_id')
row_ids = task_info.get('row_ids')
target_column_key = task_info.get('target_column_key')
Expand All @@ -164,7 +165,7 @@ def do_convert(self, index):
# resource check
# Rather than wait one minute to render a wrong page, a resources check is more effective
try:
resources, error_msg = self.check_resources(dtable_uuid, page_id, table_id, target_column_key, row_ids)
resources, error_msg = self.check_resources(dtable_uuid, plugin_type, page_id, table_id, target_column_key, row_ids)
if not resources:
logger.warning('page design dtable: %s page: %s table: %s column: %s error: %s', dtable_uuid, page_id, table_id, target_column_key, error_msg)
continue
Expand All @@ -186,7 +187,7 @@ def do_convert(self, index):
except Exception as e:
logger.exception('get driver: %s error: %s', index, e)
try:
self.batch_convert_rows(driver, repo_id, workspace_id, dtable_uuid, page_id, table['name'], target_column, step_row_ids, file_names_dict)
self.batch_convert_rows(driver, repo_id, workspace_id, dtable_uuid, plugin_type, page_id, table['name'], target_column, step_row_ids, file_names_dict)
except Exception as e:
logger.exception('convert task: %s error: %s', task_info, e)
finally:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import logging
import time
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
Expand All @@ -16,6 +17,12 @@
CHROME_DATA_DIR = '/tmp/chrome-user-datas'


def get_chrome_data_dir(dir_name='tmp'):
    """
    Build the path of a chrome user-data directory under CHROME_DATA_DIR.

    The shared parent directory CHROME_DATA_DIR is created if missing; the
    returned sub-directory itself is not created here.

    :param dir_name: name of the sub-directory inside CHROME_DATA_DIR
    :return: CHROME_DATA_DIR joined with dir_name
    """
    # exist_ok avoids the check-then-create race when several workers
    # reach this point at the same time
    os.makedirs(CHROME_DATA_DIR, exist_ok=True)
    return os.path.join(CHROME_DATA_DIR, dir_name)


def get_driver(user_data_path):
webdriver_options = Options()

Expand All @@ -29,18 +36,21 @@ def get_driver(user_data_path):
return driver


def open_page_view(driver: webdriver.Chrome, dtable_uuid, plugin_type, page_id, row_id, access_token):
    """
    Open a plugin page in a new browser window.

    :param driver: chrome driver used to open the window
    :param dtable_uuid: uuid of the base, normalized with uuid_str_to_36_chars
    :param plugin_type: 'page-design' or 'document'
    :param page_id: id of the page inside the plugin
    :param row_id: row to render; when falsy, 'page-design' opens the page without a row
    :param access_token: sent as the access-token query parameter
    :return: the last entry of driver.window_handles after the window is opened
    :raises ValueError: when plugin_type is not a supported plugin
    """
    base_url = DTABLE_WEB_SERVICE_URL.strip('/')
    dtable_uuid_36 = uuid_str_to_36_chars(dtable_uuid)

    if plugin_type == 'page-design':
        if row_id:
            url = base_url + '/dtable/%s/page-design/%s/row/%s/' % (dtable_uuid_36, page_id, row_id)
        else:
            url = base_url + '/dtable/%s/page-design/%s/' % (dtable_uuid_36, page_id)
    elif plugin_type == 'document':
        url = base_url + '/dtable/%s/document/%s/row/%s/' % (dtable_uuid_36, page_id, row_id)
    else:
        # previously fell through with `url` unbound and failed with UnboundLocalError
        raise ValueError('unsupported plugin type: %s' % plugin_type)

    url += '?access-token=%s&need_convert=%s' % (access_token, 0)
    logger.debug('url: %s', url)
    # pass the url as a script argument instead of splicing it into the JS source,
    # so a quote inside page_id / row_id cannot break out of the string literal
    driver.execute_script('window.open(arguments[0])', url)
    return driver.window_handles[-1]


def wait_page_view(driver: webdriver.Chrome, session_id, row_id, output):
def wait_page_view(driver: webdriver.Chrome, session_id, plugin_type, row_id, output):
def check_images_and_networks(driver, frequency=0.5):
"""
make sure all images complete
Expand Down Expand Up @@ -82,9 +92,15 @@ def check_images_and_networks(driver, frequency=0.5):

driver.switch_to.window(session_id)

monitor_dom_id = ''
if plugin_type == 'page-design':
monitor_dom_id = 'page-design-render-complete'
elif plugin_type == 'document':
monitor_dom_id = 'document-render-complete'

try:
# make sure react is rendered, timeout await_react_render, rendering is not completed within 3 minutes, and rendering performance needs to be improved
WebDriverWait(driver, await_react_render).until(lambda driver: driver.find_element_by_id('page-design-render-complete') is not None, message='wait react timeout')
WebDriverWait(driver, await_react_render).until(lambda driver: driver.find_element_by_id(monitor_dom_id) is not None, message='wait react timeout')
# make sure images from asset are rendered, timeout 120s
WebDriverWait(driver, 120, poll_frequency=1).until(lambda driver: check_images_and_networks(driver), message='wait images and networks timeout')
time.sleep(sleep_time) # wait for all rendering
Expand Down Expand Up @@ -125,6 +141,6 @@ def check_images_and_networks(driver, frequency=0.5):
# logger.debug('network logs end')


def convert_page_to_pdf(driver: webdriver.Chrome, dtable_uuid, plugin_type, page_id, row_id, access_token, output):
    """
    Open the plugin page in a new window, then wait on that window via wait_page_view.

    All arguments are forwarded unchanged to open_page_view / wait_page_view;
    `output` is handed to wait_page_view.
    """
    wait_page_view(
        driver,
        open_page_view(driver, dtable_uuid, plugin_type, page_id, row_id, access_token),
        plugin_type,
        row_id,
        output,
    )
13 changes: 7 additions & 6 deletions dtable_events/dtable_io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import_excel_csv_add_table_by_dtable_server, update_parsed_file_by_dtable_server, \
parse_update_excel_upload_excel_to_json, parse_update_csv_upload_csv_to_json, parse_and_import_excel_csv_to_dtable, \
parse_and_import_excel_csv_to_table, parse_and_update_file_to_table, parse_and_append_excel_csv_to_table
from dtable_events.page_design.utils import CHROME_DATA_DIR, convert_page_to_pdf as _convert_page_to_pdf, get_driver
from dtable_events.convert_page.utils import get_chrome_data_dir, convert_page_to_pdf as _convert_page_to_pdf, get_driver
from dtable_events.statistics.db import save_email_sending_records, batch_save_email_sending_records
from dtable_events.data_sync.data_sync_utils import run_sync_emails
from dtable_events.utils import get_inner_dtable_server_url, is_valid_email, uuid_str_to_36_chars
Expand Down Expand Up @@ -823,22 +823,23 @@ def batch_send_email_msg(auth_info, send_info_list, username, config=None, db_se
session.close()


def convert_page_to_pdf(dtable_uuid, plugin_type, page_id, row_id):
    """
    Convert one plugin page to pdf with a dedicated chrome driver.

    The target path /tmp/dtable-io/convert-page-to-pdf/<dtable_uuid>_<page_id>_<row_id>.pdf
    is passed as the output of the conversion. Conversion errors are logged,
    not raised.

    :param dtable_uuid: uuid of the base
    :param plugin_type: plugin the page belongs to, forwarded to the converter
    :param page_id: id of the page inside the plugin
    :param row_id: id of the row to render
    """
    import shutil  # local import keeps this block independent of the file-level imports

    dtable_server_url = get_inner_dtable_server_url()
    access_token = DTableServerAPI('dtable-events', dtable_uuid, dtable_server_url).internal_access_token
    target_dir = '/tmp/dtable-io/convert-page-to-pdf'
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, '%s_%s_%s.pdf' % (dtable_uuid, page_id, row_id))

    # one chrome user-data directory per task
    chrome_data_dir = get_chrome_data_dir(f'{dtable_uuid}-{page_id}-{row_id}')
    driver = get_driver(chrome_data_dir)
    try:
        _convert_page_to_pdf(driver, dtable_uuid, plugin_type, page_id, row_id, access_token, target_path)
    except Exception as e:
        dtable_io_logger.exception('convert dtable: %s page: %s row: %s error: %s', dtable_uuid, page_id, row_id, e)
    finally:
        try:
            # quit first so chrome no longer holds files in its data directory
            driver.quit()
        finally:
            # remove the full path; the bare directory name never matched an
            # existing path, so the data directory used to be left behind
            shutil.rmtree(chrome_data_dir, ignore_errors=True)


Expand Down
3 changes: 2 additions & 1 deletion dtable_events/dtable_io/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,12 +410,13 @@ def convert_page_to_pdf():
return make_response(('dtable io server busy.', 400))

dtable_uuid = request.args.get('dtable_uuid')
plugin_type = request.args.get('plugin_type')
page_id = request.args.get('page_id')
row_id = request.args.get('row_id')

try:
task_id = task_manager.convert_page_to_pdf(
dtable_uuid, page_id, row_id)
dtable_uuid, plugin_type, page_id, row_id)
except Exception as e:
logger.error(e)
return make_response((e, 500))
Expand Down
4 changes: 2 additions & 2 deletions dtable_events/dtable_io/task_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,12 @@ def query_status(self, task_id):
return True, task_result[6:]
return False, None

def convert_page_to_pdf(self, dtable_uuid, page_id, row_id):
def convert_page_to_pdf(self, dtable_uuid, plugin_type, page_id, row_id):
from dtable_events.dtable_io import convert_page_to_pdf

task_id = str(uuid.uuid4())
task = (convert_page_to_pdf,
(dtable_uuid, page_id, row_id))
(dtable_uuid, plugin_type, page_id, row_id))
self.tasks_queue.put(task_id)
self.tasks_map[task_id] = task

Expand Down

0 comments on commit 2ec4280

Please sign in to comment.