Skip to content

Commit

Permalink
Fix import procedure
Browse files Browse the repository at this point in the history
  • Loading branch information
frafra committed Sep 21, 2023
1 parent 78ccd49 commit d8d4d27
Showing 1 changed file with 23 additions and 7 deletions.
30 changes: 23 additions & 7 deletions wizard/wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@

import requests
from pywebio import start_server
from pywebio.input import actions, file_upload
from pywebio.input import actions, file_upload, input, input_group, NUMBER
from pywebio.output import clear, put_error, put_link, put_success, put_text, use_scope

logging.basicConfig(level=os.getenv("LOGGING", "INFO"))

OPENREFINE_URL = os.getenv("OPENREFINE_URL", "http://localhost:3333")
OPENREFINE_PUBLIC_URL = os.getenv("OPENREFINE_PUBLIC_URL", OPENREFINE_URL)
POSTGREST_URL = os.getenv("POSTGREST_URL", "http:://localhost:3000")
POSTGREST_URL = os.getenv("POSTGREST_URL", "http://localhost:3000")
POSTGREST_TOKEN = os.getenv("POSTGREST_TOKEN")

logging.debug(os.environ)
Expand Down Expand Up @@ -55,6 +55,7 @@ def request(self, command, method="GET", files=None, **params):
params=params,
stream=True,
)
response.raise_for_status()
return response

def create_project(self, name, files, file_format, index=0, **options):
Expand Down Expand Up @@ -87,13 +88,21 @@ def create_project(self, name, files, file_format, index=0, **options):

response = self.request("create-project-from-upload", method="POST", files=form)

project = parse_qs(urlparse(response.url).query)["project"]
project = int(parse_qs(urlparse(response.url).query)["project"][0])

# https://github.com/OpenRefine/OpenRefine/issues/5387
response = self.request("get-rows", project=project, start=0, limit=0)
self.get_rows(project)

return project

def get_rows(self, project, start=0, limit=0):
    """Issue the ``get-rows`` command and return its JSON-decoded reply.

    Args:
        project: Project identifier, forwarded as the ``project`` parameter.
        start: Forwarded unchanged as the ``start`` parameter (default 0).
        limit: Forwarded unchanged as the ``limit`` parameter (default 0).

    Returns:
        Whatever ``response.json()`` yields for the command's response.
    """
    query = {"project": project, "start": start, "limit": limit}
    return self.request("get-rows", **query).json()

def get_metadata(self, project):
    """Issue the ``get-project-metadata`` command and return its JSON-decoded reply.

    Args:
        project: Project identifier, forwarded as the ``project`` parameter.

    Returns:
        Whatever ``response.json()`` yields for the command's response.
    """
    return self.request("get-project-metadata", project=project).json()

def project_url(self, project):
internal_url = requests.get(
self.url + "/project", params={"project": project}
Expand All @@ -113,14 +122,21 @@ def project_export(self, project, file_format="csv"):
format=file_format,
)


def wizard():
openrefine = OpenRefine(OPENREFINE_URL, OPENREFINE_PUBLIC_URL)

files = file_upload("Select spreadsheets:", multiple=True)
data = input_group(
"Import", [
file_upload("Select spreadsheets:", multiple=True, name="files"),
input("Ignore first N line(s) at beginning of file", NUMBER, value=0, name="ignorelines"),
input("Parse next M line(s) as column headers", NUMBER, value=1, name="headerlines"),
]
)

files, ignorelines, headerlines = data["files"], data["ignorelines"], data["headerlines"]

project = openrefine.create_project(
files[0]["filename"], files, "binary/text/xml/xls/xlsx", ignoreLines=1
files[0]["filename"], files, files[0]["mime_type"], headerLines=headerlines, ignoreLines=ignorelines
)
project_url = openrefine.project_url(project)

Expand Down

0 comments on commit d8d4d27

Please sign in to comment.