Skip to content

Commit

Permalink
Merge pull request #1 from datopian/resoure-download
Browse files Browse the repository at this point in the history
Enable resource download for ckan 2.9.4 running on azure
  • Loading branch information
MuhammadIsmailShahzad authored Feb 10, 2022
2 parents b26389e + f7d3f89 commit d679b71
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 45 deletions.
5 changes: 4 additions & 1 deletion ckanext/cloudstorage/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from ckanext.cloudstorage import helpers
import ckanext.cloudstorage.logic.action.multipart as m_action
import ckanext.cloudstorage.logic.auth.multipart as m_auth
from ckanext.cloudstorage import views
import logging

log = logging.getLogger(__name__)

if plugins.toolkit.check_ckan_version(min_version='2.9.0'):
from ckanext.cloudstorage.plugin.flask_plugin import MixinPlugin
Expand All @@ -15,7 +19,6 @@

class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin):
plugins.implements(plugins.IUploader)
plugins.implements(plugins.IRoutes, inherit=True)
plugins.implements(plugins.IConfigurable)
plugins.implements(plugins.IConfigurer)
plugins.implements(plugins.IActions)
Expand Down
8 changes: 7 additions & 1 deletion ckanext/cloudstorage/plugin/flask_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import ckan.plugins as p
import ckanext.cloudstorage.cli as cli

from ckanext.cloudstorage import views

class MixinPlugin(p.SingletonPlugin):
p.implements(p.IClick)
Expand All @@ -12,3 +12,9 @@ class MixinPlugin(p.SingletonPlugin):

def get_commands(self):
return cli.get_commands()


def get_blueprint(self):
return [
views.resource_blueprint
]
98 changes: 57 additions & 41 deletions ckanext/cloudstorage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from werkzeug.datastructures import FileStorage as FlaskFileStorage
ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)

import logging

log = logging.getLogger(__name__)

def _get_underlying_file(wrapper):
if isinstance(wrapper, FlaskFileStorage):
Expand Down Expand Up @@ -154,6 +157,15 @@ def use_secure_urls(self):
config.get('ckanext.cloudstorage.use_secure_urls', False)
)


@property
def connection_link(self):
"""
The connection link to the container
"""
return config['ckanext.cloudstorage.connection_link']


@property
def aws_use_boto3_sessions(self):
"""
Expand Down Expand Up @@ -245,10 +257,11 @@ def __init__(self, resource):

# Check to see if a file has been provided
if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)):
self.filename = munge.munge_filename(upload_field_storage.filename)
self.file_upload = _get_underlying_file(upload_field_storage)
resource['url'] = self.filename
resource['url_type'] = 'upload'
if len(upload_field_storage.filename):
self.filename = munge.munge_filename(upload_field_storage.filename)
self.file_upload = _get_underlying_file(upload_field_storage)
resource['url'] = self.filename
resource['url_type'] = 'upload'
elif multipart_name and self.can_use_advanced_aws:
# This means that file was successfully uploaded and stored
# at cloud.
Expand Down Expand Up @@ -295,32 +308,24 @@ def upload(self, id, max_size=10):
:param max_size: Ignored.
"""
if self.filename:

if self.can_use_advanced_azure:
from azure.storage import blob as azure_blob
from azure.storage.blob.models import ContentSettings

blob_service = azure_blob.BlockBlobService(
self.driver_options['key'],
self.driver_options['secret']
)
content_settings = None
from azure.storage.blob import ContentSettings # type: ignore
from azure.storage.blob import BlobServiceClient

svc_client = BlobServiceClient.from_connection_string(self.connection_link)
container_client = svc_client.get_container_client(self.container_name)
blob_client = container_client.get_blob_client(self.path_from_filename(
id,
self.filename
))
stream = self.file_upload
blob_client.upload_blob(stream, overwrite=True)
if self.guess_mimetype:
content_type, _ = mimetypes.guess_type(self.filename)
if content_type:
content_settings = ContentSettings(
content_type=content_type
)
blob_client.set_http_headers(ContentSettings(content_type=mimetypes))
return stream.tell()

return blob_service.create_blob_from_stream(
container_name=self.container_name,
blob_name=self.path_from_filename(
id,
self.filename
),
stream=self.file_upload,
content_settings=content_settings
)
else:
# TODO: This might not be needed once libcloud is upgraded
if isinstance(self.file_upload, SpooledTemporaryFile):
Expand Down Expand Up @@ -374,23 +379,34 @@ def get_url_from_filename(self, rid, filename, content_type=None):
# If advanced azure features are enabled, generate a temporary
# shared access link instead of simply redirecting to the file.
if self.can_use_advanced_azure and self.use_secure_urls:
from azure.storage import blob as azure_blob
from azure.storage.blob import BlobClient, BlobSasPermissions, BlobServiceClient, generate_blob_sas

svc_client = BlobServiceClient.from_connection_string(self.connection_link)
container_client = svc_client.get_container_client(self.container_name)
blob_client = container_client.get_blob_client(path)
permissions = BlobSasPermissions(read=True)
token_expires = datetime.utcnow() + timedelta(hours=1)
sas_token = generate_blob_sas(account_name=blob_client.account_name,
account_key=blob_client.credential.account_key,
container_name=blob_client.container_name,
blob_name=blob_client.blob_name,
permission=permissions,
expiry=token_expires)

blob_client = BlobClient(svc_client.url,
container_name=blob_client.container_name,
blob_name=blob_client.blob_name,
credential=sas_token)

# The url from blob_client above actually generate the url for download
# but the file path is mixed with the filename e.g
# we want to download `example.csv` but the url generate this
# `resource12565-316r3example.csv` which is mung of the filename with its path
# hence the below method enable the actual download of the file with its name
url = f'https://{blob_client.account_name}.blob.core.windows.net/{self.container_name}/{path}?{sas_token}'

return url

blob_service = azure_blob.BlockBlobService(
self.driver_options['key'],
self.driver_options['secret']
)

return blob_service.make_blob_url(
container_name=self.container_name,
blob_name=path,
sas_token=blob_service.generate_blob_shared_access_signature(
container_name=self.container_name,
blob_name=path,
expiry=datetime.utcnow() + timedelta(hours=1),
permission=azure_blob.BlobPermissions.READ
)
)
elif self.can_use_advanced_aws and self.use_secure_urls:

from boto.s3.connection import S3Connection
Expand Down
1 change: 1 addition & 0 deletions ckanext/cloudstorage/views/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .resource_download import resource_blueprint
77 changes: 77 additions & 0 deletions ckanext/cloudstorage/views/resource_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os.path

from ckan.plugins.toolkit import c, _
from ckan import logic, model
from ckan.lib import base, uploader
import ckan.lib.helpers as h
from flask import Blueprint
import logging

log = logging.getLogger(__name__)

resource_blueprint = Blueprint('cloudresource', __name__)

def resource_download(id, resource_id, filename= None):
context = {
'model': model,
'session': model.Session,
'user': c.user or c.author,
'auth_user_obj': c.userobj
}

try:
resource = logic.get_action('resource_show')(
context,
{
'id': resource_id
}
)
except logic.NotFound:
base.abort(404, _('Resource not found'))
except logic.NotAuthorized:
base.abort(401, _('Unauthorized to read resource {0}'.format(id)))

# This isn't a file upload, so either redirect to the source
# (if available) or error out.
if resource.get('url_type') != 'upload':
url = resource.get('url')
if not url:
base.abort(404, _('No download is available'))
h.redirect_to(url)

if filename is None:
# No filename was provided so we'll try to get one from the url.
filename = os.path.basename(resource['url'])

upload = uploader.get_resource_uploader(resource)

# if the client requests with a Content-Type header (e.g. Text preview)
# we have to add the header to the signature
try:
content_type = getattr(c.pylons.request, "content_type", None)
except AttributeError:
content_type = None
uploaded_url = upload.get_url_from_filename(resource['id'], filename,
content_type=content_type)

# The uploaded file is missing for some reason, such as the
# provider being down.
if uploaded_url is None:
base.abort(404, _('No download is available'))

return h.redirect_to(uploaded_url)



resource_blueprint.add_url_rule(
rule='/dataset/{id}/resource/{resource_id}/download',
view_func = resource_download,
methods= [u'GET']
)


resource_blueprint.add_url_rule(
rule=u'/dataset/<id>/resource/<resource_id>/download/<filename>',
view_func = resource_download,
methods= [u'GET']
)
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ boto3==1.17.112
apache-libcloud==2.8.3
ckanapi>=1.0,<5
six
azure.storage
azure.storage.blob
azure-storage-blob==12.2.0

0 comments on commit d679b71

Please sign in to comment.