Add files via upload

fayharinn · Apr 8, 2024 · b6712be · b6712be
1 parent 04fa3f5
commit b6712be
Show file tree

Hide file tree

Showing 5 changed files with 390 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,85 @@
+
+# R2Client
+
+`R2Client` is a Python client library for interacting with Cloudflare R2 storage. It makes it easy to manage files in an R2 bucket over HTTP: requests are signed with Python's standard-library modules (`hmac`, `hashlib`) and sent with `requests`, which makes the library straightforward to integrate into existing Python projects.
+
+## Features
+
+- File upload and download
+- Listing files and folders within a bucket
+
+## Installation
+
+To install `r2client`, simply use pip:
+
+```
+pip install r2client
+```
+
+## Quick Start
+
+Here's how to get started with `r2client`:
+
+### Setting Up
+
+First, import `R2Client` and initialize it with your credentials:
+
+```python
+from r2client.R2Client import R2Client
+
+# Initialize the R2Client
+client = R2Client(
+    access_key='<ACCESS_KEY>',
+    secret_key='<SECRET_KEY>',
+    endpoint='<ENDPOINT>'  # example: "https://***.r2.cloudflarestorage.com"
+)
+```
+
+### Uploading a File
+
+To upload a file to your R2 bucket:
+
+```python
+bucket_name = 'your-bucket-name'
+local_file_path = 'path/to/your/local/file'
+r2_file_key = 'desired/path/in/bucket'
+
+client.upload_file(bucket_name, local_file_path, r2_file_key)
+```
+
+### Downloading a File
+
+To download a file from your R2 bucket:
+
+```python
+file_key = 'path/to/the/file/in/bucket'
+local_file_name = 'path/to/save/the/downloaded/file'
+
+client.download_file(bucket_name, file_key, local_file_name)
+```
+
+### Listing Files
+
+To list files in a specific bucket:
+
+```python
+files_dict = client.list_files(bucket_name)
+print(files_dict)
+```
+
+### Listing Folders
+
+To list folders within a bucket:
+
+```python
+folders = client.list_folders(bucket_name)
+print(folders)
+```
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a pull request or create an issue for any bugs or feature requests.
+
+## License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
diff --git a/r2client/R2Client.py b/r2client/R2Client.py
@@ -0,0 +1,216 @@
+import requests
+import hmac
+import hashlib
+import datetime
+import xml.etree.ElementTree as ET
+from mime_types import *
+
+
+class R2Client:
+    """
+    A client class for interacting with Cloudflare R2 storage with Python native packages.
+
+    :param access_key: The access key for authentication.
+    :param secret_key: The secret key for authentication.
+    :param account_id: The account ID for the R2 storage.
+    """
+
+    def __init__(self, access_key, secret_key, endpoint):
+        self.access_key = access_key
+        self.secret_key = secret_key
+        self.endpoint = endpoint
+
+    def sign(self, key, msg):
+        """
+        Sign a message using the provided key.
+
+        :param key: The key used for signing.
+        :param msg: The message to be signed.
+        :return: The signed message digest.
+        """
+        return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()
+
+    def get_signature_key(self, key, date_stamp, region_name, service_name):
+        """
+        Generate a signature key based on the provided parameters.
+
+        :param key: The secret key.
+        :param date_stamp: The date stamp in the format 'YYYYMMDD'.
+        :param region_name: The region name.
+        :param service_name: The service name.
+        :return: The generated signature key.
+        """
+        k_date = self.sign(('AWS4' + key).encode('utf-8'), date_stamp)
+        k_region = self.sign(k_date, region_name)
+        k_service = self.sign(k_region, service_name)
+        k_signing = self.sign(k_service, 'aws4_request')
+        return k_signing
+
+    def create_request_headers_upload(self, bucket_name, file_key=None, payload_hash=None, method='PUT', content_type=None):
+        service = 's3'
+        region = 'auto'
+        host = self.endpoint.split("://")[-1]
+
+        t = datetime.datetime.utcnow()
+        amz_date = t.strftime('%Y%m%dT%H%M%SZ')
+        date_stamp = t.strftime('%Y%m%d')
+
+        canonical_uri = f'/{bucket_name}/{file_key}'
+        canonical_querystring = ''
+        canonical_headers = f"content-type:{content_type}\nhost:{host}\nx-amz-content-sha256:{payload_hash}\nx-amz-date:{amz_date}\n"
+        signed_headers = 'content-type;host;x-amz-content-sha256;x-amz-date'
+
+        canonical_request = f"{method}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}"
+
+        algorithm = 'AWS4-HMAC-SHA256'
+        credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
+        string_to_sign = f"{algorithm}\n{amz_date}\n{credential_scope}\n" + hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
+
+        signing_key = self.get_signature_key(self.secret_key, date_stamp, region, service)
+        signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
+
+        authorization_header = f"{algorithm} Credential={self.access_key}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}"
+
+        headers = {
+            'x-amz-date': amz_date,
+            'x-amz-content-sha256': payload_hash,
+            'Authorization': authorization_header,
+            'Content-Type': content_type
+        }
+
+        return headers
+
+    def create_request_headers(self, bucket_name, file_key=None, payload_hash=None, method='GET', content_type=None):
+        service = 's3'
+        region = 'auto'
+        host = self.endpoint.split("://")[-1]
+
+        t = datetime.datetime.utcnow()
+        amz_date = t.strftime('%Y%m%dT%H%M%SZ')
+        date_stamp = t.strftime('%Y%m%d')
+
+        canonical_uri = f'/{bucket_name}/' if file_key is None else f'/{bucket_name}/{file_key}'
+        canonical_querystring = ''
+        canonical_headers = f"host:{host}\nx-amz-date:{amz_date}\n"
+
+        signed_headers = 'host;x-amz-date'
+        if content_type:
+            canonical_headers += f"content-type:{content_type}\n"
+            signed_headers += ';content-type'
+
+        payload_hash = payload_hash or hashlib.sha256(''.encode('utf-8')).hexdigest()
+        canonical_request = f"{method}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}"
+
+        algorithm = 'AWS4-HMAC-SHA256'
+        credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
+        string_to_sign = f"{algorithm}\n{amz_date}\n{credential_scope}\n" + hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
+
+        signing_key = self.get_signature_key(self.secret_key, date_stamp, region, service)
+        signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
+
+        authorization_header = f"{algorithm} Credential={self.access_key}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}"
+
+        headers = {
+            'x-amz-date': amz_date,
+            'x-amz-content-sha256': payload_hash,
+            'Authorization': authorization_header
+        }
+
+        if content_type:
+            headers['Content-Type'] = content_type
+
+        return headers
+
+
+    def upload_file(self, bucket_name, local_file_path, r2_file_key):
+        file_url = f"{self.endpoint}/{bucket_name}/{r2_file_key}"
+
+        with open(local_file_path, 'rb') as file:
+            file_data = file.read()
+
+        payload_hash = hashlib.sha256(file_data).hexdigest()
+        mimetype = get_content_type(local_file_path)
+        headers = self.create_request_headers_upload(bucket_name, r2_file_key, payload_hash, 'PUT', mimetype)
+
+        response = requests.put(file_url, headers=headers, data=file_data)
+
+        if response.status_code == 200:
+            print(f"File {local_file_path} uploaded successfully as {r2_file_key}.")
+        else:
+            print(f"Failed to upload file {local_file_path}. Status code: {response.status_code}")
+            print("Response Content:", response.text)
+
+    def download_file(self, bucket_name, file_key, local_file_name):
+        """
+        Download a file from the specified bucket.
+
+        :param bucket_name: The name of the bucket.
+        :param file_key: The key of the file to download.
+        :param local_file_name: The local file name to save the downloaded file.
+        """
+        file_url = f"{self.endpoint}/{bucket_name}/{file_key}"
+        mimetype = get_content_type(file_url)
+        headers = self.create_request_headers(bucket_name, file_key)
+
+        response = requests.get(file_url, headers=headers)
+
+        if response.status_code == 200:
+            with open(local_file_name, "wb") as file:
+                file.write(response.content)
+            print(f"File {file_key} downloaded successfully.")
+        else:
+            print(f"Failed to download file {file_key}. Status code: {response.status_code}")
+
+    def list_files(self, bucket_name):
+        """
+        List all files in the specified bucket.
+
+        :param bucket_name: The name of the bucket.
+        :return: A dictionary containing folder names as keys and lists of file names as values.
+        """
+        headers = self.create_request_headers(bucket_name)
+
+        response = requests.get(f"{self.endpoint}/{bucket_name}/", headers=headers)
+
+        if response.status_code == 200:
+            root = ET.fromstring(response.content)
+            files_dict = {}
+            for content in root.findall('{http://s3.amazonaws.com/doc/2006-03-01/}Contents'):
+                file_key = content.find('{http://s3.amazonaws.com/doc/2006-03-01/}Key').text
+                folder_name = file_key.split('/')[0] if '/' in file_key else ''
+                file_name = file_key.split('/')[-1]
+
+                if folder_name in files_dict:
+                    files_dict[folder_name].append(file_name)
+                else:
+                    files_dict[folder_name] = [file_name]
+
+            return files_dict
+        else:
+            print(f"Failed to retrieve file list. Status code: {response.status_code}")
+            return {}
+
+    def list_folders(self, bucket_name):
+        """
+        List all folders in the specified bucket.
+
+        :param bucket_name: The name of the bucket.
+        :return: A list of folder names.
+        """
+        headers = self.create_request_headers(bucket_name)
+
+        response = requests.get(f"{self.endpoint}/{bucket_name}/", headers=headers)
+
+        if response.status_code == 200:
+            root = ET.fromstring(response.content)
+            folders = set()
+            for content in root.findall('{http://s3.amazonaws.com/doc/2006-03-01/}Contents'):
+                file_key = content.find('{http://s3.amazonaws.com/doc/2006-03-01/}Key').text
+                if '/' in file_key:
+                    folder_name = file_key.split('/')[0]
+                    folders.add(folder_name)
+            return list(folders)
+        else:
+            print(f"Failed to retrieve folder list. Status code: {response.status_code}")
+            return []
+
diff --git a/r2client/__init__.py b/r2client/__init__.py
diff --git a/r2client/mime_types.py b/r2client/mime_types.py
@@ -0,0 +1,71 @@
+import mimetypes
+
+# Maps a lower-case file extension (leading dot included) to its MIME type.
+# get_content_type consults this table before falling back to the standard
+# mimetypes module.
+mime_types = {
+    # Image formats
+    '.png': 'image/png',
+    '.jpg': 'image/jpeg',
+    '.jpeg': 'image/jpeg',
+    '.gif': 'image/gif',
+    '.svg': 'image/svg+xml',  # Scalable Vector Graphics
+    '.ico': 'image/x-icon',    # Icon file format
+
+    # Audio formats
+    '.m4a': 'audio/x-m4a',
+    '.mp3': 'audio/mpeg',
+    '.wav': 'audio/wav',
+    '.ogg': 'audio/ogg',       # Ogg audio format
+
+    # Video formats
+    '.mp4': 'video/mp4',
+    '.avi': 'video/x-msvideo',
+    '.mov': 'video/quicktime',
+    '.flv': 'video/x-flv',
+    '.wmv': 'video/x-ms-wmv',
+    '.webm': 'video/webm',     # WebM video format
+
+    # Document formats
+    '.pdf': 'application/pdf',
+    '.doc': 'application/msword',
+    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    '.ppt': 'application/vnd.ms-powerpoint',
+    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    '.xls': 'application/vnd.ms-excel',
+    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    '.txt': 'text/plain',
+
+    # Web formats
+    '.html': 'text/html',
+    '.css': 'text/css',
+    '.js': 'application/javascript',
+    '.json': 'application/json',
+    '.xml': 'application/xml',
+
+    # Other formats
+    '.csv': 'text/csv',
+    '.zip': 'application/zip',
+    '.tar': 'application/x-tar',
+    '.gz': 'application/gzip',
+    '.rar': 'application/vnd.rar',
+    '.7z': 'application/x-7z-compressed',
+    '.eps': 'application/postscript',  # Encapsulated PostScript format
+    '.sql': 'application/sql',         # SQL files
+    '.java': 'text/x-java-source',      # Java source code
+    # Add more mappings as needed
+}
+
+
+def get_content_type(file_key):
+    """
+    Determine the MIME type based on the file extension using the mimetypes module.
+
+    :param file_key: The file key or file name from which to extract the extension.
+    :return: The MIME type as a string, defaults to 'application/octet-stream' if not detected.
+    """
+      # Try to get the MIME type from the provided dictionary
+    mime_type = mime_types.get("."+file_key.split(".")[-1].lower())
+    if mime_type:
+        return mime_type
+
+    # If not found in the provided dictionary, use the mimetypes module
+    mime_type, _ = mimetypes.guess_type(file_key)
+    return mime_type if mime_type is not None else 'application/octet-stream'
diff --git a/setup.py b/setup.py
@@ -0,0 +1,18 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='r2client',
+    version='0.2',
+    packages=find_packages(),
+    description='A lightweight framework to manage your R2 bucket.',
+    long_description=open('README.md', encoding='utf-8').read(),
+    long_description_content_type='text/markdown',  # Specify the content type as Markdown
+    url='https://github.com/fayharinn/R2-Client',
+    author='Younes Bensitel',
+    author_email='younes@fayhe.fr',
+    license='MIT',
+    install_requires=[
+        'requests',
+    ],
+    python_requires='>=3.6',
+)