automated terminal push
This commit is contained in:
cdk-env/lib/python3.12/site-packages/s3transfer/__init__.py (new file, 887 lines)
@@ -0,0 +1,887 @@
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Abstractions over S3's upload/download operations.

This module provides high level abstractions for efficient
uploads/downloads. It handles several things for the user:

* Automatically switching to multipart transfers when
  a file is over a specific size threshold
* Uploading/downloading a file in parallel
* Throttling based on max bandwidth
* Progress callbacks to monitor transfers
* Retries. While botocore handles retries for streaming uploads,
  it is not possible for it to handle retries for streaming
  downloads. This module handles retries for both cases so
  you don't need to implement any retry logic yourself.

This module has a reasonable set of defaults. It also allows you
to configure many aspects of the transfer process including:

* Multipart threshold size
* Max parallel downloads
* Max bandwidth
* Socket timeouts
* Retry amounts

There is no support for s3->s3 multipart copies at this
time.


.. _ref_s3transfer_usage:

Usage
=====

The simplest way to use this module is:

.. code-block:: python

    client = boto3.client('s3', 'us-west-2')
    transfer = S3Transfer(client)
    # Upload /tmp/myfile to s3://bucket/key
    transfer.upload_file('/tmp/myfile', 'bucket', 'key')

    # Download s3://bucket/key to /tmp/myfile
    transfer.download_file('bucket', 'key', '/tmp/myfile')

The ``upload_file`` and ``download_file`` methods also accept
``**kwargs``, which will be forwarded through to the corresponding
client operation. Here are a few examples using ``upload_file``::

    # Making the object public
    transfer.upload_file('/tmp/myfile', 'bucket', 'key',
                         extra_args={'ACL': 'public-read'})

    # Setting metadata
    transfer.upload_file('/tmp/myfile', 'bucket', 'key',
                         extra_args={'Metadata': {'a': 'b', 'c': 'd'}})

    # Setting content type
    transfer.upload_file('/tmp/myfile.json', 'bucket', 'key',
                         extra_args={'ContentType': "application/json"})


The ``S3Transfer`` class also supports progress callbacks so you can
provide transfer progress to users. Both the ``upload_file`` and
``download_file`` methods take an optional ``callback`` parameter.
Here's an example of how to print a simple progress percentage
to the user:

.. code-block:: python

    class ProgressPercentage(object):
        def __init__(self, filename):
            self._filename = filename
            self._size = float(os.path.getsize(filename))
            self._seen_so_far = 0
            self._lock = threading.Lock()

        def __call__(self, bytes_amount):
            # To simplify we'll assume this is hooked up
            # to a single filename.
            with self._lock:
                self._seen_so_far += bytes_amount
                percentage = (self._seen_so_far / self._size) * 100
                sys.stdout.write(
                    "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far,
                                               self._size, percentage))
                sys.stdout.flush()


    transfer = S3Transfer(boto3.client('s3', 'us-west-2'))
    # Upload /tmp/myfile to s3://bucket/key and print upload progress.
    transfer.upload_file('/tmp/myfile', 'bucket', 'key',
                         callback=ProgressPercentage('/tmp/myfile'))



You can also provide a TransferConfig object to the S3Transfer
object that gives you more fine grained control over the
transfer. For example:

.. code-block:: python

    client = boto3.client('s3', 'us-west-2')
    config = TransferConfig(
        multipart_threshold=8 * 1024 * 1024,
        max_concurrency=10,
        num_download_attempts=10,
    )
    transfer = S3Transfer(client, config)
    transfer.upload_file('/tmp/foo', 'bucket', 'key')


"""

import concurrent.futures
|
||||
import functools
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import queue
|
||||
import random
|
||||
import socket
|
||||
import string
|
||||
import threading
|
||||
|
||||
from botocore.compat import six # noqa: F401
|
||||
from botocore.exceptions import IncompleteReadError, ResponseStreamingError
|
||||
from botocore.vendored.requests.packages.urllib3.exceptions import (
|
||||
ReadTimeoutError,
|
||||
)
|
||||
|
||||
import s3transfer.compat
|
||||
from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError
|
||||
|
||||
__author__ = 'Amazon Web Services'
|
||||
__version__ = '0.13.0'
|
||||
|
||||
|
||||
class NullHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
pass
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.addHandler(NullHandler())
|
||||
|
||||
MB = 1024 * 1024
|
||||
SHUTDOWN_SENTINEL = object()
|
||||
|
||||
|
||||
def random_file_extension(num_digits=8):
|
||||
return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
|
||||
|
||||
|
||||
def disable_upload_callbacks(request, operation_name, **kwargs):
|
||||
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
|
||||
request.body, 'disable_callback'
|
||||
):
|
||||
request.body.disable_callback()
|
||||
|
||||
|
||||
def enable_upload_callbacks(request, operation_name, **kwargs):
|
||||
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
|
||||
request.body, 'enable_callback'
|
||||
):
|
||||
request.body.enable_callback()
|
||||
|
||||
|
||||
class QueueShutdownError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ReadFileChunk:
|
||||
def __init__(
|
||||
self,
|
||||
fileobj,
|
||||
start_byte,
|
||||
chunk_size,
|
||||
full_file_size,
|
||||
callback=None,
|
||||
enable_callback=True,
|
||||
):
|
||||
"""
|
||||
|
||||
Given a file object shown below:
|
||||
|
||||
        |___________________________________________________|
        0          |                 |    full_file_size
                   |----chunk_size---|
                   start_byte
|
||||
|
||||
:type fileobj: file
|
||||
:param fileobj: File like object
|
||||
|
||||
:type start_byte: int
|
||||
:param start_byte: The first byte from which to start reading.
|
||||
|
||||
:type chunk_size: int
|
||||
:param chunk_size: The max chunk size to read. Trying to read
|
||||
            past the end of the chunk size will behave like you've
|
||||
reached the end of the file.
|
||||
|
||||
:type full_file_size: int
|
||||
:param full_file_size: The entire content length associated
|
||||
with ``fileobj``.
|
||||
|
||||
:type callback: function(amount_read)
|
||||
:param callback: Called whenever data is read from this object.
|
||||
|
||||
"""
|
||||
self._fileobj = fileobj
|
||||
self._start_byte = start_byte
|
||||
self._size = self._calculate_file_size(
|
||||
self._fileobj,
|
||||
requested_size=chunk_size,
|
||||
start_byte=start_byte,
|
||||
actual_file_size=full_file_size,
|
||||
)
|
||||
self._fileobj.seek(self._start_byte)
|
||||
self._amount_read = 0
|
||||
self._callback = callback
|
||||
self._callback_enabled = enable_callback
|
||||
|
||||
@classmethod
|
||||
def from_filename(
|
||||
cls,
|
||||
filename,
|
||||
start_byte,
|
||||
chunk_size,
|
||||
callback=None,
|
||||
enable_callback=True,
|
||||
):
|
||||
"""Convenience factory function to create from a filename.
|
||||
|
||||
:type start_byte: int
|
||||
:param start_byte: The first byte from which to start reading.
|
||||
|
||||
:type chunk_size: int
|
||||
:param chunk_size: The max chunk size to read. Trying to read
|
||||
            past the end of the chunk size will behave like you've
|
||||
reached the end of the file.
|
||||
|
||||
:type full_file_size: int
|
||||
:param full_file_size: The entire content length associated
|
||||
with ``fileobj``.
|
||||
|
||||
:type callback: function(amount_read)
|
||||
:param callback: Called whenever data is read from this object.
|
||||
|
||||
:type enable_callback: bool
|
||||
:param enable_callback: Indicate whether to invoke callback
|
||||
during read() calls.
|
||||
|
||||
:rtype: ``ReadFileChunk``
|
||||
:return: A new instance of ``ReadFileChunk``
|
||||
|
||||
"""
|
||||
f = open(filename, 'rb')
|
||||
file_size = os.fstat(f.fileno()).st_size
|
||||
return cls(
|
||||
f, start_byte, chunk_size, file_size, callback, enable_callback
|
||||
)
|
||||
|
||||
def _calculate_file_size(
|
||||
self, fileobj, requested_size, start_byte, actual_file_size
|
||||
):
|
||||
max_chunk_size = actual_file_size - start_byte
|
||||
return min(max_chunk_size, requested_size)
|
||||
|
||||
def read(self, amount=None):
|
||||
if amount is None:
|
||||
amount_to_read = self._size - self._amount_read
|
||||
else:
|
||||
amount_to_read = min(self._size - self._amount_read, amount)
|
||||
data = self._fileobj.read(amount_to_read)
|
||||
self._amount_read += len(data)
|
||||
if self._callback is not None and self._callback_enabled:
|
||||
self._callback(len(data))
|
||||
return data
|
||||
|
||||
def enable_callback(self):
|
||||
self._callback_enabled = True
|
||||
|
||||
def disable_callback(self):
|
||||
self._callback_enabled = False
|
||||
|
||||
def seek(self, where):
|
||||
self._fileobj.seek(self._start_byte + where)
|
||||
if self._callback is not None and self._callback_enabled:
|
||||
# To also rewind the callback() for an accurate progress report
|
||||
self._callback(where - self._amount_read)
|
||||
self._amount_read = where
|
||||
|
||||
def close(self):
|
||||
self._fileobj.close()
|
||||
|
||||
def tell(self):
|
||||
return self._amount_read
|
||||
|
||||
def __len__(self):
|
||||
# __len__ is defined because requests will try to determine the length
|
||||
# of the stream to set a content length. In the normal case
|
||||
# of the file it will just stat the file, but we need to change that
|
||||
# behavior. By providing a __len__, requests will use that instead
|
||||
# of stat'ing the file.
|
||||
return self._size
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
self.close()
|
||||
|
||||
def __iter__(self):
|
||||
# This is a workaround for http://bugs.python.org/issue17575
|
||||
# Basically httplib will try to iterate over the contents, even
|
||||
# if its a file like object. This wasn't noticed because we've
|
||||
# already exhausted the stream so iterating over the file immediately
|
||||
# stops, which is what we're simulating here.
|
||||
return iter([])
|
||||
|
||||
|
||||
class StreamReaderProgress:
|
||||
"""Wrapper for a read only stream that adds progress callbacks."""
|
||||
|
||||
def __init__(self, stream, callback=None):
|
||||
self._stream = stream
|
||||
self._callback = callback
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
value = self._stream.read(*args, **kwargs)
|
||||
if self._callback is not None:
|
||||
self._callback(len(value))
|
||||
return value
|
||||
|
||||
|
||||
class OSUtils:
|
||||
def get_file_size(self, filename):
|
||||
return os.path.getsize(filename)
|
||||
|
||||
def open_file_chunk_reader(self, filename, start_byte, size, callback):
|
||||
return ReadFileChunk.from_filename(
|
||||
filename, start_byte, size, callback, enable_callback=False
|
||||
)
|
||||
|
||||
def open(self, filename, mode):
|
||||
return open(filename, mode)
|
||||
|
||||
def remove_file(self, filename):
|
||||
"""Remove a file, noop if file does not exist."""
|
||||
# Unlike os.remove, if the file does not exist,
|
||||
# then this method does nothing.
|
||||
try:
|
||||
os.remove(filename)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def rename_file(self, current_filename, new_filename):
|
||||
s3transfer.compat.rename_file(current_filename, new_filename)
|
||||
|
||||
|
||||
class MultipartUploader:
|
||||
# These are the extra_args that need to be forwarded onto
|
||||
# subsequent upload_parts.
|
||||
UPLOAD_PART_ARGS = [
|
||||
'SSECustomerKey',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKeyMD5',
|
||||
'RequestPayer',
|
||||
]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
executor_cls=concurrent.futures.ThreadPoolExecutor,
|
||||
):
|
||||
self._client = client
|
||||
self._config = config
|
||||
self._os = osutil
|
||||
self._executor_cls = executor_cls
|
||||
|
||||
def _extra_upload_part_args(self, extra_args):
|
||||
# Only the args in UPLOAD_PART_ARGS actually need to be passed
|
||||
# onto the upload_part calls.
|
||||
upload_parts_args = {}
|
||||
for key, value in extra_args.items():
|
||||
if key in self.UPLOAD_PART_ARGS:
|
||||
upload_parts_args[key] = value
|
||||
return upload_parts_args
|
||||
|
||||
def upload_file(self, filename, bucket, key, callback, extra_args):
|
||||
response = self._client.create_multipart_upload(
|
||||
Bucket=bucket, Key=key, **extra_args
|
||||
)
|
||||
upload_id = response['UploadId']
|
||||
try:
|
||||
parts = self._upload_parts(
|
||||
upload_id, filename, bucket, key, callback, extra_args
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Exception raised while uploading parts, "
|
||||
"aborting multipart upload.",
|
||||
exc_info=True,
|
||||
)
|
||||
self._client.abort_multipart_upload(
|
||||
Bucket=bucket, Key=key, UploadId=upload_id
|
||||
)
|
||||
raise S3UploadFailedError(
|
||||
"Failed to upload {} to {}: {}".format(
|
||||
filename, '/'.join([bucket, key]), e
|
||||
)
|
||||
)
|
||||
self._client.complete_multipart_upload(
|
||||
Bucket=bucket,
|
||||
Key=key,
|
||||
UploadId=upload_id,
|
||||
MultipartUpload={'Parts': parts},
|
||||
)
|
||||
|
||||
def _upload_parts(
|
||||
self, upload_id, filename, bucket, key, callback, extra_args
|
||||
):
|
||||
upload_parts_extra_args = self._extra_upload_part_args(extra_args)
|
||||
parts = []
|
||||
part_size = self._config.multipart_chunksize
|
||||
num_parts = int(
|
||||
math.ceil(self._os.get_file_size(filename) / float(part_size))
|
||||
)
|
||||
max_workers = self._config.max_concurrency
|
||||
with self._executor_cls(max_workers=max_workers) as executor:
|
||||
upload_partial = functools.partial(
|
||||
self._upload_one_part,
|
||||
filename,
|
||||
bucket,
|
||||
key,
|
||||
upload_id,
|
||||
part_size,
|
||||
upload_parts_extra_args,
|
||||
callback,
|
||||
)
|
||||
for part in executor.map(upload_partial, range(1, num_parts + 1)):
|
||||
parts.append(part)
|
||||
return parts
|
||||
|
||||
def _upload_one_part(
|
||||
self,
|
||||
filename,
|
||||
bucket,
|
||||
key,
|
||||
upload_id,
|
||||
part_size,
|
||||
extra_args,
|
||||
callback,
|
||||
part_number,
|
||||
):
|
||||
open_chunk_reader = self._os.open_file_chunk_reader
|
||||
with open_chunk_reader(
|
||||
filename, part_size * (part_number - 1), part_size, callback
|
||||
) as body:
|
||||
response = self._client.upload_part(
|
||||
Bucket=bucket,
|
||||
Key=key,
|
||||
UploadId=upload_id,
|
||||
PartNumber=part_number,
|
||||
Body=body,
|
||||
**extra_args,
|
||||
)
|
||||
etag = response['ETag']
|
||||
return {'ETag': etag, 'PartNumber': part_number}
|
||||
|
||||
|
||||
class ShutdownQueue(queue.Queue):
|
||||
"""A queue implementation that can be shutdown.
|
||||
|
||||
Shutting down a queue means that this class adds a
|
||||
trigger_shutdown method that will trigger all subsequent
|
||||
calls to put() to fail with a ``QueueShutdownError``.
|
||||
|
||||
It purposefully deviates from queue.Queue, and is *not* meant
|
||||
to be a drop in replacement for ``queue.Queue``.
|
||||
|
||||
"""
|
||||
|
||||
def _init(self, maxsize):
|
||||
self._shutdown = False
|
||||
self._shutdown_lock = threading.Lock()
|
||||
        # Delegate directly to queue.Queue._init for the base initialization.
|
||||
return queue.Queue._init(self, maxsize)
|
||||
|
||||
def trigger_shutdown(self):
|
||||
with self._shutdown_lock:
|
||||
self._shutdown = True
|
||||
logger.debug("The IO queue is now shutdown.")
|
||||
|
||||
def put(self, item):
|
||||
# Note: this is not sufficient, it's still possible to deadlock!
|
||||
# Need to hook into the condition vars used by this class.
|
||||
with self._shutdown_lock:
|
||||
if self._shutdown:
|
||||
raise QueueShutdownError(
|
||||
"Cannot put item to queue when " "queue has been shutdown."
|
||||
)
|
||||
return queue.Queue.put(self, item)
|
||||
|
||||
|
||||
class MultipartDownloader:
|
||||
def __init__(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
executor_cls=concurrent.futures.ThreadPoolExecutor,
|
||||
):
|
||||
self._client = client
|
||||
self._config = config
|
||||
self._os = osutil
|
||||
self._executor_cls = executor_cls
|
||||
self._ioqueue = ShutdownQueue(self._config.max_io_queue)
|
||||
|
||||
def download_file(
|
||||
self, bucket, key, filename, object_size, extra_args, callback=None
|
||||
):
|
||||
with self._executor_cls(max_workers=2) as controller:
|
||||
            # 1 thread for the future that manages downloading the parts
|
||||
# 1 thread for the future that manages IO writes.
|
||||
download_parts_handler = functools.partial(
|
||||
self._download_file_as_future,
|
||||
bucket,
|
||||
key,
|
||||
filename,
|
||||
object_size,
|
||||
callback,
|
||||
)
|
||||
parts_future = controller.submit(download_parts_handler)
|
||||
|
||||
io_writes_handler = functools.partial(
|
||||
self._perform_io_writes, filename
|
||||
)
|
||||
io_future = controller.submit(io_writes_handler)
|
||||
results = concurrent.futures.wait(
|
||||
[parts_future, io_future],
|
||||
return_when=concurrent.futures.FIRST_EXCEPTION,
|
||||
)
|
||||
self._process_future_results(results)
|
||||
|
||||
def _process_future_results(self, futures):
|
||||
finished, unfinished = futures
|
||||
for future in finished:
|
||||
future.result()
|
||||
|
||||
def _download_file_as_future(
|
||||
self, bucket, key, filename, object_size, callback
|
||||
):
|
||||
part_size = self._config.multipart_chunksize
|
||||
num_parts = int(math.ceil(object_size / float(part_size)))
|
||||
max_workers = self._config.max_concurrency
|
||||
download_partial = functools.partial(
|
||||
self._download_range,
|
||||
bucket,
|
||||
key,
|
||||
filename,
|
||||
part_size,
|
||||
num_parts,
|
||||
callback,
|
||||
)
|
||||
try:
|
||||
with self._executor_cls(max_workers=max_workers) as executor:
|
||||
list(executor.map(download_partial, range(num_parts)))
|
||||
finally:
|
||||
self._ioqueue.put(SHUTDOWN_SENTINEL)
|
||||
|
||||
def _calculate_range_param(self, part_size, part_index, num_parts):
|
||||
start_range = part_index * part_size
|
||||
if part_index == num_parts - 1:
|
||||
end_range = ''
|
||||
else:
|
||||
end_range = start_range + part_size - 1
|
||||
range_param = f'bytes={start_range}-{end_range}'
|
||||
return range_param
|
||||
|
||||
def _download_range(
|
||||
self, bucket, key, filename, part_size, num_parts, callback, part_index
|
||||
):
|
||||
try:
|
||||
range_param = self._calculate_range_param(
|
||||
part_size, part_index, num_parts
|
||||
)
|
||||
|
||||
max_attempts = self._config.num_download_attempts
|
||||
last_exception = None
|
||||
for i in range(max_attempts):
|
||||
try:
|
||||
logger.debug("Making get_object call.")
|
||||
response = self._client.get_object(
|
||||
Bucket=bucket, Key=key, Range=range_param
|
||||
)
|
||||
streaming_body = StreamReaderProgress(
|
||||
response['Body'], callback
|
||||
)
|
||||
buffer_size = 1024 * 16
|
||||
current_index = part_size * part_index
|
||||
for chunk in iter(
|
||||
lambda: streaming_body.read(buffer_size), b''
|
||||
):
|
||||
self._ioqueue.put((current_index, chunk))
|
||||
current_index += len(chunk)
|
||||
return
|
||||
except (
|
||||
socket.timeout,
|
||||
OSError,
|
||||
ReadTimeoutError,
|
||||
IncompleteReadError,
|
||||
ResponseStreamingError,
|
||||
) as e:
|
||||
logger.debug(
|
||||
"Retrying exception caught (%s), "
|
||||
"retrying request, (attempt %s / %s)",
|
||||
e,
|
||||
i,
|
||||
max_attempts,
|
||||
exc_info=True,
|
||||
)
|
||||
last_exception = e
|
||||
continue
|
||||
raise RetriesExceededError(last_exception)
|
||||
finally:
|
||||
logger.debug("EXITING _download_range for part: %s", part_index)
|
||||
|
||||
def _perform_io_writes(self, filename):
|
||||
with self._os.open(filename, 'wb') as f:
|
||||
while True:
|
||||
task = self._ioqueue.get()
|
||||
if task is SHUTDOWN_SENTINEL:
|
||||
logger.debug(
|
||||
"Shutdown sentinel received in IO handler, "
|
||||
"shutting down IO handler."
|
||||
)
|
||||
return
|
||||
else:
|
||||
try:
|
||||
offset, data = task
|
||||
f.seek(offset)
|
||||
f.write(data)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Caught exception in IO thread: %s",
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
self._ioqueue.trigger_shutdown()
|
||||
raise
|
||||
|
||||
|
||||
class TransferConfig:
|
||||
def __init__(
|
||||
self,
|
||||
multipart_threshold=8 * MB,
|
||||
max_concurrency=10,
|
||||
multipart_chunksize=8 * MB,
|
||||
num_download_attempts=5,
|
||||
max_io_queue=100,
|
||||
):
|
||||
self.multipart_threshold = multipart_threshold
|
||||
self.max_concurrency = max_concurrency
|
||||
self.multipart_chunksize = multipart_chunksize
|
||||
self.num_download_attempts = num_download_attempts
|
||||
self.max_io_queue = max_io_queue
|
||||
|
||||
|
||||
class S3Transfer:
|
||||
ALLOWED_DOWNLOAD_ARGS = [
|
||||
'VersionId',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKey',
|
||||
'SSECustomerKeyMD5',
|
||||
'RequestPayer',
|
||||
]
|
||||
|
||||
ALLOWED_UPLOAD_ARGS = [
|
||||
'ACL',
|
||||
'CacheControl',
|
||||
'ContentDisposition',
|
||||
'ContentEncoding',
|
||||
'ContentLanguage',
|
||||
'ContentType',
|
||||
'Expires',
|
||||
'GrantFullControl',
|
||||
'GrantRead',
|
||||
'GrantReadACP',
|
||||
'GrantWriteACL',
|
||||
'Metadata',
|
||||
'RequestPayer',
|
||||
'ServerSideEncryption',
|
||||
'StorageClass',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKey',
|
||||
'SSECustomerKeyMD5',
|
||||
'SSEKMSKeyId',
|
||||
'SSEKMSEncryptionContext',
|
||||
'Tagging',
|
||||
]
|
||||
|
||||
def __init__(self, client, config=None, osutil=None):
|
||||
self._client = client
|
||||
self._client.meta.events.register(
|
||||
'before-call.s3.*', self._update_checksum_context
|
||||
)
|
||||
if config is None:
|
||||
config = TransferConfig()
|
||||
self._config = config
|
||||
if osutil is None:
|
||||
osutil = OSUtils()
|
||||
self._osutil = osutil
|
||||
|
||||
def _update_checksum_context(self, params, **kwargs):
|
||||
request_context = params.get("context", {})
|
||||
checksum_context = request_context.get("checksum", {})
|
||||
if "request_algorithm" in checksum_context:
|
||||
# Force request checksum algorithm in the header if specified.
|
||||
checksum_context["request_algorithm"]["in"] = "header"
|
||||
|
||||
def upload_file(
|
||||
self, filename, bucket, key, callback=None, extra_args=None
|
||||
):
|
||||
"""Upload a file to an S3 object.
|
||||
|
||||
Variants have also been injected into S3 client, Bucket and Object.
|
||||
You don't have to use S3Transfer.upload_file() directly.
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
|
||||
events = self._client.meta.events
|
||||
events.register_first(
|
||||
'request-created.s3',
|
||||
disable_upload_callbacks,
|
||||
unique_id='s3upload-callback-disable',
|
||||
)
|
||||
events.register_last(
|
||||
'request-created.s3',
|
||||
enable_upload_callbacks,
|
||||
unique_id='s3upload-callback-enable',
|
||||
)
|
||||
if (
|
||||
self._osutil.get_file_size(filename)
|
||||
>= self._config.multipart_threshold
|
||||
):
|
||||
self._multipart_upload(filename, bucket, key, callback, extra_args)
|
||||
else:
|
||||
self._put_object(filename, bucket, key, callback, extra_args)
|
||||
|
||||
def _put_object(self, filename, bucket, key, callback, extra_args):
|
||||
# We're using open_file_chunk_reader so we can take advantage of the
|
||||
# progress callback functionality.
|
||||
open_chunk_reader = self._osutil.open_file_chunk_reader
|
||||
with open_chunk_reader(
|
||||
filename,
|
||||
0,
|
||||
self._osutil.get_file_size(filename),
|
||||
callback=callback,
|
||||
) as body:
|
||||
self._client.put_object(
|
||||
Bucket=bucket, Key=key, Body=body, **extra_args
|
||||
)
|
||||
|
||||
def download_file(
|
||||
self, bucket, key, filename, extra_args=None, callback=None
|
||||
):
|
||||
"""Download an S3 object to a file.
|
||||
|
||||
Variants have also been injected into S3 client, Bucket and Object.
|
||||
You don't have to use S3Transfer.download_file() directly.
|
||||
"""
|
||||
# This method will issue a ``head_object`` request to determine
|
||||
# the size of the S3 object. This is used to determine if the
|
||||
# object is downloaded in parallel.
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
|
||||
object_size = self._object_size(bucket, key, extra_args)
|
||||
temp_filename = filename + os.extsep + random_file_extension()
|
||||
try:
|
||||
self._download_file(
|
||||
bucket, key, temp_filename, object_size, extra_args, callback
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"Exception caught in download_file, removing partial "
|
||||
"file: %s",
|
||||
temp_filename,
|
||||
exc_info=True,
|
||||
)
|
||||
self._osutil.remove_file(temp_filename)
|
||||
raise
|
||||
else:
|
||||
self._osutil.rename_file(temp_filename, filename)
|
||||
|
||||
def _download_file(
|
||||
self, bucket, key, filename, object_size, extra_args, callback
|
||||
):
|
||||
if object_size >= self._config.multipart_threshold:
|
||||
self._ranged_download(
|
||||
bucket, key, filename, object_size, extra_args, callback
|
||||
)
|
||||
else:
|
||||
self._get_object(bucket, key, filename, extra_args, callback)
|
||||
|
||||
def _validate_all_known_args(self, actual, allowed):
|
||||
for kwarg in actual:
|
||||
if kwarg not in allowed:
|
||||
raise ValueError(
|
||||
f"Invalid extra_args key '{kwarg}', "
|
||||
f"must be one of: {', '.join(allowed)}"
|
||||
)
|
||||
|
||||
def _ranged_download(
|
||||
self, bucket, key, filename, object_size, extra_args, callback
|
||||
):
|
||||
downloader = MultipartDownloader(
|
||||
self._client, self._config, self._osutil
|
||||
)
|
||||
downloader.download_file(
|
||||
bucket, key, filename, object_size, extra_args, callback
|
||||
)
|
||||
|
||||
def _get_object(self, bucket, key, filename, extra_args, callback):
|
||||
# precondition: num_download_attempts > 0
|
||||
max_attempts = self._config.num_download_attempts
|
||||
last_exception = None
|
||||
for i in range(max_attempts):
|
||||
try:
|
||||
return self._do_get_object(
|
||||
bucket, key, filename, extra_args, callback
|
||||
)
|
||||
except (
|
||||
socket.timeout,
|
||||
OSError,
|
||||
ReadTimeoutError,
|
||||
IncompleteReadError,
|
||||
ResponseStreamingError,
|
||||
) as e:
|
||||
# TODO: we need a way to reset the callback if the
|
||||
# download failed.
|
||||
logger.debug(
|
||||
"Retrying exception caught (%s), "
|
||||
"retrying request, (attempt %s / %s)",
|
||||
e,
|
||||
i,
|
||||
max_attempts,
|
||||
exc_info=True,
|
||||
)
|
||||
last_exception = e
|
||||
continue
|
||||
raise RetriesExceededError(last_exception)
|
||||
|
||||
def _do_get_object(self, bucket, key, filename, extra_args, callback):
|
||||
response = self._client.get_object(
|
||||
Bucket=bucket, Key=key, **extra_args
|
||||
)
|
||||
streaming_body = StreamReaderProgress(response['Body'], callback)
|
||||
with self._osutil.open(filename, 'wb') as f:
|
||||
for chunk in iter(lambda: streaming_body.read(8192), b''):
|
||||
f.write(chunk)
|
||||
|
||||
def _object_size(self, bucket, key, extra_args):
|
||||
return self._client.head_object(Bucket=bucket, Key=key, **extra_args)[
|
||||
'ContentLength'
|
||||
]
|
||||
|
||||
def _multipart_upload(self, filename, bucket, key, callback, extra_args):
|
||||
uploader = MultipartUploader(self._client, self._config, self._osutil)
|
||||
uploader.upload_file(filename, bucket, key, callback, extra_args)
|
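Note: the usage examples in the module docstring above only wire a progress callback to ``upload_file``. The same mechanism works for downloads, except the total size has to come from a ``head_object`` call because there is no local file to stat yet. A minimal sketch under that assumption ('bucket', 'key', and the paths are placeholders):

.. code-block:: python

    import boto3
    from s3transfer import S3Transfer

    client = boto3.client('s3', 'us-west-2')
    transfer = S3Transfer(client)

    # The object size is needed up front so the callback can report a percentage.
    size = client.head_object(Bucket='bucket', Key='key')['ContentLength']

    def progress(bytes_amount, _seen=[0]):
        # S3Transfer invokes the callback with the number of bytes read since
        # the previous invocation, not a running total.
        _seen[0] += bytes_amount
        print(f"{_seen[0]} / {size} bytes ({100 * _seen[0] / size:.2f}%)")

    transfer.download_file('bucket', 'key', '/tmp/myfile', callback=progress)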
Binary files not shown.
cdk-env/lib/python3.12/site-packages/s3transfer/bandwidth.py (new file, 437 lines)
@@ -0,0 +1,437 @@
|
||||
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import threading
|
||||
import time
|
||||
|
||||
|
||||
class RequestExceededException(Exception):
|
||||
def __init__(self, requested_amt, retry_time):
|
||||
"""Error when requested amount exceeds what is allowed
|
||||
|
||||
The request that raised this error should be retried after waiting
|
||||
the time specified by ``retry_time``.
|
||||
|
||||
:type requested_amt: int
|
||||
:param requested_amt: The originally requested byte amount
|
||||
|
||||
:type retry_time: float
|
||||
:param retry_time: The length in time to wait to retry for the
|
||||
requested amount
|
||||
"""
|
||||
self.requested_amt = requested_amt
|
||||
self.retry_time = retry_time
|
||||
msg = f'Request amount {requested_amt} exceeded the amount available. Retry in {retry_time}'
|
||||
super().__init__(msg)
|
||||
|
||||
|
||||
class RequestToken:
|
||||
"""A token to pass as an identifier when consuming from the LeakyBucket"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class TimeUtils:
|
||||
def time(self):
|
||||
"""Get the current time back
|
||||
|
||||
:rtype: float
|
||||
:returns: The current time in seconds
|
||||
"""
|
||||
return time.time()
|
||||
|
||||
def sleep(self, value):
|
||||
"""Sleep for a designated time
|
||||
|
||||
:type value: float
|
||||
:param value: The time to sleep for in seconds
|
||||
"""
|
||||
return time.sleep(value)
|
||||
|
||||
|
||||
class BandwidthLimiter:
|
||||
def __init__(self, leaky_bucket, time_utils=None):
|
||||
"""Limits bandwidth for shared S3 transfers
|
||||
|
||||
:type leaky_bucket: LeakyBucket
|
||||
        :param leaky_bucket: The leaky bucket to use to limit bandwidth
|
||||
|
||||
:type time_utils: TimeUtils
|
||||
:param time_utils: Time utility to use for interacting with time.
|
||||
"""
|
||||
self._leaky_bucket = leaky_bucket
|
||||
self._time_utils = time_utils
|
||||
if time_utils is None:
|
||||
self._time_utils = TimeUtils()
|
||||
|
||||
def get_bandwith_limited_stream(
|
||||
self, fileobj, transfer_coordinator, enabled=True
|
||||
):
|
||||
"""Wraps a fileobj in a bandwidth limited stream wrapper
|
||||
|
||||
:type fileobj: file-like obj
|
||||
:param fileobj: The file-like obj to wrap
|
||||
|
||||
:type transfer_coordinator: s3transfer.futures.TransferCoordinator
|
||||
        :param transfer_coordinator: The coordinator for the general transfer
|
||||
that the wrapped stream is a part of
|
||||
|
||||
:type enabled: boolean
|
||||
:param enabled: Whether bandwidth limiting should be enabled to start
|
||||
"""
|
||||
stream = BandwidthLimitedStream(
|
||||
fileobj, self._leaky_bucket, transfer_coordinator, self._time_utils
|
||||
)
|
||||
if not enabled:
|
||||
stream.disable_bandwidth_limiting()
|
||||
return stream
|
||||
|
||||
|
||||
class BandwidthLimitedStream:
|
||||
def __init__(
|
||||
self,
|
||||
fileobj,
|
||||
leaky_bucket,
|
||||
transfer_coordinator,
|
||||
time_utils=None,
|
||||
bytes_threshold=256 * 1024,
|
||||
):
|
||||
"""Limits bandwidth for reads on a wrapped stream
|
||||
|
||||
:type fileobj: file-like object
|
||||
:param fileobj: The file like object to wrap
|
||||
|
||||
:type leaky_bucket: LeakyBucket
|
||||
:param leaky_bucket: The leaky bucket to use to throttle reads on
|
||||
the stream
|
||||
|
||||
:type transfer_coordinator: s3transfer.futures.TransferCoordinator
|
||||
        :param transfer_coordinator: The coordinator for the general transfer
|
||||
that the wrapped stream is a part of
|
||||
|
||||
:type time_utils: TimeUtils
|
||||
:param time_utils: The time utility to use for interacting with time
|
||||
"""
|
||||
self._fileobj = fileobj
|
||||
self._leaky_bucket = leaky_bucket
|
||||
self._transfer_coordinator = transfer_coordinator
|
||||
self._time_utils = time_utils
|
||||
if time_utils is None:
|
||||
self._time_utils = TimeUtils()
|
||||
self._bandwidth_limiting_enabled = True
|
||||
self._request_token = RequestToken()
|
||||
self._bytes_seen = 0
|
||||
self._bytes_threshold = bytes_threshold
|
||||
|
||||
def enable_bandwidth_limiting(self):
|
||||
"""Enable bandwidth limiting on reads to the stream"""
|
||||
self._bandwidth_limiting_enabled = True
|
||||
|
||||
def disable_bandwidth_limiting(self):
|
||||
"""Disable bandwidth limiting on reads to the stream"""
|
||||
self._bandwidth_limiting_enabled = False
|
||||
|
||||
def read(self, amount):
|
||||
"""Read a specified amount
|
||||
|
||||
Reads will only be throttled if bandwidth limiting is enabled.
|
||||
"""
|
||||
if not self._bandwidth_limiting_enabled:
|
||||
return self._fileobj.read(amount)
|
||||
|
||||
# We do not want to be calling consume on every read as the read
|
||||
# amounts can be small causing the lock of the leaky bucket to
|
||||
# introduce noticeable overhead. So instead we keep track of
|
||||
# how many bytes we have seen and only call consume once we pass a
|
||||
# certain threshold.
|
||||
self._bytes_seen += amount
|
||||
if self._bytes_seen < self._bytes_threshold:
|
||||
return self._fileobj.read(amount)
|
||||
|
||||
self._consume_through_leaky_bucket()
|
||||
return self._fileobj.read(amount)
|
||||
|
||||
def _consume_through_leaky_bucket(self):
|
||||
        # NOTE: If the read amounts on the stream are high, it will result
|
||||
# in large bursty behavior as there is not an interface for partial
|
||||
        # reads. However, given the reads on this abstraction are at most 256KB
|
||||
# (via downloads), it reduces the burstiness to be small KB bursts at
|
||||
# worst.
|
||||
while not self._transfer_coordinator.exception:
|
||||
try:
|
||||
self._leaky_bucket.consume(
|
||||
self._bytes_seen, self._request_token
|
||||
)
|
||||
self._bytes_seen = 0
|
||||
return
|
||||
except RequestExceededException as e:
|
||||
self._time_utils.sleep(e.retry_time)
|
||||
else:
|
||||
raise self._transfer_coordinator.exception
|
||||
|
||||
def signal_transferring(self):
|
||||
"""Signal that data being read is being transferred to S3"""
|
||||
self.enable_bandwidth_limiting()
|
||||
|
||||
def signal_not_transferring(self):
|
||||
"""Signal that data being read is not being transferred to S3"""
|
||||
self.disable_bandwidth_limiting()
|
||||
|
||||
def seek(self, where, whence=0):
|
||||
self._fileobj.seek(where, whence)
|
||||
|
||||
def tell(self):
|
||||
return self._fileobj.tell()
|
||||
|
||||
def close(self):
|
||||
if self._bandwidth_limiting_enabled and self._bytes_seen:
|
||||
# This handles the case where the file is small enough to never
|
||||
            # trigger the threshold and thus is never subjected to the
|
||||
# leaky bucket on read(). This specifically happens for small
|
||||
# uploads. So instead to account for those bytes, have
|
||||
# it go through the leaky bucket when the file gets closed.
|
||||
self._consume_through_leaky_bucket()
|
||||
self._fileobj.close()
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
self.close()
|
||||
|
||||
|
||||
class LeakyBucket:
|
||||
def __init__(
|
||||
self,
|
||||
max_rate,
|
||||
time_utils=None,
|
||||
rate_tracker=None,
|
||||
consumption_scheduler=None,
|
||||
):
|
||||
"""A leaky bucket abstraction to limit bandwidth consumption
|
||||
|
||||
        :type max_rate: int
|
||||
        :param max_rate: The maximum rate to allow. This rate is in terms of
|
||||
bytes per second.
|
||||
|
||||
:type time_utils: TimeUtils
|
||||
:param time_utils: The time utility to use for interacting with time
|
||||
|
||||
:type rate_tracker: BandwidthRateTracker
|
||||
:param rate_tracker: Tracks bandwidth consumption
|
||||
|
||||
:type consumption_scheduler: ConsumptionScheduler
|
||||
:param consumption_scheduler: Schedules consumption retries when
|
||||
necessary
|
||||
"""
|
||||
self._max_rate = float(max_rate)
|
||||
self._time_utils = time_utils
|
||||
if time_utils is None:
|
||||
self._time_utils = TimeUtils()
|
||||
self._lock = threading.Lock()
|
||||
self._rate_tracker = rate_tracker
|
||||
if rate_tracker is None:
|
||||
self._rate_tracker = BandwidthRateTracker()
|
||||
self._consumption_scheduler = consumption_scheduler
|
||||
if consumption_scheduler is None:
|
||||
self._consumption_scheduler = ConsumptionScheduler()
|
||||
|
||||
def consume(self, amt, request_token):
|
||||
"""Consume an a requested amount
|
||||
|
||||
:type amt: int
|
||||
:param amt: The amount of bytes to request to consume
|
||||
|
||||
:type request_token: RequestToken
|
||||
:param request_token: The token associated to the consumption
|
||||
request that is used to identify the request. So if a
|
||||
RequestExceededException is raised the token should be used
|
||||
            in the subsequent retry consume() request.
|
||||
|
||||
:raises RequestExceededException: If the consumption amount would
|
||||
exceed the maximum allocated bandwidth
|
||||
|
||||
:rtype: int
|
||||
:returns: The amount consumed
|
||||
"""
|
||||
with self._lock:
|
||||
time_now = self._time_utils.time()
|
||||
if self._consumption_scheduler.is_scheduled(request_token):
|
||||
return self._release_requested_amt_for_scheduled_request(
|
||||
amt, request_token, time_now
|
||||
)
|
||||
elif self._projected_to_exceed_max_rate(amt, time_now):
|
||||
self._raise_request_exceeded_exception(
|
||||
amt, request_token, time_now
|
||||
)
|
||||
else:
|
||||
return self._release_requested_amt(amt, time_now)
|
||||
|
||||
def _projected_to_exceed_max_rate(self, amt, time_now):
|
||||
projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
|
||||
return projected_rate > self._max_rate
|
||||
|
||||
def _release_requested_amt_for_scheduled_request(
|
||||
self, amt, request_token, time_now
|
||||
):
|
||||
self._consumption_scheduler.process_scheduled_consumption(
|
||||
request_token
|
||||
)
|
||||
return self._release_requested_amt(amt, time_now)
|
||||
|
||||
def _raise_request_exceeded_exception(self, amt, request_token, time_now):
|
||||
allocated_time = amt / float(self._max_rate)
|
||||
retry_time = self._consumption_scheduler.schedule_consumption(
|
||||
amt, request_token, allocated_time
|
||||
)
|
||||
raise RequestExceededException(
|
||||
requested_amt=amt, retry_time=retry_time
|
||||
)
|
||||
|
||||
def _release_requested_amt(self, amt, time_now):
|
||||
self._rate_tracker.record_consumption_rate(amt, time_now)
|
||||
return amt
|
||||
|
||||
|
||||
class ConsumptionScheduler:
|
||||
def __init__(self):
|
||||
"""Schedules when to consume a desired amount"""
|
||||
self._tokens_to_scheduled_consumption = {}
|
||||
self._total_wait = 0
|
||||
|
||||
def is_scheduled(self, token):
|
||||
"""Indicates if a consumption request has been scheduled
|
||||
|
||||
:type token: RequestToken
|
||||
:param token: The token associated to the consumption
|
||||
request that is used to identify the request.
|
||||
"""
|
||||
return token in self._tokens_to_scheduled_consumption
|
||||
|
||||
def schedule_consumption(self, amt, token, time_to_consume):
|
||||
"""Schedules a wait time to be able to consume an amount
|
||||
|
||||
:type amt: int
|
||||
:param amt: The amount of bytes scheduled to be consumed
|
||||
|
||||
:type token: RequestToken
|
||||
:param token: The token associated to the consumption
|
||||
request that is used to identify the request.
|
||||
|
||||
:type time_to_consume: float
|
||||
:param time_to_consume: The desired time it should take for that
|
||||
            specific request amount to be consumed, regardless of previously
|
||||
scheduled consumption requests
|
||||
|
||||
:rtype: float
|
||||
:returns: The amount of time to wait for the specific request before
|
||||
actually consuming the specified amount.
|
||||
"""
|
||||
self._total_wait += time_to_consume
|
||||
self._tokens_to_scheduled_consumption[token] = {
|
||||
'wait_duration': self._total_wait,
|
||||
'time_to_consume': time_to_consume,
|
||||
}
|
||||
return self._total_wait
|
||||
|
||||
def process_scheduled_consumption(self, token):
|
||||
"""Processes a scheduled consumption request that has completed
|
||||
|
||||
:type token: RequestToken
|
||||
:param token: The token associated to the consumption
|
||||
request that is used to identify the request.
|
||||
"""
|
||||
scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
|
||||
self._total_wait = max(
|
||||
self._total_wait - scheduled_retry['time_to_consume'], 0
|
||||
)
|
||||
|
||||
|
||||
class BandwidthRateTracker:
|
||||
def __init__(self, alpha=0.8):
|
||||
"""Tracks the rate of bandwidth consumption
|
||||
|
||||
        :type alpha: float
|
||||
        :param alpha: The constant to use in calculating the exponential moving
|
||||
average of the bandwidth rate. Specifically it is used in the
|
||||
following calculation:
|
||||
|
||||
current_rate = alpha * new_rate + (1 - alpha) * current_rate
|
||||
|
||||
            The value of this constant should be between 0 and 1.
|
||||
"""
|
||||
self._alpha = alpha
|
||||
self._last_time = None
|
||||
self._current_rate = None
|
||||
|
||||
@property
|
||||
def current_rate(self):
|
||||
"""The current transfer rate
|
||||
|
||||
:rtype: float
|
||||
:returns: The current tracked transfer rate
|
||||
"""
|
||||
if self._last_time is None:
|
||||
return 0.0
|
||||
return self._current_rate
|
||||
|
||||
def get_projected_rate(self, amt, time_at_consumption):
|
||||
"""Get the projected rate using a provided amount and time
|
||||
|
||||
:type amt: int
|
||||
:param amt: The proposed amount to consume
|
||||
|
||||
:type time_at_consumption: float
|
||||
:param time_at_consumption: The proposed time to consume at
|
||||
|
||||
:rtype: float
|
||||
:returns: The consumption rate if that amt and time were consumed
|
||||
"""
|
||||
if self._last_time is None:
|
||||
return 0.0
|
||||
return self._calculate_exponential_moving_average_rate(
|
||||
amt, time_at_consumption
|
||||
)
|
||||
|
||||
def record_consumption_rate(self, amt, time_at_consumption):
|
||||
"""Record the consumption rate based off amount and time point
|
||||
|
||||
:type amt: int
|
||||
:param amt: The amount that got consumed
|
||||
|
||||
:type time_at_consumption: float
|
||||
:param time_at_consumption: The time at which the amount was consumed
|
||||
"""
|
||||
if self._last_time is None:
|
||||
self._last_time = time_at_consumption
|
||||
self._current_rate = 0.0
|
||||
return
|
||||
self._current_rate = self._calculate_exponential_moving_average_rate(
|
||||
amt, time_at_consumption
|
||||
)
|
||||
self._last_time = time_at_consumption
|
||||
|
||||
def _calculate_rate(self, amt, time_at_consumption):
|
||||
time_delta = time_at_consumption - self._last_time
|
||||
if time_delta <= 0:
|
||||
# While it is really unlikely to see this in an actual transfer,
|
||||
# we do not want to be returning back a negative rate or try to
|
||||
# divide the amount by zero. So instead return back an infinite
|
||||
# rate as the time delta is infinitesimally small.
|
||||
return float('inf')
|
||||
return amt / (time_delta)
|
||||
|
||||
def _calculate_exponential_moving_average_rate(
|
||||
self, amt, time_at_consumption
|
||||
):
|
||||
new_rate = self._calculate_rate(amt, time_at_consumption)
|
||||
return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
|
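Note: ``LeakyBucket`` throttles by comparing a projected rate against ``max_rate``, where ``BandwidthRateTracker`` keeps an exponential moving average, ``current_rate = alpha * new_rate + (1 - alpha) * current_rate``. The pieces can be exercised outside a transfer manager; the sketch below caps reads at roughly 1 MiB/s. The ``_Coordinator`` class is a stand-in for ``s3transfer.futures.TransferCoordinator`` (the stream only inspects its ``exception`` attribute), so treat this as an illustration of the API rather than production wiring:

.. code-block:: python

    from s3transfer.bandwidth import BandwidthLimitedStream, LeakyBucket

    class _Coordinator:
        # Stand-in: the stream checks ``exception`` to decide whether to keep
        # waiting for bandwidth or to re-raise a failure from another thread.
        exception = None

    leaky_bucket = LeakyBucket(max_rate=1024 * 1024)  # bytes per second
    with open('/tmp/bigfile', 'rb') as raw:
        stream = BandwidthLimitedStream(raw, leaky_bucket, _Coordinator())
        while True:
            chunk = stream.read(256 * 1024)
            if not chunk:
                break
            # ... hand ``chunk`` to whatever is consuming the upload body ...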
cdk-env/lib/python3.12/site-packages/s3transfer/compat.py (new file, 94 lines)
@@ -0,0 +1,94 @@
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import errno
import inspect
import os
import socket
import sys

from botocore.compat import six

if sys.platform.startswith('win'):
    def rename_file(current_filename, new_filename):
        try:
            os.remove(new_filename)
        except OSError as e:
            if not e.errno == errno.ENOENT:
                # We only want to ignore trying to remove
                # a file that does not exist. If it fails
                # for any other reason we should be propagating
                # that exception.
                raise
        os.rename(current_filename, new_filename)
else:
    rename_file = os.rename


def accepts_kwargs(func):
    return inspect.getfullargspec(func)[2]


# In python 3, socket.error is OSError, which is too general
# for what we want (i.e. FileNotFoundError is a subclass of OSError).
# In python 3, all the socket related errors are in a newly created
# ConnectionError.
SOCKET_ERROR = ConnectionError
MAXINT = None


def seekable(fileobj):
    """Backwards compat function to determine if a fileobj is seekable

    :param fileobj: The file-like object to determine if seekable

    :returns: True, if seekable. False, otherwise.
    """
    # If the fileobj has a seekable attr, try calling the seekable()
    # method on it.
    if hasattr(fileobj, 'seekable'):
        return fileobj.seekable()
    # If there is no seekable attr, check if the object can be seeked
    # or telled. If it can, try to seek to the current position.
    elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
        try:
            fileobj.seek(0, 1)
            return True
        except OSError:
            # If an io related error was thrown then it is not seekable.
            return False
    # Else, the fileobj is not seekable
    return False


def readable(fileobj):
    """Determines whether or not a file-like object is readable.

    :param fileobj: The file-like object to determine if readable

    :returns: True, if readable. False otherwise.
    """
    if hasattr(fileobj, 'readable'):
        return fileobj.readable()

    return hasattr(fileobj, 'read')


def fallocate(fileobj, size):
    if hasattr(os, 'posix_fallocate'):
        os.posix_fallocate(fileobj.fileno(), 0, size)
    else:
        fileobj.truncate(size)


# Import at end of file to avoid circular dependencies
from multiprocessing.managers import BaseManager  # noqa: F401,E402
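Note: ``seekable`` and ``readable`` above are plain duck-typing checks, so they can be sanity-checked with standard-library objects; a quick example:

.. code-block:: python

    import io
    from s3transfer.compat import readable, seekable

    buf = io.BytesIO(b'payload')
    assert seekable(buf) and readable(buf)

    # An object with ``read`` but neither a ``seekable`` attr nor ``seek``/``tell``
    # falls through every branch and is reported as not seekable.
    class OneWay:
        def read(self, n=-1):
            return b''

    assert readable(OneWay()) and not seekable(OneWay())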
cdk-env/lib/python3.12/site-packages/s3transfer/constants.py (new file, 38 lines)
@@ -0,0 +1,38 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import s3transfer

KB = 1024
MB = KB * KB
GB = MB * KB

ALLOWED_DOWNLOAD_ARGS = [
    'ChecksumMode',
    'VersionId',
    'SSECustomerAlgorithm',
    'SSECustomerKey',
    'SSECustomerKeyMD5',
    'RequestPayer',
    'ExpectedBucketOwner',
]

FULL_OBJECT_CHECKSUM_ARGS = [
    'ChecksumCRC32',
    'ChecksumCRC32C',
    'ChecksumCRC64NVME',
    'ChecksumSHA1',
    'ChecksumSHA256',
]

USER_AGENT = f's3transfer/{s3transfer.__version__}'
PROCESS_USER_AGENT = f'{USER_AGENT} processpool'
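Note: these constants feed the argument validation and size math in the higher-level managers. A small sketch of the kind of check they enable (``extra_args`` is just an example dict, not a real transfer):

.. code-block:: python

    from s3transfer.constants import ALLOWED_DOWNLOAD_ARGS, MB

    extra_args = {'VersionId': 'abc123', 'ChecksumMode': 'ENABLED'}
    unsupported = [k for k in extra_args if k not in ALLOWED_DOWNLOAD_ARGS]
    if unsupported:
        raise ValueError(f"Invalid extra_args keys: {', '.join(unsupported)}")

    multipart_threshold = 8 * MB  # unit constants keep size math readable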
cdk-env/lib/python3.12/site-packages/s3transfer/copies.py (new file, 388 lines)
@@ -0,0 +1,388 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import copy
|
||||
import math
|
||||
|
||||
from s3transfer.tasks import (
|
||||
CompleteMultipartUploadTask,
|
||||
CreateMultipartUploadTask,
|
||||
SubmissionTask,
|
||||
Task,
|
||||
)
|
||||
from s3transfer.utils import (
|
||||
ChunksizeAdjuster,
|
||||
calculate_range_parameter,
|
||||
get_callbacks,
|
||||
get_filtered_dict,
|
||||
)
|
||||
|
||||
|
||||
class CopySubmissionTask(SubmissionTask):
|
||||
"""Task for submitting tasks to execute a copy"""
|
||||
|
||||
EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = {
|
||||
'CopySourceIfMatch': 'IfMatch',
|
||||
'CopySourceIfModifiedSince': 'IfModifiedSince',
|
||||
'CopySourceIfNoneMatch': 'IfNoneMatch',
|
||||
'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince',
|
||||
'CopySourceSSECustomerKey': 'SSECustomerKey',
|
||||
'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm',
|
||||
'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5',
|
||||
'RequestPayer': 'RequestPayer',
|
||||
'ExpectedBucketOwner': 'ExpectedBucketOwner',
|
||||
}
|
||||
|
||||
UPLOAD_PART_COPY_ARGS = [
|
||||
'CopySourceIfMatch',
|
||||
'CopySourceIfModifiedSince',
|
||||
'CopySourceIfNoneMatch',
|
||||
'CopySourceIfUnmodifiedSince',
|
||||
'CopySourceSSECustomerKey',
|
||||
'CopySourceSSECustomerAlgorithm',
|
||||
'CopySourceSSECustomerKeyMD5',
|
||||
'SSECustomerKey',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKeyMD5',
|
||||
'RequestPayer',
|
||||
'ExpectedBucketOwner',
|
||||
]
|
||||
|
||||
CREATE_MULTIPART_ARGS_BLACKLIST = [
|
||||
'CopySourceIfMatch',
|
||||
'CopySourceIfModifiedSince',
|
||||
'CopySourceIfNoneMatch',
|
||||
'CopySourceIfUnmodifiedSince',
|
||||
'CopySourceSSECustomerKey',
|
||||
'CopySourceSSECustomerAlgorithm',
|
||||
'CopySourceSSECustomerKeyMD5',
|
||||
'MetadataDirective',
|
||||
'TaggingDirective',
|
||||
]
|
||||
|
||||
COMPLETE_MULTIPART_ARGS = [
|
||||
'SSECustomerKey',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKeyMD5',
|
||||
'RequestPayer',
|
||||
'ExpectedBucketOwner',
|
||||
]
|
||||
|
||||
def _submit(
|
||||
self, client, config, osutil, request_executor, transfer_future
|
||||
):
|
||||
"""
|
||||
:param client: The client associated with the transfer manager
|
||||
|
||||
:type config: s3transfer.manager.TransferConfig
|
||||
:param config: The transfer config associated with the transfer
|
||||
manager
|
||||
|
||||
:type osutil: s3transfer.utils.OSUtil
|
||||
:param osutil: The os utility associated to the transfer manager
|
||||
|
||||
:type request_executor: s3transfer.futures.BoundedExecutor
|
||||
:param request_executor: The request executor associated with the
|
||||
transfer manager
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future associated with the
|
||||
transfer request that tasks are being submitted for
|
||||
"""
|
||||
# Determine the size if it was not provided
|
||||
if transfer_future.meta.size is None:
|
||||
# If a size was not provided figure out the size for the
|
||||
# user. Note that we will only use the client provided to
|
||||
# the TransferManager. If the object is outside of the region
|
||||
# of the client, they may have to provide the file size themselves
|
||||
# with a completely new client.
|
||||
call_args = transfer_future.meta.call_args
|
||||
head_object_request = (
|
||||
self._get_head_object_request_from_copy_source(
|
||||
call_args.copy_source
|
||||
)
|
||||
)
|
||||
extra_args = call_args.extra_args
|
||||
|
||||
# Map any values that may be used in the head object that is
|
||||
# used in the copy object
|
||||
for param, value in extra_args.items():
|
||||
if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
|
||||
head_object_request[
|
||||
self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]
|
||||
] = value
|
||||
|
||||
response = call_args.source_client.head_object(
|
||||
**head_object_request
|
||||
)
|
||||
transfer_future.meta.provide_transfer_size(
|
||||
response['ContentLength']
|
||||
)
|
||||
|
||||
# If it is greater than threshold do a multipart copy, otherwise
|
||||
# do a regular copy object.
|
||||
if transfer_future.meta.size < config.multipart_threshold:
|
||||
self._submit_copy_request(
|
||||
client, config, osutil, request_executor, transfer_future
|
||||
)
|
||||
else:
|
||||
self._submit_multipart_request(
|
||||
client, config, osutil, request_executor, transfer_future
|
||||
)
|
||||
|
||||
def _submit_copy_request(
|
||||
self, client, config, osutil, request_executor, transfer_future
|
||||
):
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
# Get the needed progress callbacks for the task
|
||||
progress_callbacks = get_callbacks(transfer_future, 'progress')
|
||||
|
||||
# Submit the request of a single copy.
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
CopyObjectTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'copy_source': call_args.copy_source,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': call_args.extra_args,
|
||||
'callbacks': progress_callbacks,
|
||||
'size': transfer_future.meta.size,
|
||||
},
|
||||
is_final=True,
|
||||
),
|
||||
)
|
||||
|
||||
def _submit_multipart_request(
|
||||
self, client, config, osutil, request_executor, transfer_future
|
||||
):
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
# Submit the request to create a multipart upload and make sure it
|
||||
# does not include any of the arguments used for copy part.
|
||||
create_multipart_extra_args = {}
|
||||
for param, val in call_args.extra_args.items():
|
||||
if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST:
|
||||
create_multipart_extra_args[param] = val
|
||||
|
||||
create_multipart_future = self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
CreateMultipartUploadTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': create_multipart_extra_args,
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
# Determine how many parts are needed based on filesize and
|
||||
# desired chunksize.
|
||||
part_size = config.multipart_chunksize
|
||||
adjuster = ChunksizeAdjuster()
|
||||
part_size = adjuster.adjust_chunksize(
|
||||
part_size, transfer_future.meta.size
|
||||
)
|
||||
num_parts = int(
|
||||
math.ceil(transfer_future.meta.size / float(part_size))
|
||||
)
|
||||
|
||||
# Submit requests to upload the parts of the file.
|
||||
part_futures = []
|
||||
progress_callbacks = get_callbacks(transfer_future, 'progress')
|
||||
|
||||
for part_number in range(1, num_parts + 1):
|
||||
extra_part_args = self._extra_upload_part_args(
|
||||
call_args.extra_args
|
||||
)
|
||||
# The part number for upload part starts at 1, while the
|
||||
# range parameter starts at zero, so subtract 1 from the
|
||||
# part number.
|
||||
extra_part_args['CopySourceRange'] = calculate_range_parameter(
|
||||
part_size,
|
||||
part_number - 1,
|
||||
num_parts,
|
||||
transfer_future.meta.size,
|
||||
)
|
||||
# Get the size of the part copy as well for the progress
|
||||
# callbacks.
|
||||
size = self._get_transfer_size(
|
||||
part_size,
|
||||
part_number - 1,
|
||||
num_parts,
|
||||
transfer_future.meta.size,
|
||||
)
|
||||
# Get the checksum algorithm of the multipart request.
|
||||
checksum_algorithm = call_args.extra_args.get("ChecksumAlgorithm")
|
||||
part_futures.append(
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
CopyPartTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'copy_source': call_args.copy_source,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'part_number': part_number,
|
||||
'extra_args': extra_part_args,
|
||||
'callbacks': progress_callbacks,
|
||||
'size': size,
|
||||
'checksum_algorithm': checksum_algorithm,
|
||||
},
|
||||
pending_main_kwargs={
|
||||
'upload_id': create_multipart_future
|
||||
},
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
complete_multipart_extra_args = self._extra_complete_multipart_args(
|
||||
call_args.extra_args
|
||||
)
|
||||
# Submit the request to complete the multipart upload.
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
CompleteMultipartUploadTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': complete_multipart_extra_args,
|
||||
},
|
||||
pending_main_kwargs={
|
||||
'upload_id': create_multipart_future,
|
||||
'parts': part_futures,
|
||||
},
|
||||
is_final=True,
|
||||
),
|
||||
)
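# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# How the multipart copy submission above sizes its parts: the configured
# chunksize is adjusted to S3's part limits, the part count is the ceiling of
# object_size / part_size, and each part copies one contiguous byte range.
# The 6 GiB object size and 8 MiB chunksize are made-up example inputs.
import math

from s3transfer.utils import ChunksizeAdjuster, calculate_range_parameter

object_size = 6 * 1024**3
part_size = ChunksizeAdjuster().adjust_chunksize(8 * 1024**2, object_size)
num_parts = int(math.ceil(object_size / float(part_size)))
for part_number in range(1, num_parts + 1):
    # e.g. 'bytes=0-8388607' for part 1; the final range is shorter when the
    # size is not an exact multiple of part_size.
    copy_source_range = calculate_range_parameter(
        part_size, part_number - 1, num_parts, object_size
    )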
|
||||
|
||||
def _get_head_object_request_from_copy_source(self, copy_source):
|
||||
if isinstance(copy_source, dict):
|
||||
return copy.copy(copy_source)
|
||||
else:
|
||||
raise TypeError(
|
||||
'Expecting dictionary formatted: '
|
||||
'{"Bucket": bucket_name, "Key": key} '
|
||||
f'but got {copy_source} or type {type(copy_source)}.'
|
||||
)
|
||||
|
||||
def _extra_upload_part_args(self, extra_args):
|
||||
# Only the args in COPY_PART_ARGS actually need to be passed
|
||||
# onto the upload_part_copy calls.
|
||||
return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS)
|
||||
|
||||
def _extra_complete_multipart_args(self, extra_args):
|
||||
return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
|
||||
|
||||
def _get_transfer_size(
|
||||
self, part_size, part_index, num_parts, total_transfer_size
|
||||
):
|
||||
if part_index == num_parts - 1:
|
||||
# The last part may be different in size than the rest of the
|
||||
# parts.
|
||||
return total_transfer_size - (part_index * part_size)
|
||||
return part_size
|
||||
|
||||
|
||||
class CopyObjectTask(Task):
|
||||
"""Task to do a nonmultipart copy"""
|
||||
|
||||
def _main(
|
||||
self, client, copy_source, bucket, key, extra_args, callbacks, size
|
||||
):
|
||||
"""
|
||||
:param client: The client to use when calling CopyObject
|
||||
:param copy_source: The CopySource parameter to use
|
||||
:param bucket: The name of the bucket to copy to
|
||||
:param key: The name of the key to copy to
|
||||
:param extra_args: A dictionary of any extra arguments that may be
|
||||
used in the copy.
|
||||
:param callbacks: List of callbacks to call after copy
|
||||
:param size: The size of the transfer. This value is passed into
|
||||
the callbacks
|
||||
|
||||
"""
|
||||
client.copy_object(
|
||||
CopySource=copy_source, Bucket=bucket, Key=key, **extra_args
|
||||
)
|
||||
for callback in callbacks:
|
||||
callback(bytes_transferred=size)
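# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# A non-multipart copy reduces to a single CopyObject call; extra_args are
# forwarded verbatim. Bucket, key, and copy source names are made up.
import boto3

s3 = boto3.client('s3', region_name='us-west-2')
s3.copy_object(
    CopySource={'Bucket': 'source-bucket', 'Key': 'source-key'},
    Bucket='dest-bucket',
    Key='dest-key',
    MetadataDirective='COPY',  # an example of an argument passed via extra_args
)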
|
||||
|
||||
|
||||
class CopyPartTask(Task):
|
||||
"""Task to upload a part in a multipart copy"""
|
||||
|
||||
def _main(
|
||||
self,
|
||||
client,
|
||||
copy_source,
|
||||
bucket,
|
||||
key,
|
||||
upload_id,
|
||||
part_number,
|
||||
extra_args,
|
||||
callbacks,
|
||||
size,
|
||||
checksum_algorithm=None,
|
||||
):
|
||||
"""
|
||||
:param client: The client to use when calling UploadPartCopy
|
||||
:param copy_source: The CopySource parameter to use
|
||||
:param bucket: The name of the bucket to upload to
|
||||
:param key: The name of the key to upload to
|
||||
:param upload_id: The id of the upload
|
||||
:param part_number: The number representing the part of the multipart
|
||||
upload
|
||||
:param extra_args: A dictionary of any extra arguments that may be
|
||||
used in the upload.
|
||||
:param callbacks: List of callbacks to call after copy part
|
||||
:param size: The size of the transfer. This value is passed into
|
||||
the callbacks
|
||||
:param checksum_algorithm: The algorithm that was used to create the multipart
|
||||
upload
|
||||
|
||||
:rtype: dict
|
||||
:returns: A dictionary representing a part::
|
||||
|
||||
{'ETag': etag_value, 'PartNumber': part_number}
|
||||
|
||||
This value can be appended to a list to be used to complete
|
||||
the multipart upload. If a checksum is in the response,
|
||||
it will also be included.
|
||||
"""
|
||||
response = client.upload_part_copy(
|
||||
CopySource=copy_source,
|
||||
Bucket=bucket,
|
||||
Key=key,
|
||||
UploadId=upload_id,
|
||||
PartNumber=part_number,
|
||||
**extra_args,
|
||||
)
|
||||
for callback in callbacks:
|
||||
callback(bytes_transferred=size)
|
||||
etag = response['CopyPartResult']['ETag']
|
||||
part_metadata = {'ETag': etag, 'PartNumber': part_number}
|
||||
if checksum_algorithm:
|
||||
checksum_member = f'Checksum{checksum_algorithm.upper()}'
|
||||
if checksum_member in response['CopyPartResult']:
|
||||
part_metadata[checksum_member] = response['CopyPartResult'][
|
||||
checksum_member
|
||||
]
|
||||
return part_metadata
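# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# Each CopyPartTask result is a dict like the ones below (plus a Checksum*
# member when one was requested). The coordinator gathers these futures and
# feeds them to CompleteMultipartUploadTask, roughly equivalent to this direct
# call. All identifiers are made-up placeholders.
import boto3

s3 = boto3.client('s3')
parts = [
    {'ETag': '"etag-1"', 'PartNumber': 1},
    {'ETag': '"etag-2"', 'PartNumber': 2},
]
s3.complete_multipart_upload(
    Bucket='dest-bucket',
    Key='dest-key',
    UploadId='example-upload-id',
    MultipartUpload={'Parts': parts},
)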
|
991
cdk-env/lib/python3.12/site-packages/s3transfer/crt.py
Normal file
@@ -0,0 +1,991 @@
|
||||
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from io import BytesIO
|
||||
|
||||
import awscrt.http
|
||||
import awscrt.s3
|
||||
import botocore.awsrequest
|
||||
import botocore.session
|
||||
from awscrt.auth import (
|
||||
AwsCredentials,
|
||||
AwsCredentialsProvider,
|
||||
AwsSigningAlgorithm,
|
||||
AwsSigningConfig,
|
||||
)
|
||||
from awscrt.io import (
|
||||
ClientBootstrap,
|
||||
ClientTlsContext,
|
||||
DefaultHostResolver,
|
||||
EventLoopGroup,
|
||||
TlsContextOptions,
|
||||
)
|
||||
from awscrt.s3 import S3Client, S3RequestTlsMode, S3RequestType
|
||||
from botocore import UNSIGNED
|
||||
from botocore.compat import urlsplit
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import NoCredentialsError
|
||||
from botocore.utils import ArnParser, InvalidArnException
|
||||
|
||||
from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS, MB
|
||||
from s3transfer.exceptions import TransferNotDoneError
|
||||
from s3transfer.futures import BaseTransferFuture, BaseTransferMeta
|
||||
from s3transfer.manager import TransferManager
|
||||
from s3transfer.utils import (
|
||||
CallArgs,
|
||||
OSUtils,
|
||||
get_callbacks,
|
||||
is_s3express_bucket,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CRT_S3_PROCESS_LOCK = None
|
||||
|
||||
|
||||
def acquire_crt_s3_process_lock(name):
|
||||
# Currently, the CRT S3 client performs best when there is only one
|
||||
# instance of it running on a host. This lock allows an application to
|
||||
# signal across processes whether there is another process of the same
|
||||
# application using the CRT S3 client and prevent spawning more than one
|
||||
# CRT S3 client running on the system for that application.
|
||||
#
|
||||
# NOTE: When acquiring the CRT process lock, the lock automatically is
|
||||
# released when the lock object is garbage collected. So, the CRT process
|
||||
# lock is set as a global so that it is not unintentionally garbage
|
||||
# collected/released if reference of the lock is lost.
|
||||
global CRT_S3_PROCESS_LOCK
|
||||
if CRT_S3_PROCESS_LOCK is None:
|
||||
crt_lock = awscrt.s3.CrossProcessLock(name)
|
||||
try:
|
||||
crt_lock.acquire()
|
||||
except RuntimeError:
|
||||
# If there is another process that is holding the lock, the CRT
|
||||
# returns a RuntimeError. We return None here to signal that our
|
||||
# current process was not able to acquire the lock.
|
||||
return None
|
||||
CRT_S3_PROCESS_LOCK = crt_lock
|
||||
return CRT_S3_PROCESS_LOCK
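# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# An application can call the helper above before constructing a CRT-based
# client; a None return means another process of the same application already
# holds the lock, so the caller should fall back to a non-CRT client.
# 'my-application' is a made-up lock name.
lock = acquire_crt_s3_process_lock('my-application')
use_crt_client = lock is not None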
|
||||
|
||||
|
||||
def create_s3_crt_client(
|
||||
region,
|
||||
crt_credentials_provider=None,
|
||||
num_threads=None,
|
||||
target_throughput=None,
|
||||
part_size=8 * MB,
|
||||
use_ssl=True,
|
||||
verify=None,
|
||||
):
|
||||
"""
|
||||
:type region: str
|
||||
:param region: The region used for signing
|
||||
|
||||
:type crt_credentials_provider:
|
||||
Optional[awscrt.auth.AwsCredentialsProvider]
|
||||
:param crt_credentials_provider: CRT AWS credentials provider
|
||||
to use to sign requests. If not set, requests will not be signed.
|
||||
|
||||
:type num_threads: Optional[int]
|
||||
:param num_threads: Number of worker threads generated. Default
|
||||
is the number of processors in the machine.
|
||||
|
||||
:type target_throughput: Optional[int]
|
||||
:param target_throughput: Throughput target in bytes per second.
|
||||
By default, CRT will automatically attempt to choose a target
|
||||
throughput that matches the system's maximum network throughput.
|
||||
Currently, if CRT is unable to determine the maximum network
|
||||
throughput, a fallback target throughput of ``1_250_000_000`` bytes
|
||||
per second (which translates to 10 gigabits per second, or 1.16
|
||||
gibibytes per second) is used. To set a specific target
|
||||
throughput, set a value for this parameter.
|
||||
|
||||
:type part_size: Optional[int]
|
||||
:param part_size: Size, in Bytes, of parts that files will be downloaded
|
||||
or uploaded in.
|
||||
|
||||
:type use_ssl: boolean
|
||||
:param use_ssl: Whether or not to use SSL. By default, SSL is used.
|
||||
Note that not all services support non-ssl connections.
|
||||
|
||||
:type verify: Optional[boolean/string]
|
||||
:param verify: Whether or not to verify SSL certificates.
|
||||
By default SSL certificates are verified. You can provide the
|
||||
following values:
|
||||
|
||||
* False - do not validate SSL certificates. SSL will still be
|
||||
used (unless use_ssl is False), but SSL certificates
|
||||
will not be verified.
|
||||
* path/to/cert/bundle.pem - A filename of the CA cert bundle to
|
||||
use. Specify this argument if you want to use a custom CA cert
|
||||
bundle instead of the default one on your system.
|
||||
"""
|
||||
event_loop_group = EventLoopGroup(num_threads)
|
||||
host_resolver = DefaultHostResolver(event_loop_group)
|
||||
bootstrap = ClientBootstrap(event_loop_group, host_resolver)
|
||||
tls_connection_options = None
|
||||
|
||||
tls_mode = (
|
||||
S3RequestTlsMode.ENABLED if use_ssl else S3RequestTlsMode.DISABLED
|
||||
)
|
||||
if verify is not None:
|
||||
tls_ctx_options = TlsContextOptions()
|
||||
if verify:
|
||||
tls_ctx_options.override_default_trust_store_from_path(
|
||||
ca_filepath=verify
|
||||
)
|
||||
else:
|
||||
tls_ctx_options.verify_peer = False
|
||||
client_tls_option = ClientTlsContext(tls_ctx_options)
|
||||
tls_connection_options = client_tls_option.new_connection_options()
|
||||
target_gbps = _get_crt_throughput_target_gbps(
|
||||
provided_throughput_target_bytes=target_throughput
|
||||
)
|
||||
return S3Client(
|
||||
bootstrap=bootstrap,
|
||||
region=region,
|
||||
credential_provider=crt_credentials_provider,
|
||||
part_size=part_size,
|
||||
tls_mode=tls_mode,
|
||||
tls_connection_options=tls_connection_options,
|
||||
throughput_target_gbps=target_gbps,
|
||||
enable_s3express=True,
|
||||
)
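# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# Minimal construction of a CRT S3 client with the factory above. Without a
# crt_credentials_provider requests stay unsigned; a real setup would pass a
# provider (see BotocoreCRTCredentialsWrapper further down). Values are
# arbitrary examples.
crt_client = create_s3_crt_client(
    region='us-west-2',
    part_size=16 * MB,
    target_throughput=1_250_000_000,  # bytes/second, i.e. ~10 Gbps
)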
|
||||
|
||||
|
||||
def _get_crt_throughput_target_gbps(provided_throughput_target_bytes=None):
|
||||
if provided_throughput_target_bytes is None:
|
||||
target_gbps = awscrt.s3.get_recommended_throughput_target_gbps()
|
||||
logger.debug(
|
||||
'Recommended CRT throughput target in gbps: %s', target_gbps
|
||||
)
|
||||
if target_gbps is None:
|
||||
target_gbps = 10.0
|
||||
else:
|
||||
# NOTE: The GB constant in s3transfer is technically a gibibyte. The
|
||||
# GB constant is not used here because the CRT interprets gigabits
|
||||
# for networking as a base power of 10
|
||||
# (i.e. 1000 ** 3 instead of 1024 ** 3).
|
||||
target_gbps = provided_throughput_target_bytes * 8 / 1_000_000_000
|
||||
logger.debug('Using CRT throughput target in gbps: %s', target_gbps)
|
||||
return target_gbps
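# --- Illustrative worked example (editor-added, not part of s3transfer) ---
# The conversion above is bytes/second -> decimal gigabits/second:
# multiply by 8 bits per byte and divide by 1_000_000_000.
assert 1_250_000_000 * 8 / 1_000_000_000 == 10.0  # 1.25 GB/s is ~10 Gbps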
|
||||
|
||||
|
||||
class CRTTransferManager:
|
||||
ALLOWED_DOWNLOAD_ARGS = TransferManager.ALLOWED_DOWNLOAD_ARGS
|
||||
ALLOWED_UPLOAD_ARGS = TransferManager.ALLOWED_UPLOAD_ARGS
|
||||
ALLOWED_DELETE_ARGS = TransferManager.ALLOWED_DELETE_ARGS
|
||||
|
||||
VALIDATE_SUPPORTED_BUCKET_VALUES = True
|
||||
|
||||
_UNSUPPORTED_BUCKET_PATTERNS = TransferManager._UNSUPPORTED_BUCKET_PATTERNS
|
||||
|
||||
def __init__(self, crt_s3_client, crt_request_serializer, osutil=None):
|
||||
"""A transfer manager interface for Amazon S3 on CRT s3 client.
|
||||
|
||||
:type crt_s3_client: awscrt.s3.S3Client
|
||||
:param crt_s3_client: The CRT s3 client, handling all the
|
||||
HTTP requests and functions under the hood
|
||||
|
||||
:type crt_request_serializer: s3transfer.crt.BaseCRTRequestSerializer
|
||||
:param crt_request_serializer: Serializer, generates unsigned crt HTTP
|
||||
request.
|
||||
|
||||
:type osutil: s3transfer.utils.OSUtils
|
||||
:param osutil: OSUtils object to use for os-related behavior when
|
||||
using with transfer manager.
|
||||
"""
|
||||
if osutil is None:
|
||||
self._osutil = OSUtils()
|
||||
self._crt_s3_client = crt_s3_client
|
||||
self._s3_args_creator = S3ClientArgsCreator(
|
||||
crt_request_serializer, self._osutil
|
||||
)
|
||||
self._crt_exception_translator = (
|
||||
crt_request_serializer.translate_crt_exception
|
||||
)
|
||||
self._future_coordinators = []
|
||||
self._semaphore = threading.Semaphore(128) # not configurable
|
||||
# A counter to create unique id's for each transfer submitted.
|
||||
self._id_counter = 0
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, *args):
|
||||
cancel = False
|
||||
if exc_type:
|
||||
cancel = True
|
||||
self._shutdown(cancel)
|
||||
|
||||
def download(
|
||||
self, bucket, key, fileobj, extra_args=None, subscribers=None
|
||||
):
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
if subscribers is None:
|
||||
subscribers = {}
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
callargs = CallArgs(
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
fileobj=fileobj,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
)
|
||||
return self._submit_transfer("get_object", callargs)
|
||||
|
||||
def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
if subscribers is None:
|
||||
subscribers = {}
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
self._validate_checksum_algorithm_supported(extra_args)
|
||||
callargs = CallArgs(
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
fileobj=fileobj,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
)
|
||||
return self._submit_transfer("put_object", callargs)
|
||||
|
||||
def delete(self, bucket, key, extra_args=None, subscribers=None):
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
if subscribers is None:
|
||||
subscribers = {}
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
callargs = CallArgs(
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
)
|
||||
return self._submit_transfer("delete_object", callargs)
|
||||
|
||||
def shutdown(self, cancel=False):
|
||||
self._shutdown(cancel)
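# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# Typical use of the manager above as a context manager; __exit__ calls
# _shutdown(), which finishes (or cancels, on error) outstanding transfers.
# A real setup would also pass a crt_credentials_provider to
# create_s3_crt_client (see BotocoreCRTCredentialsWrapper further down).
# Bucket, key, and file path are made-up values.
session = botocore.session.Session()
serializer = BotocoreCRTRequestSerializer(session)
crt_client = create_s3_crt_client(region='us-west-2')

with CRTTransferManager(crt_client, serializer) as manager:
    future = manager.upload('/tmp/myfile', 'example-bucket', 'example-key')
    future.result()  # blocks until the CRT transfer completes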
|
||||
|
||||
def _validate_if_bucket_supported(self, bucket):
|
||||
# S3 high-level operations don't support some resources
|
||||
# (e.g. S3 Object Lambda); only direct API calls are available
|
||||
# for such resources.
|
||||
if self.VALIDATE_SUPPORTED_BUCKET_VALUES:
|
||||
for resource, pattern in self._UNSUPPORTED_BUCKET_PATTERNS.items():
|
||||
match = pattern.match(bucket)
|
||||
if match:
|
||||
raise ValueError(
|
||||
f'TransferManager methods do not support {resource} '
|
||||
'resource. Use direct client calls instead.'
|
||||
)
|
||||
|
||||
def _validate_all_known_args(self, actual, allowed):
|
||||
for kwarg in actual:
|
||||
if kwarg not in allowed:
|
||||
raise ValueError(
|
||||
f"Invalid extra_args key '{kwarg}', "
|
||||
f"must be one of: {', '.join(allowed)}"
|
||||
)
|
||||
|
||||
def _validate_checksum_algorithm_supported(self, extra_args):
|
||||
checksum_algorithm = extra_args.get('ChecksumAlgorithm')
|
||||
if checksum_algorithm is None:
|
||||
return
|
||||
supported_algorithms = list(awscrt.s3.S3ChecksumAlgorithm.__members__)
|
||||
if checksum_algorithm.upper() not in supported_algorithms:
|
||||
raise ValueError(
|
||||
f'ChecksumAlgorithm: {checksum_algorithm} not supported. '
|
||||
f'Supported algorithms are: {supported_algorithms}'
|
||||
)
|
||||
|
||||
def _cancel_transfers(self):
|
||||
for coordinator in self._future_coordinators:
|
||||
if not coordinator.done():
|
||||
coordinator.cancel()
|
||||
|
||||
def _finish_transfers(self):
|
||||
for coordinator in self._future_coordinators:
|
||||
coordinator.result()
|
||||
|
||||
def _wait_transfers_done(self):
|
||||
for coordinator in self._future_coordinators:
|
||||
coordinator.wait_until_on_done_callbacks_complete()
|
||||
|
||||
def _shutdown(self, cancel=False):
|
||||
if cancel:
|
||||
self._cancel_transfers()
|
||||
try:
|
||||
self._finish_transfers()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
self._cancel_transfers()
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._wait_transfers_done()
|
||||
|
||||
def _release_semaphore(self, **kwargs):
|
||||
self._semaphore.release()
|
||||
|
||||
def _submit_transfer(self, request_type, call_args):
|
||||
on_done_after_calls = [self._release_semaphore]
|
||||
coordinator = CRTTransferCoordinator(
|
||||
transfer_id=self._id_counter,
|
||||
exception_translator=self._crt_exception_translator,
|
||||
)
|
||||
components = {
|
||||
'meta': CRTTransferMeta(self._id_counter, call_args),
|
||||
'coordinator': coordinator,
|
||||
}
|
||||
future = CRTTransferFuture(**components)
|
||||
afterdone = AfterDoneHandler(coordinator)
|
||||
on_done_after_calls.append(afterdone)
|
||||
|
||||
try:
|
||||
self._semaphore.acquire()
|
||||
on_queued = self._s3_args_creator.get_crt_callback(
|
||||
future, 'queued'
|
||||
)
|
||||
on_queued()
|
||||
crt_callargs = self._s3_args_creator.get_make_request_args(
|
||||
request_type,
|
||||
call_args,
|
||||
coordinator,
|
||||
future,
|
||||
on_done_after_calls,
|
||||
)
|
||||
crt_s3_request = self._crt_s3_client.make_request(**crt_callargs)
|
||||
except Exception as e:
|
||||
coordinator.set_exception(e, True)
|
||||
on_done = self._s3_args_creator.get_crt_callback(
|
||||
future, 'done', after_subscribers=on_done_after_calls
|
||||
)
|
||||
on_done(error=e)
|
||||
else:
|
||||
coordinator.set_s3_request(crt_s3_request)
|
||||
self._future_coordinators.append(coordinator)
|
||||
|
||||
self._id_counter += 1
|
||||
return future
|
||||
|
||||
|
||||
class CRTTransferMeta(BaseTransferMeta):
|
||||
"""Holds metadata about the CRTTransferFuture"""
|
||||
|
||||
def __init__(self, transfer_id=None, call_args=None):
|
||||
self._transfer_id = transfer_id
|
||||
self._call_args = call_args
|
||||
self._user_context = {}
|
||||
|
||||
@property
|
||||
def call_args(self):
|
||||
return self._call_args
|
||||
|
||||
@property
|
||||
def transfer_id(self):
|
||||
return self._transfer_id
|
||||
|
||||
@property
|
||||
def user_context(self):
|
||||
return self._user_context
|
||||
|
||||
|
||||
class CRTTransferFuture(BaseTransferFuture):
|
||||
def __init__(self, meta=None, coordinator=None):
|
||||
"""The future associated to a submitted transfer request via CRT S3 client
|
||||
|
||||
:type meta: s3transfer.crt.CRTTransferMeta
|
||||
:param meta: The metadata associated to the transfer future.
|
||||
|
||||
:type coordinator: s3transfer.crt.CRTTransferCoordinator
|
||||
:param coordinator: The coordinator associated to the transfer future.
|
||||
"""
|
||||
self._meta = meta
|
||||
if meta is None:
|
||||
self._meta = CRTTransferMeta()
|
||||
self._coordinator = coordinator
|
||||
|
||||
@property
|
||||
def meta(self):
|
||||
return self._meta
|
||||
|
||||
def done(self):
|
||||
return self._coordinator.done()
|
||||
|
||||
def result(self, timeout=None):
|
||||
self._coordinator.result(timeout)
|
||||
|
||||
def cancel(self):
|
||||
self._coordinator.cancel()
|
||||
|
||||
def set_exception(self, exception):
|
||||
"""Sets the exception on the future."""
|
||||
if not self.done():
|
||||
raise TransferNotDoneError(
|
||||
'set_exception can only be called once the transfer is '
|
||||
'complete.'
|
||||
)
|
||||
self._coordinator.set_exception(exception, override=True)
|
||||
|
||||
|
||||
class BaseCRTRequestSerializer:
|
||||
def serialize_http_request(self, transfer_type, future):
|
||||
"""Serialize CRT HTTP requests.
|
||||
|
||||
:type transfer_type: string
|
||||
:param transfer_type: the type of transfer made,
|
||||
e.g. 'put_object', 'get_object', 'delete_object'
|
||||
|
||||
:type future: s3transfer.crt.CRTTransferFuture
|
||||
|
||||
:rtype: awscrt.http.HttpRequest
|
||||
:returns: An unsigned HTTP request to be used for the CRT S3 client
|
||||
"""
|
||||
raise NotImplementedError('serialize_http_request()')
|
||||
|
||||
def translate_crt_exception(self, exception):
|
||||
raise NotImplementedError('translate_crt_exception()')
|
||||
|
||||
|
||||
class BotocoreCRTRequestSerializer(BaseCRTRequestSerializer):
|
||||
def __init__(self, session, client_kwargs=None):
|
||||
"""Serialize CRT HTTP request using botocore logic
|
||||
It also takes into account configuration from both the session
|
||||
and any keyword arguments that could be passed to
|
||||
`Session.create_client()` when serializing the request.
|
||||
|
||||
:type session: botocore.session.Session
|
||||
|
||||
:type client_kwargs: Optional[Dict[str, str]]
|
||||
:param client_kwargs: The kwargs for the botocore
|
||||
s3 client initialization.
|
||||
"""
|
||||
self._session = session
|
||||
if client_kwargs is None:
|
||||
client_kwargs = {}
|
||||
self._resolve_client_config(session, client_kwargs)
|
||||
self._client = session.create_client(**client_kwargs)
|
||||
self._client.meta.events.register(
|
||||
'request-created.s3.*', self._capture_http_request
|
||||
)
|
||||
self._client.meta.events.register(
|
||||
'after-call.s3.*', self._change_response_to_serialized_http_request
|
||||
)
|
||||
self._client.meta.events.register(
|
||||
'before-send.s3.*', self._make_fake_http_response
|
||||
)
|
||||
self._client.meta.events.register(
|
||||
'before-call.s3.*', self._remove_checksum_context
|
||||
)
|
||||
|
||||
def _resolve_client_config(self, session, client_kwargs):
|
||||
user_provided_config = None
|
||||
if session.get_default_client_config():
|
||||
user_provided_config = session.get_default_client_config()
|
||||
if 'config' in client_kwargs:
|
||||
user_provided_config = client_kwargs['config']
|
||||
|
||||
client_config = Config(signature_version=UNSIGNED)
|
||||
if user_provided_config:
|
||||
client_config = user_provided_config.merge(client_config)
|
||||
client_kwargs['config'] = client_config
|
||||
client_kwargs["service_name"] = "s3"
|
||||
|
||||
def _crt_request_from_aws_request(self, aws_request):
|
||||
url_parts = urlsplit(aws_request.url)
|
||||
crt_path = url_parts.path
|
||||
if url_parts.query:
|
||||
crt_path = f'{crt_path}?{url_parts.query}'
|
||||
headers_list = []
|
||||
for name, value in aws_request.headers.items():
|
||||
if isinstance(value, str):
|
||||
headers_list.append((name, value))
|
||||
else:
|
||||
headers_list.append((name, str(value, 'utf-8')))
|
||||
|
||||
crt_headers = awscrt.http.HttpHeaders(headers_list)
|
||||
|
||||
crt_request = awscrt.http.HttpRequest(
|
||||
method=aws_request.method,
|
||||
path=crt_path,
|
||||
headers=crt_headers,
|
||||
body_stream=aws_request.body,
|
||||
)
|
||||
return crt_request
|
||||
|
||||
def _convert_to_crt_http_request(self, botocore_http_request):
|
||||
# Logic that does CRTUtils.crt_request_from_aws_request
|
||||
crt_request = self._crt_request_from_aws_request(botocore_http_request)
|
||||
if crt_request.headers.get("host") is None:
|
||||
# If host is not set, set it on the request before using the CRT S3 client
|
||||
url_parts = urlsplit(botocore_http_request.url)
|
||||
crt_request.headers.set("host", url_parts.netloc)
|
||||
if crt_request.headers.get('Content-MD5') is not None:
|
||||
crt_request.headers.remove("Content-MD5")
|
||||
|
||||
# In general, the CRT S3 client expects a content length header. It
|
||||
# only expects a missing content length header if the body is not
|
||||
# seekable. However, botocore does not set the content length header
|
||||
# for GetObject API requests and so we set the content length to zero
|
||||
# to meet the CRT S3 client's expectation that the content length
|
||||
# header is set even if there is no body.
|
||||
if crt_request.headers.get('Content-Length') is None:
|
||||
if botocore_http_request.body is None:
|
||||
crt_request.headers.add('Content-Length', "0")
|
||||
|
||||
# Botocore sets the Transfer-Encoding header when it cannot determine
|
||||
# the content length of the request body (e.g. it's not seekable).
|
||||
# However, while CRT does not support this header, it does support
|
||||
# non-seekable bodies, so we remove the header to avoid issues
|
||||
# in the downstream CRT S3 request.
|
||||
if crt_request.headers.get('Transfer-Encoding') is not None:
|
||||
crt_request.headers.remove('Transfer-Encoding')
|
||||
|
||||
return crt_request
|
||||
|
||||
def _capture_http_request(self, request, **kwargs):
|
||||
request.context['http_request'] = request
|
||||
|
||||
def _change_response_to_serialized_http_request(
|
||||
self, context, parsed, **kwargs
|
||||
):
|
||||
request = context['http_request']
|
||||
parsed['HTTPRequest'] = request.prepare()
|
||||
|
||||
def _make_fake_http_response(self, request, **kwargs):
|
||||
return botocore.awsrequest.AWSResponse(
|
||||
None,
|
||||
200,
|
||||
{},
|
||||
FakeRawResponse(b""),
|
||||
)
|
||||
|
||||
def _get_botocore_http_request(self, client_method, call_args):
|
||||
return getattr(self._client, client_method)(
|
||||
Bucket=call_args.bucket, Key=call_args.key, **call_args.extra_args
|
||||
)['HTTPRequest']
|
||||
|
||||
def serialize_http_request(self, transfer_type, future):
|
||||
botocore_http_request = self._get_botocore_http_request(
|
||||
transfer_type, future.meta.call_args
|
||||
)
|
||||
crt_request = self._convert_to_crt_http_request(botocore_http_request)
|
||||
return crt_request
|
||||
|
||||
def translate_crt_exception(self, exception):
|
||||
if isinstance(exception, awscrt.s3.S3ResponseError):
|
||||
return self._translate_crt_s3_response_error(exception)
|
||||
else:
|
||||
return None
|
||||
|
||||
def _translate_crt_s3_response_error(self, s3_response_error):
|
||||
status_code = s3_response_error.status_code
|
||||
if status_code < 301:
|
||||
# Botocore's exception parsing only
|
||||
# runs on status codes >= 301
|
||||
return None
|
||||
|
||||
headers = {k: v for k, v in s3_response_error.headers}
|
||||
operation_name = s3_response_error.operation_name
|
||||
if operation_name is not None:
|
||||
service_model = self._client.meta.service_model
|
||||
shape = service_model.operation_model(operation_name).output_shape
|
||||
else:
|
||||
shape = None
|
||||
|
||||
response_dict = {
|
||||
'headers': botocore.awsrequest.HeadersDict(headers),
|
||||
'status_code': status_code,
|
||||
'body': s3_response_error.body,
|
||||
}
|
||||
parsed_response = self._client._response_parser.parse(
|
||||
response_dict, shape=shape
|
||||
)
|
||||
|
||||
error_code = parsed_response.get("Error", {}).get("Code")
|
||||
error_class = self._client.exceptions.from_code(error_code)
|
||||
return error_class(parsed_response, operation_name=operation_name)
|
||||
|
||||
def _remove_checksum_context(self, params, **kwargs):
|
||||
request_context = params.get("context", {})
|
||||
if "checksum" in request_context:
|
||||
del request_context["checksum"]
|
||||
|
||||
|
||||
class FakeRawResponse(BytesIO):
|
||||
def stream(self, amt=1024, decode_content=None):
|
||||
while True:
|
||||
chunk = self.read(amt)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
|
||||
class BotocoreCRTCredentialsWrapper:
|
||||
def __init__(self, resolved_botocore_credentials):
|
||||
self._resolved_credentials = resolved_botocore_credentials
|
||||
|
||||
def __call__(self):
|
||||
credentials = self._get_credentials().get_frozen_credentials()
|
||||
return AwsCredentials(
|
||||
credentials.access_key, credentials.secret_key, credentials.token
|
||||
)
|
||||
|
||||
def to_crt_credentials_provider(self):
|
||||
return AwsCredentialsProvider.new_delegate(self)
|
||||
|
||||
def _get_credentials(self):
|
||||
if self._resolved_credentials is None:
|
||||
raise NoCredentialsError()
|
||||
return self._resolved_credentials
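# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# Bridging botocore's resolved credentials into a CRT credentials provider
# that can be handed to create_s3_crt_client().
session = botocore.session.Session()
wrapper = BotocoreCRTCredentialsWrapper(session.get_credentials())
crt_credentials_provider = wrapper.to_crt_credentials_provider()
crt_client = create_s3_crt_client(
    region='us-west-2', crt_credentials_provider=crt_credentials_provider
)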
|
||||
|
||||
|
||||
class CRTTransferCoordinator:
|
||||
"""A helper class for managing CRTTransferFuture"""
|
||||
|
||||
def __init__(
|
||||
self, transfer_id=None, s3_request=None, exception_translator=None
|
||||
):
|
||||
self.transfer_id = transfer_id
|
||||
self._exception_translator = exception_translator
|
||||
self._s3_request = s3_request
|
||||
self._lock = threading.Lock()
|
||||
self._exception = None
|
||||
self._crt_future = None
|
||||
self._done_event = threading.Event()
|
||||
|
||||
@property
|
||||
def s3_request(self):
|
||||
return self._s3_request
|
||||
|
||||
def set_done_callbacks_complete(self):
|
||||
self._done_event.set()
|
||||
|
||||
def wait_until_on_done_callbacks_complete(self, timeout=None):
|
||||
self._done_event.wait(timeout)
|
||||
|
||||
def set_exception(self, exception, override=False):
|
||||
with self._lock:
|
||||
if not self.done() or override:
|
||||
self._exception = exception
|
||||
|
||||
def cancel(self):
|
||||
if self._s3_request:
|
||||
self._s3_request.cancel()
|
||||
|
||||
def result(self, timeout=None):
|
||||
if self._exception:
|
||||
raise self._exception
|
||||
try:
|
||||
self._crt_future.result(timeout)
|
||||
except KeyboardInterrupt:
|
||||
self.cancel()
|
||||
self._crt_future.result(timeout)
|
||||
raise
|
||||
except Exception as e:
|
||||
self.handle_exception(e)
|
||||
finally:
|
||||
if self._s3_request:
|
||||
self._s3_request = None
|
||||
|
||||
def handle_exception(self, exc):
|
||||
translated_exc = None
|
||||
if self._exception_translator:
|
||||
try:
|
||||
translated_exc = self._exception_translator(exc)
|
||||
except Exception as e:
|
||||
# Bail out if we hit an issue translating
|
||||
# and raise the original error.
|
||||
logger.debug("Unable to translate exception.", exc_info=e)
|
||||
pass
|
||||
if translated_exc is not None:
|
||||
raise translated_exc from exc
|
||||
else:
|
||||
raise exc
|
||||
|
||||
def done(self):
|
||||
if self._crt_future is None:
|
||||
return False
|
||||
return self._crt_future.done()
|
||||
|
||||
def set_s3_request(self, s3_request):
|
||||
self._s3_request = s3_request
|
||||
self._crt_future = self._s3_request.finished_future
|
||||
|
||||
|
||||
class S3ClientArgsCreator:
|
||||
def __init__(self, crt_request_serializer, os_utils):
|
||||
self._request_serializer = crt_request_serializer
|
||||
self._os_utils = os_utils
|
||||
|
||||
def get_make_request_args(
|
||||
self, request_type, call_args, coordinator, future, on_done_after_calls
|
||||
):
|
||||
request_args_handler = getattr(
|
||||
self,
|
||||
f'_get_make_request_args_{request_type}',
|
||||
self._default_get_make_request_args,
|
||||
)
|
||||
return request_args_handler(
|
||||
request_type=request_type,
|
||||
call_args=call_args,
|
||||
coordinator=coordinator,
|
||||
future=future,
|
||||
on_done_before_calls=[],
|
||||
on_done_after_calls=on_done_after_calls,
|
||||
)
|
||||
|
||||
def get_crt_callback(
|
||||
self,
|
||||
future,
|
||||
callback_type,
|
||||
before_subscribers=None,
|
||||
after_subscribers=None,
|
||||
):
|
||||
def invoke_all_callbacks(*args, **kwargs):
|
||||
callbacks_list = []
|
||||
if before_subscribers is not None:
|
||||
callbacks_list += before_subscribers
|
||||
callbacks_list += get_callbacks(future, callback_type)
|
||||
if after_subscribers is not None:
|
||||
callbacks_list += after_subscribers
|
||||
for callback in callbacks_list:
|
||||
# The get_callbacks helper will set the first argument
|
||||
# by keyword; the other arguments need to be set by keyword
|
||||
# as well.
|
||||
if callback_type == "progress":
|
||||
callback(bytes_transferred=args[0])
|
||||
else:
|
||||
callback(*args, **kwargs)
|
||||
|
||||
return invoke_all_callbacks
|
||||
|
||||
def _get_make_request_args_put_object(
|
||||
self,
|
||||
request_type,
|
||||
call_args,
|
||||
coordinator,
|
||||
future,
|
||||
on_done_before_calls,
|
||||
on_done_after_calls,
|
||||
):
|
||||
send_filepath = None
|
||||
if isinstance(call_args.fileobj, str):
|
||||
send_filepath = call_args.fileobj
|
||||
data_len = self._os_utils.get_file_size(send_filepath)
|
||||
call_args.extra_args["ContentLength"] = data_len
|
||||
else:
|
||||
call_args.extra_args["Body"] = call_args.fileobj
|
||||
|
||||
checksum_config = None
|
||||
if not any(
|
||||
checksum_arg in call_args.extra_args
|
||||
for checksum_arg in FULL_OBJECT_CHECKSUM_ARGS
|
||||
):
|
||||
checksum_algorithm = call_args.extra_args.pop(
|
||||
'ChecksumAlgorithm', 'CRC32'
|
||||
).upper()
|
||||
checksum_config = awscrt.s3.S3ChecksumConfig(
|
||||
algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm],
|
||||
location=awscrt.s3.S3ChecksumLocation.TRAILER,
|
||||
)
|
||||
# Suppress botocore's automatic MD5 calculation by setting an override
|
||||
# value that will get deleted in the BotocoreCRTRequestSerializer.
|
||||
# As part of the CRT S3 request, we request the CRT S3 client to
|
||||
# automatically add trailing checksums to its uploads.
|
||||
call_args.extra_args["ContentMD5"] = "override-to-be-removed"
|
||||
|
||||
make_request_args = self._default_get_make_request_args(
|
||||
request_type=request_type,
|
||||
call_args=call_args,
|
||||
coordinator=coordinator,
|
||||
future=future,
|
||||
on_done_before_calls=on_done_before_calls,
|
||||
on_done_after_calls=on_done_after_calls,
|
||||
)
|
||||
make_request_args['send_filepath'] = send_filepath
|
||||
make_request_args['checksum_config'] = checksum_config
|
||||
return make_request_args
|
||||
|
||||
def _get_make_request_args_get_object(
|
||||
self,
|
||||
request_type,
|
||||
call_args,
|
||||
coordinator,
|
||||
future,
|
||||
on_done_before_calls,
|
||||
on_done_after_calls,
|
||||
):
|
||||
recv_filepath = None
|
||||
on_body = None
|
||||
checksum_config = awscrt.s3.S3ChecksumConfig(validate_response=True)
|
||||
if isinstance(call_args.fileobj, str):
|
||||
final_filepath = call_args.fileobj
|
||||
recv_filepath = self._os_utils.get_temp_filename(final_filepath)
|
||||
on_done_before_calls.append(
|
||||
RenameTempFileHandler(
|
||||
coordinator, final_filepath, recv_filepath, self._os_utils
|
||||
)
|
||||
)
|
||||
else:
|
||||
on_body = OnBodyFileObjWriter(call_args.fileobj)
|
||||
|
||||
make_request_args = self._default_get_make_request_args(
|
||||
request_type=request_type,
|
||||
call_args=call_args,
|
||||
coordinator=coordinator,
|
||||
future=future,
|
||||
on_done_before_calls=on_done_before_calls,
|
||||
on_done_after_calls=on_done_after_calls,
|
||||
)
|
||||
make_request_args['recv_filepath'] = recv_filepath
|
||||
make_request_args['on_body'] = on_body
|
||||
make_request_args['checksum_config'] = checksum_config
|
||||
return make_request_args
|
||||
|
||||
def _default_get_make_request_args(
|
||||
self,
|
||||
request_type,
|
||||
call_args,
|
||||
coordinator,
|
||||
future,
|
||||
on_done_before_calls,
|
||||
on_done_after_calls,
|
||||
):
|
||||
make_request_args = {
|
||||
'request': self._request_serializer.serialize_http_request(
|
||||
request_type, future
|
||||
),
|
||||
'type': getattr(
|
||||
S3RequestType, request_type.upper(), S3RequestType.DEFAULT
|
||||
),
|
||||
'on_done': self.get_crt_callback(
|
||||
future, 'done', on_done_before_calls, on_done_after_calls
|
||||
),
|
||||
'on_progress': self.get_crt_callback(future, 'progress'),
|
||||
}
|
||||
|
||||
# For DEFAULT requests, CRT requires the official S3 operation name.
|
||||
# So transform string like "delete_object" -> "DeleteObject".
|
||||
if make_request_args['type'] == S3RequestType.DEFAULT:
|
||||
make_request_args['operation_name'] = ''.join(
|
||||
x.title() for x in request_type.split('_')
|
||||
)
|
||||
|
||||
arn_handler = _S3ArnParamHandler()
|
||||
if (
|
||||
accesspoint_arn_details := arn_handler.handle_arn(call_args.bucket)
|
||||
) and accesspoint_arn_details['region'] == "":
|
||||
# Configure our region to `*` to propagate in `x-amz-region-set`
|
||||
# for multi-region support in MRAP accesspoints.
|
||||
# use_double_uri_encode and should_normalize_uri_path default to True,
|
||||
# but the SDK has already encoded the URI and this is S3, so set both to False.
|
||||
make_request_args['signing_config'] = AwsSigningConfig(
|
||||
algorithm=AwsSigningAlgorithm.V4_ASYMMETRIC,
|
||||
region="*",
|
||||
use_double_uri_encode=False,
|
||||
should_normalize_uri_path=False,
|
||||
)
|
||||
call_args.bucket = accesspoint_arn_details['resource_name']
|
||||
elif is_s3express_bucket(call_args.bucket):
|
||||
# use_double_uri_encode and should_normalize_uri_path default to True,
|
||||
# but the SDK has already encoded the URI and this is S3, so set both to False.
|
||||
make_request_args['signing_config'] = AwsSigningConfig(
|
||||
algorithm=AwsSigningAlgorithm.V4_S3EXPRESS,
|
||||
use_double_uri_encode=False,
|
||||
should_normalize_uri_path=False,
|
||||
)
|
||||
return make_request_args
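# --- Illustrative worked example (editor-added, not part of s3transfer) ---
# The DEFAULT-type branch above derives the official S3 operation name from
# the snake_case request type:
assert ''.join(x.title() for x in 'delete_object'.split('_')) == 'DeleteObject'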
|
||||
|
||||
|
||||
class RenameTempFileHandler:
|
||||
def __init__(self, coordinator, final_filename, temp_filename, osutil):
|
||||
self._coordinator = coordinator
|
||||
self._final_filename = final_filename
|
||||
self._temp_filename = temp_filename
|
||||
self._osutil = osutil
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
error = kwargs['error']
|
||||
if error:
|
||||
self._osutil.remove_file(self._temp_filename)
|
||||
else:
|
||||
try:
|
||||
self._osutil.rename_file(
|
||||
self._temp_filename, self._final_filename
|
||||
)
|
||||
except Exception as e:
|
||||
self._osutil.remove_file(self._temp_filename)
|
||||
# The CRT future has already completed at this point
|
||||
self._coordinator.set_exception(e)
|
||||
|
||||
|
||||
class AfterDoneHandler:
|
||||
def __init__(self, coordinator):
|
||||
self._coordinator = coordinator
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
self._coordinator.set_done_callbacks_complete()
|
||||
|
||||
|
||||
class OnBodyFileObjWriter:
|
||||
def __init__(self, fileobj):
|
||||
self._fileobj = fileobj
|
||||
|
||||
def __call__(self, chunk, **kwargs):
|
||||
self._fileobj.write(chunk)
|
||||
|
||||
|
||||
class _S3ArnParamHandler:
|
||||
"""Partial port of S3ArnParamHandler from botocore.
|
||||
|
||||
This is used to make a determination on MRAP accesspoints for signing
|
||||
purposes. This should be safe to remove once we properly integrate auth
|
||||
resolution from Botocore into the CRT transfer integration.
|
||||
"""
|
||||
|
||||
_RESOURCE_REGEX = re.compile(
|
||||
r'^(?P<resource_type>accesspoint|outpost)[/:](?P<resource_name>.+)$'
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
self._arn_parser = ArnParser()
|
||||
|
||||
def handle_arn(self, bucket):
|
||||
arn_details = self._get_arn_details_from_bucket(bucket)
|
||||
if arn_details is None:
|
||||
return
|
||||
if arn_details['resource_type'] == 'accesspoint':
|
||||
return arn_details
|
||||
|
||||
def _get_arn_details_from_bucket(self, bucket):
|
||||
try:
|
||||
arn_details = self._arn_parser.parse_arn(bucket)
|
||||
self._add_resource_type_and_name(arn_details)
|
||||
return arn_details
|
||||
except InvalidArnException:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _add_resource_type_and_name(self, arn_details):
|
||||
match = self._RESOURCE_REGEX.match(arn_details['resource'])
|
||||
if match:
|
||||
arn_details['resource_type'] = match.group('resource_type')
|
||||
arn_details['resource_name'] = match.group('resource_name')
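# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# A multi-region access point (MRAP) ARN carries no region component, which is
# exactly what the signing logic above keys on. The account id and alias are
# made-up values.
handler = _S3ArnParamHandler()
details = handler.handle_arn(
    'arn:aws:s3::123456789012:accesspoint/example-alias.mrap'
)
# details['region'] == '' and details['resource_name'] holds the access point
# alias, so the request is signed with SigV4A and region '*'.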
|
71
cdk-env/lib/python3.12/site-packages/s3transfer/delete.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
from s3transfer.tasks import SubmissionTask, Task
|
||||
|
||||
|
||||
class DeleteSubmissionTask(SubmissionTask):
|
||||
"""Task for submitting tasks to execute an object deletion."""
|
||||
|
||||
def _submit(self, client, request_executor, transfer_future, **kwargs):
|
||||
"""
|
||||
:param client: The client associated with the transfer manager
|
||||
|
||||
:type config: s3transfer.manager.TransferConfig
|
||||
:param config: The transfer config associated with the transfer
|
||||
manager
|
||||
|
||||
:type osutil: s3transfer.utils.OSUtil
|
||||
:param osutil: The os utility associated to the transfer manager
|
||||
|
||||
:type request_executor: s3transfer.futures.BoundedExecutor
|
||||
:param request_executor: The request executor associated with the
|
||||
transfer manager
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future associated with the
|
||||
transfer request that tasks are being submitted for
|
||||
"""
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
DeleteObjectTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': call_args.extra_args,
|
||||
},
|
||||
is_final=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class DeleteObjectTask(Task):
|
||||
def _main(self, client, bucket, key, extra_args):
|
||||
"""
|
||||
|
||||
:param client: The S3 client to use when calling DeleteObject
|
||||
|
||||
:type bucket: str
|
||||
:param bucket: The name of the bucket.
|
||||
|
||||
:type key: str
|
||||
:param key: The name of the object to delete.
|
||||
|
||||
:type extra_args: dict
|
||||
:param extra_args: Extra arguments to pass to the DeleteObject call.
|
||||
|
||||
"""
|
||||
client.delete_object(Bucket=bucket, Key=key, **extra_args)
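# --- Illustrative sketch (editor-added, not part of s3transfer) ---
# The task above is a thin wrapper over DeleteObject; extra_args is forwarded
# verbatim, for example to delete a specific object version. Bucket, key, and
# version id are made-up values.
import boto3

s3 = boto3.client('s3')
s3.delete_object(
    Bucket='example-bucket',
    Key='example-key',
    VersionId='example-version-id',
)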
|
834
cdk-env/lib/python3.12/site-packages/s3transfer/download.py
Normal file
@@ -0,0 +1,834 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import heapq
|
||||
import logging
|
||||
import threading
|
||||
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from s3transfer.compat import seekable
|
||||
from s3transfer.exceptions import RetriesExceededError, S3DownloadFailedError
|
||||
from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
|
||||
from s3transfer.tasks import SubmissionTask, Task
|
||||
from s3transfer.utils import (
|
||||
S3_RETRYABLE_DOWNLOAD_ERRORS,
|
||||
CountCallbackInvoker,
|
||||
DeferredOpenFile,
|
||||
FunctionContainer,
|
||||
StreamReaderProgress,
|
||||
calculate_num_parts,
|
||||
calculate_range_parameter,
|
||||
get_callbacks,
|
||||
invoke_progress_callbacks,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DownloadOutputManager:
|
||||
"""Base manager class for handling various types of files for downloads
|
||||
|
||||
This class is typically used for the DownloadSubmissionTask class to help
|
||||
determine the following:
|
||||
|
||||
* Provides the fileobj to write downloaded data to
|
||||
* Gets a task to complete once everything downloaded has been written
|
||||
|
||||
The answers/implementations differ for the various types of file outputs
|
||||
that may be accepted. All implementations must subclass and override
|
||||
public methods from this class.
|
||||
"""
|
||||
|
||||
def __init__(self, osutil, transfer_coordinator, io_executor):
|
||||
self._osutil = osutil
|
||||
self._transfer_coordinator = transfer_coordinator
|
||||
self._io_executor = io_executor
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, download_target, osutil):
|
||||
"""Determines if the target for the download is compatible with manager
|
||||
|
||||
:param download_target: The target to which the download will write
|
||||
data.
|
||||
|
||||
:param osutil: The os utility to be used for the transfer
|
||||
|
||||
:returns: True if the manager can handle the type of target specified
|
||||
otherwise returns False.
|
||||
"""
|
||||
raise NotImplementedError('must implement is_compatible()')
|
||||
|
||||
def get_download_task_tag(self):
|
||||
"""Get the tag (if any) to associate all GetObjectTasks
|
||||
|
||||
:rtype: s3transfer.futures.TaskTag
|
||||
:returns: The tag to associate all GetObjectTasks with
|
||||
"""
|
||||
return None
|
||||
|
||||
def get_fileobj_for_io_writes(self, transfer_future):
|
||||
"""Get file-like object to use for io writes in the io executor
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The future associated with upload request
|
||||
|
||||
:returns: A file-like object to write to
|
||||
"""
|
||||
raise NotImplementedError('must implement get_fileobj_for_io_writes()')
|
||||
|
||||
def queue_file_io_task(self, fileobj, data, offset):
|
||||
"""Queue IO write for submission to the IO executor.
|
||||
|
||||
This method accepts a file-like object and information about the
|
||||
downloaded data, and handles submitting a write task to the IO executor.
|
||||
|
||||
This method may defer submission to the IO executor if necessary.
|
||||
|
||||
"""
|
||||
self._transfer_coordinator.submit(
|
||||
self._io_executor, self.get_io_write_task(fileobj, data, offset)
|
||||
)
|
||||
|
||||
def get_io_write_task(self, fileobj, data, offset):
|
||||
"""Get an IO write task for the requested set of data
|
||||
|
||||
This task can be run immediately or be submitted to the IO executor
|
||||
for it to run.
|
||||
|
||||
:type fileobj: file-like object
|
||||
:param fileobj: The file-like object to write to
|
||||
|
||||
:type data: bytes
|
||||
:param data: The data to write out
|
||||
|
||||
:type offset: integer
|
||||
:param offset: The offset to write the data to in the file-like object
|
||||
|
||||
:returns: An IO task to be used to write data to a file-like object
|
||||
"""
|
||||
return IOWriteTask(
|
||||
self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'fileobj': fileobj,
|
||||
'data': data,
|
||||
'offset': offset,
|
||||
},
|
||||
)
|
||||
|
||||
def get_final_io_task(self):
|
||||
"""Get the final io task to complete the download
|
||||
|
||||
This is needed because, based on the architecture of the TransferManager,
|
||||
the final tasks will be sent to the IO executor, but the executor
|
||||
needs a final task for it to signal that the transfer is done and
|
||||
all done callbacks can be run.
|
||||
|
||||
:rtype: s3transfer.tasks.Task
|
||||
:returns: A final task to be completed in the io executor
|
||||
"""
|
||||
raise NotImplementedError('must implement get_final_io_task()')
|
||||
|
||||
def _get_fileobj_from_filename(self, filename):
|
||||
f = DeferredOpenFile(
|
||||
filename, mode='wb', open_function=self._osutil.open
|
||||
)
|
||||
# Make sure the file gets closed and we remove the temporary file
|
||||
# if anything goes wrong during the process.
|
||||
self._transfer_coordinator.add_failure_cleanup(f.close)
|
||||
return f
|
||||
|
||||
|
||||
class DownloadFilenameOutputManager(DownloadOutputManager):
|
||||
def __init__(self, osutil, transfer_coordinator, io_executor):
|
||||
super().__init__(osutil, transfer_coordinator, io_executor)
|
||||
self._final_filename = None
|
||||
self._temp_filename = None
|
||||
self._temp_fileobj = None
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, download_target, osutil):
|
||||
return isinstance(download_target, str)
|
||||
|
||||
def get_fileobj_for_io_writes(self, transfer_future):
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
self._final_filename = fileobj
|
||||
self._temp_filename = self._osutil.get_temp_filename(fileobj)
|
||||
self._temp_fileobj = self._get_temp_fileobj()
|
||||
return self._temp_fileobj
|
||||
|
||||
def get_final_io_task(self):
|
||||
# A task to rename the file from the temporary file to its final
|
||||
# location is needed. This should be the last task needed to complete
|
||||
# the download.
|
||||
return IORenameFileTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'fileobj': self._temp_fileobj,
|
||||
'final_filename': self._final_filename,
|
||||
'osutil': self._osutil,
|
||||
},
|
||||
is_final=True,
|
||||
)
|
||||
|
||||
def _get_temp_fileobj(self):
|
||||
f = self._get_fileobj_from_filename(self._temp_filename)
|
||||
self._transfer_coordinator.add_failure_cleanup(
|
||||
self._osutil.remove_file, self._temp_filename
|
||||
)
|
||||
return f
|
||||
|
||||
|
||||
class DownloadSeekableOutputManager(DownloadOutputManager):
|
||||
@classmethod
|
||||
def is_compatible(cls, download_target, osutil):
|
||||
return seekable(download_target)
|
||||
|
||||
def get_fileobj_for_io_writes(self, transfer_future):
|
||||
# Return the fileobj provided to the future.
|
||||
return transfer_future.meta.call_args.fileobj
|
||||
|
||||
def get_final_io_task(self):
|
||||
# This task will serve the purpose of signaling when all of the io
|
||||
# writes have finished so done callbacks can be called.
|
||||
return CompleteDownloadNOOPTask(
|
||||
transfer_coordinator=self._transfer_coordinator
|
||||
)
|
||||
|
||||
|
||||
class DownloadNonSeekableOutputManager(DownloadOutputManager):
|
||||
def __init__(
|
||||
self, osutil, transfer_coordinator, io_executor, defer_queue=None
|
||||
):
|
||||
super().__init__(osutil, transfer_coordinator, io_executor)
|
||||
if defer_queue is None:
|
||||
defer_queue = DeferQueue()
|
||||
self._defer_queue = defer_queue
|
||||
self._io_submit_lock = threading.Lock()
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, download_target, osutil):
|
||||
return hasattr(download_target, 'write')
|
||||
|
||||
def get_download_task_tag(self):
|
||||
return IN_MEMORY_DOWNLOAD_TAG
|
||||
|
||||
def get_fileobj_for_io_writes(self, transfer_future):
|
||||
return transfer_future.meta.call_args.fileobj
|
||||
|
||||
def get_final_io_task(self):
|
||||
return CompleteDownloadNOOPTask(
|
||||
transfer_coordinator=self._transfer_coordinator
|
||||
)
|
||||
|
||||
def queue_file_io_task(self, fileobj, data, offset):
|
||||
with self._io_submit_lock:
|
||||
writes = self._defer_queue.request_writes(offset, data)
|
||||
for write in writes:
|
||||
data = write['data']
|
||||
logger.debug(
|
||||
"Queueing IO offset %s for fileobj: %s",
|
||||
write['offset'],
|
||||
fileobj,
|
||||
)
|
||||
super().queue_file_io_task(fileobj, data, offset)
|
||||
|
||||
def get_io_write_task(self, fileobj, data, offset):
|
||||
return IOStreamingWriteTask(
|
||||
self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'fileobj': fileobj,
|
||||
'data': data,
|
||||
},
|
||||
)
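# --- Illustrative sketch (editor-added; DeferQueue is defined later in this
# module, so its exact behavior here is an assumption based on how
# queue_file_io_task uses it) ---
# For non-seekable targets writes must happen strictly in order, so the
# manager funnels every chunk through the DeferQueue: a chunk that arrives
# ahead of its turn is buffered and only released once earlier offsets have
# been written. Rough shape, with made-up 3-byte chunks:
q = DeferQueue()
q.request_writes(offset=3, data=b'def')  # out of order: nothing to write yet
q.request_writes(offset=0, data=b'abc')  # returns both writes, now in order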
|
||||
|
||||
|
||||
class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
|
||||
def __init__(
|
||||
self, osutil, transfer_coordinator, io_executor, defer_queue=None
|
||||
):
|
||||
super().__init__(
|
||||
osutil, transfer_coordinator, io_executor, defer_queue
|
||||
)
|
||||
self._fileobj = None
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, download_target, osutil):
|
||||
return isinstance(download_target, str) and osutil.is_special_file(
|
||||
download_target
|
||||
)
|
||||
|
||||
def get_fileobj_for_io_writes(self, transfer_future):
|
||||
filename = transfer_future.meta.call_args.fileobj
|
||||
self._fileobj = self._get_fileobj_from_filename(filename)
|
||||
return self._fileobj
|
||||
|
||||
def get_final_io_task(self):
|
||||
# Make sure the file gets closed once the transfer is done.
|
||||
return IOCloseTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
is_final=True,
|
||||
main_kwargs={'fileobj': self._fileobj},
|
||||
)
|
||||
|
||||
|
||||
class DownloadSubmissionTask(SubmissionTask):
|
||||
"""Task for submitting tasks to execute a download"""
|
||||
|
||||
def _get_download_output_manager_cls(self, transfer_future, osutil):
|
||||
"""Retrieves a class for managing output for a download
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future for the request
|
||||
|
||||
:type osutil: s3transfer.utils.OSUtils
|
||||
:param osutil: The os utility associated to the transfer
|
||||
|
||||
:rtype: class of DownloadOutputManager
|
||||
:returns: The appropriate class to use for managing a specific type of
|
||||
output for downloads.
|
||||
"""
|
||||
download_manager_resolver_chain = [
|
||||
DownloadSpecialFilenameOutputManager,
|
||||
DownloadFilenameOutputManager,
|
||||
DownloadSeekableOutputManager,
|
||||
DownloadNonSeekableOutputManager,
|
||||
]
|
||||
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
for download_manager_cls in download_manager_resolver_chain:
|
||||
if download_manager_cls.is_compatible(fileobj, osutil):
|
||||
return download_manager_cls
|
||||
raise RuntimeError(
|
||||
f'Output {fileobj} of type: {type(fileobj)} is not supported.'
|
||||
)
|
||||
|
||||
def _submit(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
io_executor,
|
||||
transfer_future,
|
||||
bandwidth_limiter=None,
|
||||
):
|
||||
"""
|
||||
:param client: The client associated with the transfer manager
|
||||
|
||||
:type config: s3transfer.manager.TransferConfig
|
||||
:param config: The transfer config associated with the transfer
|
||||
manager
|
||||
|
||||
:type osutil: s3transfer.utils.OSUtils
|
||||
:param osutil: The os utility associated to the transfer manager
|
||||
|
||||
:type request_executor: s3transfer.futures.BoundedExecutor
|
||||
:param request_executor: The request executor associated with the
|
||||
transfer manager
|
||||
|
||||
:type io_executor: s3transfer.futures.BoundedExecutor
|
||||
:param io_executor: The io executor associated with the
|
||||
transfer manager
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future associated with the
|
||||
transfer request that tasks are being submitted for
|
||||
|
||||
:type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
|
||||
:param bandwidth_limiter: The bandwidth limiter to use when
|
||||
downloading streams
|
||||
"""
|
||||
if (
|
||||
transfer_future.meta.size is None
|
||||
or transfer_future.meta.etag is None
|
||||
):
|
||||
response = client.head_object(
|
||||
Bucket=transfer_future.meta.call_args.bucket,
|
||||
Key=transfer_future.meta.call_args.key,
|
||||
**transfer_future.meta.call_args.extra_args,
|
||||
)
|
||||
# If a size was not provided figure out the size for the
|
||||
# user.
|
||||
transfer_future.meta.provide_transfer_size(
|
||||
response['ContentLength']
|
||||
)
|
||||
# Provide an etag to ensure a stored object is not modified
|
||||
# during a multipart download.
|
||||
transfer_future.meta.provide_object_etag(response.get('ETag'))
|
||||
|
||||
download_output_manager = self._get_download_output_manager_cls(
|
||||
transfer_future, osutil
|
||||
)(osutil, self._transfer_coordinator, io_executor)
|
||||
|
||||
# If it is greater than threshold do a ranged download, otherwise
|
||||
# do a regular GetObject download.
|
||||
if transfer_future.meta.size < config.multipart_threshold:
|
||||
self._submit_download_request(
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
io_executor,
|
||||
download_output_manager,
|
||||
transfer_future,
|
||||
bandwidth_limiter,
|
||||
)
|
||||
else:
|
||||
self._submit_ranged_download_request(
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
io_executor,
|
||||
download_output_manager,
|
||||
transfer_future,
|
||||
bandwidth_limiter,
|
||||
)
|
||||
|
||||
def _submit_download_request(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
io_executor,
|
||||
download_output_manager,
|
||||
transfer_future,
|
||||
bandwidth_limiter,
|
||||
):
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
# Get a handle to the file that will be used for writing downloaded
|
||||
# contents
|
||||
fileobj = download_output_manager.get_fileobj_for_io_writes(
|
||||
transfer_future
|
||||
)
|
||||
|
||||
# Get the needed callbacks for the task
|
||||
progress_callbacks = get_callbacks(transfer_future, 'progress')
|
||||
|
||||
# Get any associated tags for the get object task.
|
||||
get_object_tag = download_output_manager.get_download_task_tag()
|
||||
|
||||
# Get the final io task to run once the download is complete.
|
||||
final_task = download_output_manager.get_final_io_task()
|
||||
|
||||
# Submit the task to download the object.
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
ImmediatelyWriteIOGetObjectTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'fileobj': fileobj,
|
||||
'extra_args': call_args.extra_args,
|
||||
'callbacks': progress_callbacks,
|
||||
'max_attempts': config.num_download_attempts,
|
||||
'download_output_manager': download_output_manager,
|
||||
'io_chunksize': config.io_chunksize,
|
||||
'bandwidth_limiter': bandwidth_limiter,
|
||||
},
|
||||
done_callbacks=[final_task],
|
||||
),
|
||||
tag=get_object_tag,
|
||||
)
|
||||
|
||||
def _submit_ranged_download_request(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
io_executor,
|
||||
download_output_manager,
|
||||
transfer_future,
|
||||
bandwidth_limiter,
|
||||
):
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
# Get the needed progress callbacks for the task
|
||||
progress_callbacks = get_callbacks(transfer_future, 'progress')
|
||||
|
||||
# Get a handle to the file that will be used for writing downloaded
|
||||
# contents
|
||||
fileobj = download_output_manager.get_fileobj_for_io_writes(
|
||||
transfer_future
|
||||
)
|
||||
|
||||
# Determine the number of parts
|
||||
part_size = config.multipart_chunksize
|
||||
num_parts = calculate_num_parts(transfer_future.meta.size, part_size)
|
||||
|
||||
# Get any associated tags for the get object task.
|
||||
get_object_tag = download_output_manager.get_download_task_tag()
|
||||
|
||||
# Callback invoker to submit the final io task once all downloads
|
||||
# are complete.
|
||||
finalize_download_invoker = CountCallbackInvoker(
|
||||
self._get_final_io_task_submission_callback(
|
||||
download_output_manager, io_executor
|
||||
)
|
||||
)
|
||||
for i in range(num_parts):
|
||||
# Calculate the range parameter
|
||||
range_parameter = calculate_range_parameter(
|
||||
part_size, i, num_parts
|
||||
)
|
||||
|
||||
# Inject extra parameters to be passed in as extra args
|
||||
extra_args = {
|
||||
'Range': range_parameter,
|
||||
}
|
||||
if transfer_future.meta.etag is not None:
|
||||
extra_args['IfMatch'] = transfer_future.meta.etag
|
||||
extra_args.update(call_args.extra_args)
|
||||
finalize_download_invoker.increment()
|
||||
# Submit the ranged downloads
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
GetObjectTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'fileobj': fileobj,
|
||||
'extra_args': extra_args,
|
||||
'callbacks': progress_callbacks,
|
||||
'max_attempts': config.num_download_attempts,
|
||||
'start_index': i * part_size,
|
||||
'download_output_manager': download_output_manager,
|
||||
'io_chunksize': config.io_chunksize,
|
||||
'bandwidth_limiter': bandwidth_limiter,
|
||||
},
|
||||
done_callbacks=[finalize_download_invoker.decrement],
|
||||
),
|
||||
tag=get_object_tag,
|
||||
)
|
||||
finalize_download_invoker.finalize()
|
||||
|
||||
def _get_final_io_task_submission_callback(
|
||||
self, download_manager, io_executor
|
||||
):
|
||||
final_task = download_manager.get_final_io_task()
|
||||
return FunctionContainer(
|
||||
self._transfer_coordinator.submit, io_executor, final_task
|
||||
)
|
||||
|
||||
def _calculate_range_param(self, part_size, part_index, num_parts):
|
||||
# Used to calculate the Range parameter
|
||||
start_range = part_index * part_size
|
||||
if part_index == num_parts - 1:
|
||||
end_range = ''
|
||||
else:
|
||||
end_range = start_range + part_size - 1
|
||||
range_param = f'bytes={start_range}-{end_range}'
|
||||
return range_param
|
||||
|
||||
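# Illustrative sketch (not part of the library): the Range headers the
# ranged-download path above would request for a 20 MB object split into
# 8 MB parts. The arithmetic mirrors _calculate_range_param; the numbers
# are example values only.
def _example_range_parameters():
    part_size = 8 * 1024 * 1024
    num_parts = 3
    ranges = []
    for part_index in range(num_parts):
        start = part_index * part_size
        # The final part leaves the end open so S3 returns the remainder.
        end = '' if part_index == num_parts - 1 else start + part_size - 1
        ranges.append(f'bytes={start}-{end}')
    # ['bytes=0-8388607', 'bytes=8388608-16777215', 'bytes=16777216-']
    return ranges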
|
||||
class GetObjectTask(Task):
|
||||
def _main(
|
||||
self,
|
||||
client,
|
||||
bucket,
|
||||
key,
|
||||
fileobj,
|
||||
extra_args,
|
||||
callbacks,
|
||||
max_attempts,
|
||||
download_output_manager,
|
||||
io_chunksize,
|
||||
start_index=0,
|
||||
bandwidth_limiter=None,
|
||||
):
|
||||
"""Downloads an object and places content into io queue
|
||||
|
||||
:param client: The client to use when calling GetObject
|
||||
:param bucket: The bucket to download from
|
||||
:param key: The key to download from
|
||||
:param fileobj: The file handle to write content to
|
||||
:param extra_args: Any extra arguments to include in GetObject request
|
||||
:param callbacks: List of progress callbacks to invoke on download
|
||||
:param max_attempts: The number of retries to do when downloading
|
||||
:param download_output_manager: The download output manager associated
|
||||
with the current download.
|
||||
:param io_chunksize: The size of each io chunk to read from the
|
||||
download stream and queue in the io queue.
|
||||
:param start_index: The location in the file to start writing the
|
||||
content of the key to.
|
||||
:param bandwidth_limiter: The bandwidth limiter to use when throttling
|
||||
the downloading of data in streams.
|
||||
"""
|
||||
last_exception = None
|
||||
for i in range(max_attempts):
|
||||
try:
|
||||
current_index = start_index
|
||||
response = client.get_object(
|
||||
Bucket=bucket, Key=key, **extra_args
|
||||
)
|
||||
streaming_body = StreamReaderProgress(
|
||||
response['Body'], callbacks
|
||||
)
|
||||
if bandwidth_limiter:
|
||||
streaming_body = (
|
||||
bandwidth_limiter.get_bandwith_limited_stream(
|
||||
streaming_body, self._transfer_coordinator
|
||||
)
|
||||
)
|
||||
|
||||
chunks = DownloadChunkIterator(streaming_body, io_chunksize)
|
||||
for chunk in chunks:
|
||||
# If the transfer is done because of a cancellation
|
||||
# or error somewhere else, stop trying to submit more
|
||||
# data to be written and break out of the download.
|
||||
if not self._transfer_coordinator.done():
|
||||
self._handle_io(
|
||||
download_output_manager,
|
||||
fileobj,
|
||||
chunk,
|
||||
current_index,
|
||||
)
|
||||
current_index += len(chunk)
|
||||
else:
|
||||
return
|
||||
return
|
||||
except ClientError as e:
|
||||
error_code = e.response.get('Error', {}).get('Code')
|
||||
if error_code == "PreconditionFailed":
|
||||
raise S3DownloadFailedError(
|
||||
f'Contents of stored object "{key}" in bucket '
|
||||
f'"{bucket}" did not match expected ETag.'
|
||||
)
|
||||
else:
|
||||
raise
|
||||
except S3_RETRYABLE_DOWNLOAD_ERRORS as e:
|
||||
logger.debug(
|
||||
"Retrying exception caught (%s), "
|
||||
"retrying request, (attempt %s / %s)",
|
||||
e,
|
||||
i,
|
||||
max_attempts,
|
||||
exc_info=True,
|
||||
)
|
||||
last_exception = e
|
||||
# Also invoke the progress callbacks to indicate that we
|
||||
# are trying to download the stream again and all progress
|
||||
# for this GetObject has been lost.
|
||||
invoke_progress_callbacks(
|
||||
callbacks, start_index - current_index
|
||||
)
|
||||
continue
|
||||
raise RetriesExceededError(last_exception)
|
||||
|
||||
def _handle_io(self, download_output_manager, fileobj, chunk, index):
|
||||
download_output_manager.queue_file_io_task(fileobj, chunk, index)
|
||||
|
||||
|
||||
class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
|
||||
"""GetObjectTask that immediately writes to the provided file object
|
||||
|
||||
This is useful for downloads where it is known only one thread is
|
||||
downloading the object so there is no reason to go through the
|
||||
overhead of using an IO queue and executor.
|
||||
"""
|
||||
|
||||
def _handle_io(self, download_output_manager, fileobj, chunk, index):
|
||||
task = download_output_manager.get_io_write_task(fileobj, chunk, index)
|
||||
task()
|
||||
|
||||
|
||||
class IOWriteTask(Task):
|
||||
def _main(self, fileobj, data, offset):
|
||||
"""Pulls off an io queue to write contents to a file
|
||||
|
||||
:param fileobj: The file handle to write content to
|
||||
:param data: The data to write
|
||||
:param offset: The offset to write the data to.
|
||||
"""
|
||||
fileobj.seek(offset)
|
||||
fileobj.write(data)
|
||||
|
||||
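# Illustrative sketch (not part of the library): the seek-then-write
# pattern IOWriteTask relies on, demonstrated against an in-memory
# buffer so it can be run standalone.
def _example_offset_write():
    import io

    buf = io.BytesIO(b'\x00' * 8)
    buf.seek(4)
    buf.write(b'abcd')
    return buf.getvalue()  # b'\x00\x00\x00\x00abcd'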
|
||||
class IOStreamingWriteTask(Task):
|
||||
"""Task for writing data to a non-seekable stream."""
|
||||
|
||||
def _main(self, fileobj, data):
|
||||
"""Write data to a fileobj.
|
||||
|
||||
Data will be written directly to the fileobj without
|
||||
any prior seeking.
|
||||
|
||||
:param fileobj: The fileobj to write content to
|
||||
:param data: The data to write
|
||||
|
||||
"""
|
||||
fileobj.write(data)
|
||||
|
||||
|
||||
class IORenameFileTask(Task):
|
||||
"""A task to rename a temporary file to its final filename
|
||||
|
||||
:param fileobj: The file handle that content was written to.
|
||||
:param final_filename: The final name of the file to rename to
|
||||
upon completion of writing the contents.
|
||||
:param osutil: OS utility
|
||||
"""
|
||||
|
||||
def _main(self, fileobj, final_filename, osutil):
|
||||
fileobj.close()
|
||||
osutil.rename_file(fileobj.name, final_filename)
|
||||
|
||||
|
||||
class IOCloseTask(Task):
|
||||
"""A task to close out a file once the download is complete.
|
||||
|
||||
:param fileobj: The fileobj to close.
|
||||
"""
|
||||
|
||||
def _main(self, fileobj):
|
||||
fileobj.close()
|
||||
|
||||
|
||||
class CompleteDownloadNOOPTask(Task):
|
||||
"""A NOOP task to serve as an indicator that the download is complete
|
||||
|
||||
Note that the default for is_final is set to True because this should
|
||||
always be the last task.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
transfer_coordinator,
|
||||
main_kwargs=None,
|
||||
pending_main_kwargs=None,
|
||||
done_callbacks=None,
|
||||
is_final=True,
|
||||
):
|
||||
super().__init__(
|
||||
transfer_coordinator=transfer_coordinator,
|
||||
main_kwargs=main_kwargs,
|
||||
pending_main_kwargs=pending_main_kwargs,
|
||||
done_callbacks=done_callbacks,
|
||||
is_final=is_final,
|
||||
)
|
||||
|
||||
def _main(self):
|
||||
pass
|
||||
|
||||
|
||||
class DownloadChunkIterator:
|
||||
def __init__(self, body, chunksize):
|
||||
"""Iterator to chunk out a downloaded S3 stream
|
||||
|
||||
:param body: A readable file-like object
|
||||
:param chunksize: The amount to read each time
|
||||
"""
|
||||
self._body = body
|
||||
self._chunksize = chunksize
|
||||
self._num_reads = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
chunk = self._body.read(self._chunksize)
|
||||
self._num_reads += 1
|
||||
if chunk:
|
||||
return chunk
|
||||
elif self._num_reads == 1:
|
||||
# Even though the response may have not had any
|
||||
# content, we still want to account for an empty object's
|
||||
# existence so return the empty chunk for that initial
|
||||
# read.
|
||||
return chunk
|
||||
raise StopIteration()
|
||||
|
||||
next = __next__
|
||||
|
||||
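# Illustrative sketch (not part of the library): chunking a small
# in-memory body with DownloadChunkIterator. Note that an empty body
# still yields a single empty chunk so zero-byte objects are accounted for.
def _example_chunk_iterator():
    import io

    chunks = list(DownloadChunkIterator(io.BytesIO(b'abcdefgh'), 3))
    # [b'abc', b'def', b'gh']
    empty = list(DownloadChunkIterator(io.BytesIO(b''), 3))
    # [b'']
    return chunks, empty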
|
||||
class DeferQueue:
|
||||
"""IO queue that defers write requests until they are queued sequentially.
|
||||
|
||||
This class is used to track IO data for a *single* fileobj.
|
||||
|
||||
You can send data to this queue, and it will defer any IO write requests
|
||||
until it has the next contiguous block available (starting at 0).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._writes = []
|
||||
self._pending_offsets = {}
|
||||
self._next_offset = 0
|
||||
|
||||
def request_writes(self, offset, data):
|
||||
"""Request any available writes given new incoming data.
|
||||
|
||||
You call this method by providing new data along with the
|
||||
offset associated with the data. If that new data unlocks
|
||||
any contiguous writes that can now be submitted, this
|
||||
method will return all applicable writes.
|
||||
|
||||
This is done with one method call so you don't have to
|
||||
make two method calls (put(), get()), each of which
|
||||
acquires a lock.
|
||||
|
||||
"""
|
||||
if offset + len(data) <= self._next_offset:
|
||||
# This is a request for a write that we've already
|
||||
# seen. This can happen in the event of a retry
|
||||
# where if we retry at offset N/2, we'll requeue
|
||||
# offsets 0-N/2 again.
|
||||
return []
|
||||
writes = []
|
||||
if offset < self._next_offset:
|
||||
# This is a special case where the write request contains
|
||||
# both seen AND unseen data. This can happen in the case
|
||||
# that we queue part of a chunk due to an incomplete read,
|
||||
# then pop the incomplete data for writing, then we receive the retry
|
||||
# for the incomplete read which contains both the previously-seen
|
||||
# partial chunk followed by the rest of the chunk (unseen).
|
||||
#
|
||||
# In this case, we discard the bytes of the data we've already
|
||||
# queued before, and only queue the unseen bytes.
|
||||
seen_bytes = self._next_offset - offset
|
||||
data = data[seen_bytes:]
|
||||
offset = self._next_offset
|
||||
if offset in self._pending_offsets:
|
||||
queued_data = self._pending_offsets[offset]
|
||||
if len(data) <= len(queued_data):
|
||||
# We already have a write request queued with the same offset
|
||||
# with at least as much data as is present in this
|
||||
# request. In this case we should ignore this request
|
||||
# and prefer what's already queued.
|
||||
return []
|
||||
else:
|
||||
# We have a write request queued with the same offset,
|
||||
# but this request contains more data. This can happen
|
||||
# in the case of a retried request due to an incomplete
|
||||
# read, followed by a retry containing the full response
|
||||
# body. In this case, we should overwrite the queued
|
||||
# request with this one since it contains more data.
|
||||
self._pending_offsets[offset] = data
|
||||
else:
|
||||
heapq.heappush(self._writes, offset)
|
||||
self._pending_offsets[offset] = data
|
||||
while self._writes and self._writes[0] == self._next_offset:
|
||||
next_write_offset = heapq.heappop(self._writes)
|
||||
next_write = self._pending_offsets[next_write_offset]
|
||||
writes.append({'offset': next_write_offset, 'data': next_write})
|
||||
del self._pending_offsets[next_write_offset]
|
||||
self._next_offset += len(next_write)
|
||||
return writes
|
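# Illustrative sketch (not part of the library): DeferQueue holds on to
# out-of-order chunks and only releases writes once the data is
# contiguous from the current offset.
def _example_defer_queue():
    q = DeferQueue()
    deferred = q.request_writes(offset=5, data=b'world')
    # [] -- offset 5 is not contiguous with offset 0 yet, so it waits.
    released = q.request_writes(offset=0, data=b'hello')
    # [{'offset': 0, 'data': b'hello'}, {'offset': 5, 'data': b'world'}]
    return deferred, released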
@@ -0,0 +1,41 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
from concurrent.futures import CancelledError
|
||||
|
||||
|
||||
class RetriesExceededError(Exception):
|
||||
def __init__(self, last_exception, msg='Max Retries Exceeded'):
|
||||
super().__init__(msg)
|
||||
self.last_exception = last_exception
|
||||
|
||||
|
||||
class S3UploadFailedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class S3DownloadFailedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidSubscriberMethodError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class TransferNotDoneError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FatalError(CancelledError):
|
||||
"""A CancelledError raised from an error in the TransferManager"""
|
||||
|
||||
pass
|
628
cdk-env/lib/python3.12/site-packages/s3transfer/futures.py
Normal file
@@ -0,0 +1,628 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import copy
|
||||
import logging
|
||||
import sys
|
||||
import threading
|
||||
from collections import namedtuple
|
||||
from concurrent import futures
|
||||
|
||||
from s3transfer.compat import MAXINT
|
||||
from s3transfer.exceptions import CancelledError, TransferNotDoneError
|
||||
from s3transfer.utils import FunctionContainer, TaskSemaphore
|
||||
|
||||
try:
|
||||
from botocore.context import get_context
|
||||
except ImportError:
|
||||
|
||||
def get_context():
|
||||
return None
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseTransferFuture:
|
||||
@property
|
||||
def meta(self):
|
||||
"""The metadata associated to the TransferFuture"""
|
||||
raise NotImplementedError('meta')
|
||||
|
||||
def done(self):
|
||||
"""Determines if a TransferFuture has completed
|
||||
|
||||
:returns: True if completed. False, otherwise.
|
||||
"""
|
||||
raise NotImplementedError('done()')
|
||||
|
||||
def result(self):
|
||||
"""Waits until TransferFuture is done and returns the result
|
||||
|
||||
If the TransferFuture succeeded, it will return the result. If the
|
||||
TransferFuture failed, it will raise the exception associated to the
|
||||
failure.
|
||||
"""
|
||||
raise NotImplementedError('result()')
|
||||
|
||||
def cancel(self):
|
||||
"""Cancels the request associated with the TransferFuture"""
|
||||
raise NotImplementedError('cancel()')
|
||||
|
||||
|
||||
class BaseTransferMeta:
|
||||
@property
|
||||
def call_args(self):
|
||||
"""The call args used in the transfer request"""
|
||||
raise NotImplementedError('call_args')
|
||||
|
||||
@property
|
||||
def transfer_id(self):
|
||||
"""The unique id of the transfer"""
|
||||
raise NotImplementedError('transfer_id')
|
||||
|
||||
@property
|
||||
def user_context(self):
|
||||
"""A dictionary that requesters can store data in"""
|
||||
raise NotImplementedError('user_context')
|
||||
|
||||
|
||||
class TransferFuture(BaseTransferFuture):
|
||||
def __init__(self, meta=None, coordinator=None):
|
||||
"""The future associated to a submitted transfer request
|
||||
|
||||
:type meta: TransferMeta
|
||||
:param meta: The metadata associated to the request. This object
|
||||
is visible to the requester.
|
||||
|
||||
:type coordinator: TransferCoordinator
|
||||
:param coordinator: The coordinator associated to the request. This
|
||||
object is not visible to the requester.
|
||||
"""
|
||||
self._meta = meta
|
||||
if meta is None:
|
||||
self._meta = TransferMeta()
|
||||
|
||||
self._coordinator = coordinator
|
||||
if coordinator is None:
|
||||
self._coordinator = TransferCoordinator()
|
||||
|
||||
@property
|
||||
def meta(self):
|
||||
return self._meta
|
||||
|
||||
def done(self):
|
||||
return self._coordinator.done()
|
||||
|
||||
def result(self):
|
||||
try:
|
||||
# Usually the result() method blocks until the transfer is done,
|
||||
# however if a KeyboardInterrupt is raised we want to exit
|
||||
# out of this and propagate the exception.
|
||||
return self._coordinator.result()
|
||||
except KeyboardInterrupt as e:
|
||||
self.cancel()
|
||||
raise e
|
||||
|
||||
def cancel(self):
|
||||
self._coordinator.cancel()
|
||||
|
||||
def set_exception(self, exception):
|
||||
"""Sets the exception on the future."""
|
||||
if not self.done():
|
||||
raise TransferNotDoneError(
|
||||
'set_exception can only be called once the transfer is '
|
||||
'complete.'
|
||||
)
|
||||
self._coordinator.set_exception(exception, override=True)
|
||||
|
||||
|
||||
class TransferMeta(BaseTransferMeta):
|
||||
"""Holds metadata about the TransferFuture"""
|
||||
|
||||
def __init__(self, call_args=None, transfer_id=None):
|
||||
self._call_args = call_args
|
||||
self._transfer_id = transfer_id
|
||||
self._size = None
|
||||
self._user_context = {}
|
||||
self._etag = None
|
||||
|
||||
@property
|
||||
def call_args(self):
|
||||
"""The call args used in the transfer request"""
|
||||
return self._call_args
|
||||
|
||||
@property
|
||||
def transfer_id(self):
|
||||
"""The unique id of the transfer"""
|
||||
return self._transfer_id
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
"""The size of the transfer request if known"""
|
||||
return self._size
|
||||
|
||||
@property
|
||||
def user_context(self):
|
||||
"""A dictionary that requesters can store data in"""
|
||||
return self._user_context
|
||||
|
||||
@property
|
||||
def etag(self):
|
||||
"""The etag of the stored object for validating multipart downloads"""
|
||||
return self._etag
|
||||
|
||||
def provide_transfer_size(self, size):
|
||||
"""A method to provide the size of a transfer request
|
||||
|
||||
By providing this value, the TransferManager will not try to
|
||||
call HeadObject or use the OS to determine the size of the
|
||||
transfer.
|
||||
"""
|
||||
self._size = size
|
||||
|
||||
def provide_object_etag(self, etag):
|
||||
"""A method to provide the etag of a transfer request
|
||||
|
||||
By providing this value, the TransferManager will validate
|
||||
multipart downloads by supplying an IfMatch parameter with
|
||||
the etag as the value to GetObject requests.
|
||||
"""
|
||||
self._etag = etag
|
||||
|
||||
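# Illustrative sketch (not part of the library): a subscriber can call
# provide_transfer_size() on the future's meta so the manager skips the
# HeadObject request it would otherwise make to discover the object
# size. The subscriber class below is hypothetical example code.
def _make_provide_size_subscriber(known_size):
    from s3transfer.subscribers import BaseSubscriber

    class ProvideSizeSubscriber(BaseSubscriber):
        def __init__(self, size):
            self.size = size

        def on_queued(self, future, **kwargs):
            # Record the size up front instead of letting the manager
            # issue a HeadObject request to find it.
            future.meta.provide_transfer_size(self.size)

    return ProvideSizeSubscriber(known_size)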
|
||||
class TransferCoordinator:
|
||||
"""A helper class for managing TransferFuture"""
|
||||
|
||||
def __init__(self, transfer_id=None):
|
||||
self.transfer_id = transfer_id
|
||||
self._status = 'not-started'
|
||||
self._result = None
|
||||
self._exception = None
|
||||
self._associated_futures = set()
|
||||
self._failure_cleanups = []
|
||||
self._done_callbacks = []
|
||||
self._done_event = threading.Event()
|
||||
self._lock = threading.Lock()
|
||||
self._associated_futures_lock = threading.Lock()
|
||||
self._done_callbacks_lock = threading.Lock()
|
||||
self._failure_cleanups_lock = threading.Lock()
|
||||
|
||||
def __repr__(self):
|
||||
return f'{self.__class__.__name__}(transfer_id={self.transfer_id})'
|
||||
|
||||
@property
|
||||
def exception(self):
|
||||
return self._exception
|
||||
|
||||
@property
|
||||
def associated_futures(self):
|
||||
"""The list of futures associated to the inprogress TransferFuture
|
||||
|
||||
Once the transfer finishes this list becomes empty as the transfer
|
||||
is considered done and there should be no running futures left.
|
||||
"""
|
||||
with self._associated_futures_lock:
|
||||
# We return a copy of the list because we do not want to
|
||||
# process the returned list while another thread is adding
|
||||
# more futures to the actual list.
|
||||
return copy.copy(self._associated_futures)
|
||||
|
||||
@property
|
||||
def failure_cleanups(self):
|
||||
"""The list of callbacks to call when the TransferFuture fails"""
|
||||
return self._failure_cleanups
|
||||
|
||||
@property
|
||||
def status(self):
|
||||
"""The status of the TransferFuture
|
||||
|
||||
The currently supported states are:
|
||||
* not-started - Has yet to start. If in this state, a transfer
|
||||
can be canceled immediately and nothing will happen.
|
||||
* queued - SubmissionTask is about to submit tasks
|
||||
* running - Is in progress. In-progress currently means that
|
||||
the SubmissionTask that runs the transfer is being executed. So
|
||||
there is no guarantee any transfer requests have been made to
|
||||
S3 if this state is reached.
|
||||
* cancelled - Was cancelled
|
||||
* failed - An exception other than CancelledError was thrown
|
||||
* success - No exceptions were thrown and is done.
|
||||
"""
|
||||
return self._status
|
||||
|
||||
def set_result(self, result):
|
||||
"""Set a result for the TransferFuture
|
||||
|
||||
Implies that the TransferFuture succeeded. This will always set a
|
||||
result because it is invoked on the final task where there is only
|
||||
ever one final task and it is run at the very end of a transfer
|
||||
process. So if a result is being set for this final task, the transfer
|
||||
succeeded even if something came along and canceled the transfer
|
||||
on the final task.
|
||||
"""
|
||||
with self._lock:
|
||||
self._exception = None
|
||||
self._result = result
|
||||
self._status = 'success'
|
||||
|
||||
def set_exception(self, exception, override=False):
|
||||
"""Set an exception for the TransferFuture
|
||||
|
||||
Implies the TransferFuture failed.
|
||||
|
||||
:param exception: The exception that caused the transfer to fail.
|
||||
:param override: If True, override any existing state.
|
||||
"""
|
||||
with self._lock:
|
||||
if not self.done() or override:
|
||||
self._exception = exception
|
||||
self._status = 'failed'
|
||||
|
||||
def result(self):
|
||||
"""Waits until TransferFuture is done and returns the result
|
||||
|
||||
If the TransferFuture succeeded, it will return the result. If the
|
||||
TransferFuture failed, it will raise the exception associated to the
|
||||
failure.
|
||||
"""
|
||||
# Doing a wait() with no timeout cannot be interrupted in python2 but
|
||||
# can be interrupted in python3 so we just wait with the largest
|
||||
# possible integer value, which is on the scale of billions of
|
||||
# years...
|
||||
self._done_event.wait(MAXINT)
|
||||
|
||||
# Once done waiting, raise an exception if present or return the
|
||||
# final result.
|
||||
if self._exception:
|
||||
raise self._exception
|
||||
return self._result
|
||||
|
||||
def cancel(self, msg='', exc_type=CancelledError):
|
||||
"""Cancels the TransferFuture
|
||||
|
||||
:param msg: The message to attach to the cancellation
|
||||
:param exc_type: The type of exception to set for the cancellation
|
||||
"""
|
||||
with self._lock:
|
||||
if not self.done():
|
||||
should_announce_done = False
|
||||
logger.debug('%s cancel(%s) called', self, msg)
|
||||
self._exception = exc_type(msg)
|
||||
if self._status == 'not-started':
|
||||
should_announce_done = True
|
||||
self._status = 'cancelled'
|
||||
if should_announce_done:
|
||||
self.announce_done()
|
||||
|
||||
def set_status_to_queued(self):
|
||||
"""Sets the TransferFutrue's status to running"""
|
||||
self._transition_to_non_done_state('queued')
|
||||
|
||||
def set_status_to_running(self):
|
||||
"""Sets the TransferFuture's status to running"""
|
||||
self._transition_to_non_done_state('running')
|
||||
|
||||
def _transition_to_non_done_state(self, desired_state):
|
||||
with self._lock:
|
||||
if self.done():
|
||||
raise RuntimeError(
|
||||
f'Unable to transition from done state {self.status} to non-done '
|
||||
f'state {desired_state}.'
|
||||
)
|
||||
self._status = desired_state
|
||||
|
||||
def submit(self, executor, task, tag=None):
|
||||
"""Submits a task to a provided executor
|
||||
|
||||
:type executor: s3transfer.futures.BoundedExecutor
|
||||
:param executor: The executor to submit the callable to
|
||||
|
||||
:type task: s3transfer.tasks.Task
|
||||
:param task: The task to submit to the executor
|
||||
|
||||
:type tag: s3transfer.futures.TaskTag
|
||||
:param tag: A tag to associate to the submitted task
|
||||
|
||||
:rtype: concurrent.futures.Future
|
||||
:returns: A future representing the submitted task
|
||||
"""
|
||||
logger.debug(
|
||||
f"Submitting task {task} to executor {executor} for transfer request: {self.transfer_id}."
|
||||
)
|
||||
future = executor.submit(task, tag=tag)
|
||||
# Add this created future to the list of associated futures just
|
||||
# in case it is needed during cleanups.
|
||||
self.add_associated_future(future)
|
||||
future.add_done_callback(
|
||||
FunctionContainer(self.remove_associated_future, future)
|
||||
)
|
||||
return future
|
||||
|
||||
def done(self):
|
||||
"""Determines if a TransferFuture has completed
|
||||
|
||||
:returns: False if status is equal to 'failed', 'cancelled', or
|
||||
'success'. True, otherwise
|
||||
"""
|
||||
return self.status in ['failed', 'cancelled', 'success']
|
||||
|
||||
def add_associated_future(self, future):
|
||||
"""Adds a future to be associated with the TransferFuture"""
|
||||
with self._associated_futures_lock:
|
||||
self._associated_futures.add(future)
|
||||
|
||||
def remove_associated_future(self, future):
|
||||
"""Removes a future's association to the TransferFuture"""
|
||||
with self._associated_futures_lock:
|
||||
self._associated_futures.remove(future)
|
||||
|
||||
def add_done_callback(self, function, *args, **kwargs):
|
||||
"""Add a done callback to be invoked when transfer is done"""
|
||||
with self._done_callbacks_lock:
|
||||
self._done_callbacks.append(
|
||||
FunctionContainer(function, *args, **kwargs)
|
||||
)
|
||||
|
||||
def add_failure_cleanup(self, function, *args, **kwargs):
|
||||
"""Adds a callback to call upon failure"""
|
||||
with self._failure_cleanups_lock:
|
||||
self._failure_cleanups.append(
|
||||
FunctionContainer(function, *args, **kwargs)
|
||||
)
|
||||
|
||||
def announce_done(self):
|
||||
"""Announce that future is done running and run associated callbacks
|
||||
|
||||
This will run any failure cleanups if the transfer failed and
|
||||
they have not yet been run, unblock the result() call, and will
|
||||
run any done callbacks associated with the TransferFuture if they have
|
||||
not already been run.
|
||||
"""
|
||||
if self.status != 'success':
|
||||
self._run_failure_cleanups()
|
||||
self._done_event.set()
|
||||
self._run_done_callbacks()
|
||||
|
||||
def _run_done_callbacks(self):
|
||||
# Run the callbacks and remove the callbacks from the internal
|
||||
# list so they do not get run again if done is announced more than
|
||||
# once.
|
||||
with self._done_callbacks_lock:
|
||||
self._run_callbacks(self._done_callbacks)
|
||||
self._done_callbacks = []
|
||||
|
||||
def _run_failure_cleanups(self):
|
||||
# Run the cleanup callbacks and remove the callbacks from the internal
|
||||
# list so they do not get run again if done is announced more than
|
||||
# once.
|
||||
with self._failure_cleanups_lock:
|
||||
self._run_callbacks(self.failure_cleanups)
|
||||
self._failure_cleanups = []
|
||||
|
||||
def _run_callbacks(self, callbacks):
|
||||
for callback in callbacks:
|
||||
self._run_callback(callback)
|
||||
|
||||
def _run_callback(self, callback):
|
||||
try:
|
||||
callback()
|
||||
# We do not want a callback interrupting the process, especially
|
||||
# in the failure cleanups. So catch and log the exception.
|
||||
except Exception:
|
||||
logger.debug(f"Exception raised in {callback}.", exc_info=True)
|
||||
|
||||
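# Illustrative sketch (not part of the library): result() blocks on the
# coordinator's internal event, so a value only becomes visible to
# callers once announce_done() has run.
def _example_coordinator_lifecycle():
    coordinator = TransferCoordinator(transfer_id=1)
    coordinator.set_status_to_queued()
    coordinator.set_status_to_running()
    coordinator.set_result('finished')
    coordinator.announce_done()
    assert coordinator.done()       # status is now 'success'
    return coordinator.result()     # 'finished'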
|
||||
class BoundedExecutor:
|
||||
EXECUTOR_CLS = futures.ThreadPoolExecutor
|
||||
|
||||
def __init__(
|
||||
self, max_size, max_num_threads, tag_semaphores=None, executor_cls=None
|
||||
):
|
||||
"""An executor implementation that has a maximum queued up tasks
|
||||
|
||||
The executor will block if the number of tasks that have been
|
||||
submitted and are currently being worked on exceeds its maximum.
|
||||
|
||||
:params max_size: The maximum number of inflight futures. An inflight
|
||||
future means that the task is either queued up or is currently
|
||||
being executed. A size of None or 0 means that the executor will
|
||||
have no bound in terms of the number of inflight futures.
|
||||
|
||||
:params max_num_threads: The maximum number of threads the executor
|
||||
uses.
|
||||
|
||||
:type tag_semaphores: dict
|
||||
:params tag_semaphores: A dictionary where the key is the name of the
|
||||
tag and the value is the semaphore to use when limiting the
|
||||
number of tasks the executor is processing at a time.
|
||||
|
||||
:type executor_cls: BaseExecutor
|
||||
:param executor_cls: The executor class that
|
||||
gets bounded by this executor. If None is provided, the
|
||||
concurrent.futures.ThreadPoolExecutor class is used.
|
||||
"""
|
||||
self._max_num_threads = max_num_threads
|
||||
if executor_cls is None:
|
||||
executor_cls = self.EXECUTOR_CLS
|
||||
self._executor = executor_cls(max_workers=self._max_num_threads)
|
||||
self._semaphore = TaskSemaphore(max_size)
|
||||
self._tag_semaphores = tag_semaphores
|
||||
|
||||
def submit(self, task, tag=None, block=True):
|
||||
"""Submit a task to complete
|
||||
|
||||
:type task: s3transfer.tasks.Task
|
||||
:param task: The task to run __call__ on
|
||||
|
||||
|
||||
:type tag: s3transfer.futures.TaskTag
|
||||
:param tag: An optional tag to associate to the task. This
|
||||
is used to override which semaphore to use.
|
||||
|
||||
:type block: boolean
|
||||
:param block: If True, wait until it is possible to submit a task.
|
||||
If False, do not wait and raise an error if the task cannot be
|
||||
submitted.
|
||||
|
||||
:returns: The future associated to the submitted task
|
||||
"""
|
||||
semaphore = self._semaphore
|
||||
# If a tag was provided, use the semaphore associated to that
|
||||
# tag.
|
||||
if tag:
|
||||
semaphore = self._tag_semaphores[tag]
|
||||
|
||||
# Call acquire on the semaphore.
|
||||
acquire_token = semaphore.acquire(task.transfer_id, block)
|
||||
# Create a callback to invoke when task is done in order to call
|
||||
# release on the semaphore.
|
||||
release_callback = FunctionContainer(
|
||||
semaphore.release, task.transfer_id, acquire_token
|
||||
)
|
||||
# Submit the task to the underlying executor.
|
||||
# Pass the current context to ensure child threads persist the
|
||||
# parent thread's context.
|
||||
future = ExecutorFuture(self._executor.submit(task, get_context()))
|
||||
# Add the Semaphore.release() callback to the future such that
|
||||
# it is invoked once the future completes.
|
||||
future.add_done_callback(release_callback)
|
||||
return future
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
self._executor.shutdown(wait)
|
||||
|
||||
|
||||
class ExecutorFuture:
|
||||
def __init__(self, future):
|
||||
"""A future returned from the executor
|
||||
|
||||
Currently, it is just a wrapper around a concurrent.futures.Future.
|
||||
However, this can eventually grow to implement the needed functionality
|
||||
of concurrent.futures.Future if we ever move off of the library without
|
||||
affecting the rest of the codebase.
|
||||
|
||||
:type future: concurrent.futures.Future
|
||||
:param future: The underlying future
|
||||
"""
|
||||
self._future = future
|
||||
|
||||
def result(self):
|
||||
return self._future.result()
|
||||
|
||||
def add_done_callback(self, fn):
|
||||
"""Adds a callback to be completed once future is done
|
||||
|
||||
:param fn: A callable that takes no arguments. Note that this is different
|
||||
from concurrent.futures.Future.add_done_callback, which requires
|
||||
a single argument for the future.
|
||||
"""
|
||||
|
||||
# The done callback for concurrent.futures.Future will always pass
|
||||
# the future in as the only argument. So we need to create the
|
||||
# proper signature wrapper that will invoke the callback provided.
|
||||
def done_callback(future_passed_to_callback):
|
||||
return fn()
|
||||
|
||||
self._future.add_done_callback(done_callback)
|
||||
|
||||
def done(self):
|
||||
return self._future.done()
|
||||
|
||||
|
||||
class BaseExecutor:
|
||||
"""Base Executor class implementation needed to work with s3transfer"""
|
||||
|
||||
def __init__(self, max_workers=None):
|
||||
pass
|
||||
|
||||
def submit(self, fn, *args, **kwargs):
|
||||
raise NotImplementedError('submit()')
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
raise NotImplementedError('shutdown()')
|
||||
|
||||
|
||||
class NonThreadedExecutor(BaseExecutor):
|
||||
"""A drop-in replacement non-threaded version of ThreadPoolExecutor"""
|
||||
|
||||
def submit(self, fn, *args, **kwargs):
|
||||
future = NonThreadedExecutorFuture()
|
||||
try:
|
||||
result = fn(*args, **kwargs)
|
||||
future.set_result(result)
|
||||
except Exception:
|
||||
e, tb = sys.exc_info()[1:]
|
||||
logger.debug(
|
||||
'Setting exception for %s to %s with traceback %s',
|
||||
future,
|
||||
e,
|
||||
tb,
|
||||
)
|
||||
future.set_exception_info(e, tb)
|
||||
return future
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
pass
|
||||
|
||||
|
||||
class NonThreadedExecutorFuture:
|
||||
"""The Future returned from NonThreadedExecutor
|
||||
|
||||
Note that this future is **not** thread-safe as it is being used
|
||||
from the context of a non-threaded environment.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._result = None
|
||||
self._exception = None
|
||||
self._traceback = None
|
||||
self._done = False
|
||||
self._done_callbacks = []
|
||||
|
||||
def set_result(self, result):
|
||||
self._result = result
|
||||
self._set_done()
|
||||
|
||||
def set_exception_info(self, exception, traceback):
|
||||
self._exception = exception
|
||||
self._traceback = traceback
|
||||
self._set_done()
|
||||
|
||||
def result(self, timeout=None):
|
||||
if self._exception:
|
||||
raise self._exception.with_traceback(self._traceback)
|
||||
return self._result
|
||||
|
||||
def _set_done(self):
|
||||
self._done = True
|
||||
for done_callback in self._done_callbacks:
|
||||
self._invoke_done_callback(done_callback)
|
||||
self._done_callbacks = []
|
||||
|
||||
def _invoke_done_callback(self, done_callback):
|
||||
return done_callback(self)
|
||||
|
||||
def done(self):
|
||||
return self._done
|
||||
|
||||
def add_done_callback(self, fn):
|
||||
if self._done:
|
||||
self._invoke_done_callback(fn)
|
||||
else:
|
||||
self._done_callbacks.append(fn)
|
||||
|
||||
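# Illustrative sketch (not part of the library): NonThreadedExecutor runs
# the callable inline and hands back a future-like object, which is what
# makes it a drop-in replacement for the threaded executor (for example
# via TransferManager(..., executor_cls=NonThreadedExecutor)).
def _example_non_threaded_submit():
    executor = NonThreadedExecutor()
    future = executor.submit(pow, 2, 10)
    assert future.done()
    return future.result()  # 1024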
|
||||
TaskTag = namedtuple('TaskTag', ['name'])
|
||||
|
||||
IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload')
|
||||
IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download')
|
754
cdk-env/lib/python3.12/site-packages/s3transfer/manager.py
Normal file
@@ -0,0 +1,754 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import copy
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
|
||||
from s3transfer.bandwidth import BandwidthLimiter, LeakyBucket
|
||||
from s3transfer.constants import (
|
||||
ALLOWED_DOWNLOAD_ARGS,
|
||||
FULL_OBJECT_CHECKSUM_ARGS,
|
||||
KB,
|
||||
MB,
|
||||
)
|
||||
from s3transfer.copies import CopySubmissionTask
|
||||
from s3transfer.delete import DeleteSubmissionTask
|
||||
from s3transfer.download import DownloadSubmissionTask
|
||||
from s3transfer.exceptions import CancelledError, FatalError
|
||||
from s3transfer.futures import (
|
||||
IN_MEMORY_DOWNLOAD_TAG,
|
||||
IN_MEMORY_UPLOAD_TAG,
|
||||
BoundedExecutor,
|
||||
TransferCoordinator,
|
||||
TransferFuture,
|
||||
TransferMeta,
|
||||
)
|
||||
from s3transfer.upload import UploadSubmissionTask
|
||||
from s3transfer.utils import (
|
||||
CallArgs,
|
||||
OSUtils,
|
||||
SlidingWindowSemaphore,
|
||||
TaskSemaphore,
|
||||
get_callbacks,
|
||||
set_default_checksum_algorithm,
|
||||
signal_not_transferring,
|
||||
signal_transferring,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TransferConfig:
|
||||
def __init__(
|
||||
self,
|
||||
multipart_threshold=8 * MB,
|
||||
multipart_chunksize=8 * MB,
|
||||
max_request_concurrency=10,
|
||||
max_submission_concurrency=5,
|
||||
max_request_queue_size=1000,
|
||||
max_submission_queue_size=1000,
|
||||
max_io_queue_size=1000,
|
||||
io_chunksize=256 * KB,
|
||||
num_download_attempts=5,
|
||||
max_in_memory_upload_chunks=10,
|
||||
max_in_memory_download_chunks=10,
|
||||
max_bandwidth=None,
|
||||
):
|
||||
"""Configurations for the transfer manager
|
||||
|
||||
:param multipart_threshold: The threshold for which multipart
|
||||
transfers occur.
|
||||
|
||||
:param max_request_concurrency: The maximum number of S3 API
|
||||
transfer-related requests that can happen at a time.
|
||||
|
||||
:param max_submission_concurrency: The maximum number of threads
|
||||
processing a call to a TransferManager method. Processing a
|
||||
call usually entails determining which S3 API requests that need
|
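# Illustrative sketch (not part of the library): callers typically wrap a
# transfer's result() call and inspect these exceptions. The manager,
# bucket, key, and filename arguments below are placeholders.
def _example_handle_download_failure(transfer_manager, bucket, key, filename):
    try:
        transfer_manager.download(bucket, key, filename).result()
    except RetriesExceededError as e:
        # The streaming download ran out of attempts; the underlying
        # error is preserved on the exception.
        return e.last_exception
    except S3DownloadFailedError as e:
        # Raised, for example, when the stored object changed
        # mid-download and the ETag precondition failed.
        return e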
||||
to be enqueued, but does **not** entail making any of the
|
||||
S3 API data transferring requests needed to perform the transfer.
|
||||
The threads controlled by ``max_request_concurrency`` are
|
||||
responsible for that.
|
||||
|
||||
:param multipart_chunksize: The size of each transfer if a request
|
||||
becomes a multipart transfer.
|
||||
|
||||
:param max_request_queue_size: The maximum number of S3 API requests
|
||||
that can be queued at a time.
|
||||
|
||||
:param max_submission_queue_size: The maximum number of
|
||||
TransferManager method calls that can be queued at a time.
|
||||
|
||||
:param max_io_queue_size: The maximum number of read parts that
|
||||
can be queued to be written to disk per download. The default
|
||||
size for each element in this queue is 8 KB.
|
||||
|
||||
:param io_chunksize: The max size of each chunk in the io queue.
|
||||
Currently, this is the size used when reading from the downloaded
|
||||
stream as well.
|
||||
|
||||
:param num_download_attempts: The number of download attempts that
|
||||
will be tried upon errors with downloading an object in S3. Note
|
||||
that these retries account for errors that occur when streaming
|
||||
down the data from s3 (i.e. socket errors and read timeouts that
|
||||
occur after receiving an OK response from s3).
|
||||
Other retryable exceptions such as throttling errors and 5xx errors
|
||||
are already retried by botocore (this default is 5). The
|
||||
``num_download_attempts`` does not take into account the
|
||||
number of exceptions retried by botocore.
|
||||
|
||||
:param max_in_memory_upload_chunks: The number of chunks that can
|
||||
be stored in memory at a time for all ongoing upload requests.
|
||||
This pertains to chunks of data that need to be stored in memory
|
||||
during an upload if the data is sourced from a file-like object.
|
||||
The total maximum memory footprint due to in-memory upload
|
||||
chunks is roughly equal to:
|
||||
|
||||
max_in_memory_upload_chunks * multipart_chunksize
|
||||
+ max_submission_concurrency * multipart_chunksize
|
||||
|
||||
``max_submission_concurrency`` has an effect on this value because
|
||||
each thread pulling data off of a file-like object may
|
||||
be waiting with a single read chunk to be submitted for upload
|
||||
because the ``max_in_memory_upload_chunks`` value has been reached
|
||||
by the threads making the upload request.
|
||||
|
||||
:param max_in_memory_download_chunks: The number of chunks that can
|
||||
be buffered in memory and **not** in the io queue at a time for all
|
||||
ongoing download requests. This pertains specifically to file-like
|
||||
objects that are not seekable. The total maximum memory footprint
|
||||
due to in-memory download chunks is roughly equal to:
|
||||
|
||||
max_in_memory_download_chunks * multipart_chunksize
|
||||
|
||||
:param max_bandwidth: The maximum bandwidth that will be consumed
|
||||
in uploading and downloading file content. The value is in terms of
|
||||
bytes per second.
|
||||
"""
|
||||
self.multipart_threshold = multipart_threshold
|
||||
self.multipart_chunksize = multipart_chunksize
|
||||
self.max_request_concurrency = max_request_concurrency
|
||||
self.max_submission_concurrency = max_submission_concurrency
|
||||
self.max_request_queue_size = max_request_queue_size
|
||||
self.max_submission_queue_size = max_submission_queue_size
|
||||
self.max_io_queue_size = max_io_queue_size
|
||||
self.io_chunksize = io_chunksize
|
||||
self.num_download_attempts = num_download_attempts
|
||||
self.max_in_memory_upload_chunks = max_in_memory_upload_chunks
|
||||
self.max_in_memory_download_chunks = max_in_memory_download_chunks
|
||||
self.max_bandwidth = max_bandwidth
|
||||
self._validate_attrs_are_nonzero()
|
||||
|
||||
def _validate_attrs_are_nonzero(self):
|
||||
for attr, attr_val in self.__dict__.items():
|
||||
if attr_val is not None and attr_val <= 0:
|
||||
raise ValueError(
|
||||
f'Provided parameter {attr} of value {attr_val} must '
|
||||
'be greater than 0.'
|
||||
)
|
||||
|
||||
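# Illustrative sketch (not part of the library): a config that lowers the
# multipart threshold and caps bandwidth. The values are arbitrary
# examples, not recommended defaults.
def _example_transfer_config():
    return TransferConfig(
        multipart_threshold=16 * MB,   # switch to multipart above 16 MB
        multipart_chunksize=16 * MB,   # use 16 MB parts
        max_request_concurrency=4,     # at most 4 S3 requests in flight
        max_bandwidth=50 * MB,         # roughly 50 MB/s across transfers
    )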
|
||||
class TransferManager:
|
||||
ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS
|
||||
|
||||
_ALLOWED_SHARED_ARGS = [
|
||||
'ACL',
|
||||
'CacheControl',
|
||||
'ChecksumAlgorithm',
|
||||
'ContentDisposition',
|
||||
'ContentEncoding',
|
||||
'ContentLanguage',
|
||||
'ContentType',
|
||||
'ExpectedBucketOwner',
|
||||
'Expires',
|
||||
'GrantFullControl',
|
||||
'GrantRead',
|
||||
'GrantReadACP',
|
||||
'GrantWriteACP',
|
||||
'Metadata',
|
||||
'ObjectLockLegalHoldStatus',
|
||||
'ObjectLockMode',
|
||||
'ObjectLockRetainUntilDate',
|
||||
'RequestPayer',
|
||||
'ServerSideEncryption',
|
||||
'StorageClass',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKey',
|
||||
'SSECustomerKeyMD5',
|
||||
'SSEKMSKeyId',
|
||||
'SSEKMSEncryptionContext',
|
||||
'Tagging',
|
||||
'WebsiteRedirectLocation',
|
||||
]
|
||||
|
||||
ALLOWED_UPLOAD_ARGS = (
|
||||
_ALLOWED_SHARED_ARGS
|
||||
+ [
|
||||
'ChecksumType',
|
||||
'MpuObjectSize',
|
||||
]
|
||||
+ FULL_OBJECT_CHECKSUM_ARGS
|
||||
)
|
||||
|
||||
ALLOWED_COPY_ARGS = _ALLOWED_SHARED_ARGS + [
|
||||
'CopySourceIfMatch',
|
||||
'CopySourceIfModifiedSince',
|
||||
'CopySourceIfNoneMatch',
|
||||
'CopySourceIfUnmodifiedSince',
|
||||
'CopySourceSSECustomerAlgorithm',
|
||||
'CopySourceSSECustomerKey',
|
||||
'CopySourceSSECustomerKeyMD5',
|
||||
'MetadataDirective',
|
||||
'TaggingDirective',
|
||||
]
|
||||
|
||||
ALLOWED_DELETE_ARGS = [
|
||||
'MFA',
|
||||
'VersionId',
|
||||
'RequestPayer',
|
||||
'ExpectedBucketOwner',
|
||||
]
|
||||
|
||||
VALIDATE_SUPPORTED_BUCKET_VALUES = True
|
||||
|
||||
_UNSUPPORTED_BUCKET_PATTERNS = {
|
||||
'S3 Object Lambda': re.compile(
|
||||
r'^arn:(aws).*:s3-object-lambda:[a-z\-0-9]+:[0-9]{12}:'
|
||||
r'accesspoint[/:][a-zA-Z0-9\-]{1,63}'
|
||||
),
|
||||
}
|
||||
|
||||
def __init__(self, client, config=None, osutil=None, executor_cls=None):
|
||||
"""A transfer manager interface for Amazon S3
|
||||
|
||||
:param client: Client to be used by the manager
|
||||
:param config: TransferConfig to associate specific configurations
|
||||
:param osutil: OSUtils object to use for os-related behavior when
|
||||
using the transfer manager.
|
||||
|
||||
:type executor_cls: s3transfer.futures.BaseExecutor
|
||||
:param executor_cls: The class of executor to use with the transfer
|
||||
manager. By default, concurrent.futures.ThreadPoolExecutor is used.
|
||||
"""
|
||||
self._client = client
|
||||
self._config = config
|
||||
if config is None:
|
||||
self._config = TransferConfig()
|
||||
self._osutil = osutil
|
||||
if osutil is None:
|
||||
self._osutil = OSUtils()
|
||||
self._coordinator_controller = TransferCoordinatorController()
|
||||
# A counter to create unique id's for each transfer submitted.
|
||||
self._id_counter = 0
|
||||
|
||||
# The executor responsible for making S3 API transfer requests
|
||||
self._request_executor = BoundedExecutor(
|
||||
max_size=self._config.max_request_queue_size,
|
||||
max_num_threads=self._config.max_request_concurrency,
|
||||
tag_semaphores={
|
||||
IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
|
||||
self._config.max_in_memory_upload_chunks
|
||||
),
|
||||
IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
|
||||
self._config.max_in_memory_download_chunks
|
||||
),
|
||||
},
|
||||
executor_cls=executor_cls,
|
||||
)
|
||||
|
||||
# The executor responsible for submitting the necessary tasks to
|
||||
# perform the desired transfer
|
||||
self._submission_executor = BoundedExecutor(
|
||||
max_size=self._config.max_submission_queue_size,
|
||||
max_num_threads=self._config.max_submission_concurrency,
|
||||
executor_cls=executor_cls,
|
||||
)
|
||||
|
||||
# There is one thread available for writing to disk. It will handle
|
||||
# downloads for all files.
|
||||
self._io_executor = BoundedExecutor(
|
||||
max_size=self._config.max_io_queue_size,
|
||||
max_num_threads=1,
|
||||
executor_cls=executor_cls,
|
||||
)
|
||||
|
||||
# The component responsible for limiting bandwidth usage if it
|
||||
# is configured.
|
||||
self._bandwidth_limiter = None
|
||||
if self._config.max_bandwidth is not None:
|
||||
logger.debug(
|
||||
'Setting max_bandwidth to %s', self._config.max_bandwidth
|
||||
)
|
||||
leaky_bucket = LeakyBucket(self._config.max_bandwidth)
|
||||
self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)
|
||||
|
||||
self._register_handlers()
|
||||
|
||||
@property
|
||||
def client(self):
|
||||
return self._client
|
||||
|
||||
@property
|
||||
def config(self):
|
||||
return self._config
|
||||
|
||||
def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
|
||||
"""Uploads a file to S3
|
||||
|
||||
:type fileobj: str or seekable file-like object
|
||||
:param fileobj: The name of a file to upload or a seekable file-like
|
||||
object to upload. It is recommended to use a filename because
|
||||
file-like objects may result in higher memory usage.
|
||||
|
||||
:type bucket: str
|
||||
:param bucket: The name of the bucket to upload to
|
||||
|
||||
:type key: str
|
||||
:param key: The name of the key to upload to
|
||||
|
||||
:type extra_args: dict
|
||||
:param extra_args: Extra arguments that may be passed to the
|
||||
client operation
|
||||
|
||||
:type subscribers: list(s3transfer.subscribers.BaseSubscriber)
|
||||
:param subscribers: The list of subscribers to be invoked in the
|
||||
order provided based on the events emitted during the process of
|
||||
the transfer request.
|
||||
|
||||
:rtype: s3transfer.futures.TransferFuture
|
||||
:returns: Transfer future representing the upload
|
||||
"""
|
||||
|
||||
extra_args = extra_args.copy() if extra_args else {}
|
||||
if subscribers is None:
|
||||
subscribers = []
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
self._add_operation_defaults(extra_args)
|
||||
call_args = CallArgs(
|
||||
fileobj=fileobj,
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
)
|
||||
extra_main_kwargs = {}
|
||||
if self._bandwidth_limiter:
|
||||
extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
|
||||
return self._submit_transfer(
|
||||
call_args, UploadSubmissionTask, extra_main_kwargs
|
||||
)
|
||||
|
||||
def download(
|
||||
self, bucket, key, fileobj, extra_args=None, subscribers=None
|
||||
):
|
||||
"""Downloads a file from S3
|
||||
|
||||
:type bucket: str
|
||||
:param bucket: The name of the bucket to download from
|
||||
|
||||
:type key: str
|
||||
:param key: The name of the key to download from
|
||||
|
||||
:type fileobj: str or seekable file-like object
|
||||
:param fileobj: The name of a file to download or a seekable file-like
|
||||
object to download. It is recommended to use a filename because
|
||||
file-like objects may result in higher memory usage.
|
||||
|
||||
:type extra_args: dict
|
||||
:param extra_args: Extra arguments that may be passed to the
|
||||
client operation
|
||||
|
||||
:type subscribers: list(s3transfer.subscribers.BaseSubscriber)
|
||||
:param subscribers: The list of subscribers to be invoked in the
|
||||
order provided based on the events emitted during the process of
|
||||
the transfer request.
|
||||
|
||||
:rtype: s3transfer.futures.TransferFuture
|
||||
:returns: Transfer future representing the download
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
if subscribers is None:
|
||||
subscribers = []
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
call_args = CallArgs(
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
fileobj=fileobj,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
)
|
||||
extra_main_kwargs = {'io_executor': self._io_executor}
|
||||
if self._bandwidth_limiter:
|
||||
extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
|
||||
return self._submit_transfer(
|
||||
call_args, DownloadSubmissionTask, extra_main_kwargs
|
||||
)
|
||||
|
||||
def copy(
|
||||
self,
|
||||
copy_source,
|
||||
bucket,
|
||||
key,
|
||||
extra_args=None,
|
||||
subscribers=None,
|
||||
source_client=None,
|
||||
):
|
||||
"""Copies a file in S3
|
||||
|
||||
:type copy_source: dict
|
||||
:param copy_source: The name of the source bucket, key name of the
|
||||
source object, and optional version ID of the source object. The
|
||||
dictionary format is:
|
||||
``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
|
||||
that the ``VersionId`` key is optional and may be omitted.
|
||||
|
||||
:type bucket: str
|
||||
:param bucket: The name of the bucket to copy to
|
||||
|
||||
:type key: str
|
||||
:param key: The name of the key to copy to
|
||||
|
||||
:type extra_args: dict
|
||||
:param extra_args: Extra arguments that may be passed to the
|
||||
client operation
|
||||
|
||||
:type subscribers: list(s3transfer.subscribers.BaseSubscriber)
|
||||
:param subscribers: The list of subscribers to be invoked in the
|
||||
order provided, based on the events emitted during the process of
|
||||
the transfer request.
|
||||
|
||||
:type source_client: botocore or boto3 Client
|
||||
:param source_client: The client to be used for operations that
|
||||
may happen at the source object. For example, this client is
|
||||
used for the head_object that determines the size of the copy.
|
||||
If no client is provided, the transfer manager's client is used
|
||||
as the client for the source object.
|
||||
|
||||
:rtype: s3transfer.futures.TransferFuture
|
||||
:returns: Transfer future representing the copy
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
if subscribers is None:
|
||||
subscribers = []
|
||||
if source_client is None:
|
||||
source_client = self._client
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
|
||||
if isinstance(copy_source, dict):
|
||||
self._validate_if_bucket_supported(copy_source.get('Bucket'))
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
call_args = CallArgs(
|
||||
copy_source=copy_source,
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
source_client=source_client,
|
||||
)
|
||||
return self._submit_transfer(call_args, CopySubmissionTask)
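# Illustrative sketch with assumed names (not part of the module source):
# copy() above takes a dict naming the source object; VersionId is optional,
# and source_client is only needed when a different client should issue the
# HeadObject call against the source.
import boto3

from s3transfer.manager import TransferManager

s3_client = boto3.client('s3', 'us-west-2')
with TransferManager(s3_client) as manager:
    future = manager.copy(
        copy_source={'Bucket': 'source-bucket', 'Key': 'source-key'},
        bucket='destination-bucket',
        key='destination-key',
    )
    future.result()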
|
||||
|
||||
def delete(self, bucket, key, extra_args=None, subscribers=None):
|
||||
"""Delete an S3 object.
|
||||
|
||||
:type bucket: str
|
||||
:param bucket: The name of the bucket.
|
||||
|
||||
:type key: str
|
||||
:param key: The name of the S3 object to delete.
|
||||
|
||||
:type extra_args: dict
|
||||
:param extra_args: Extra arguments that may be passed to the
|
||||
DeleteObject call.
|
||||
|
||||
:type subscribers: list
|
||||
:param subscribers: A list of subscribers to be invoked during the
|
||||
process of the transfer request. Note that the ``on_progress``
|
||||
callback is not invoked during object deletion.
|
||||
|
||||
:rtype: s3transfer.futures.TransferFuture
|
||||
:return: Transfer future representing the deletion.
|
||||
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = {}
|
||||
if subscribers is None:
|
||||
subscribers = []
|
||||
self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
|
||||
self._validate_if_bucket_supported(bucket)
|
||||
call_args = CallArgs(
|
||||
bucket=bucket,
|
||||
key=key,
|
||||
extra_args=extra_args,
|
||||
subscribers=subscribers,
|
||||
)
|
||||
return self._submit_transfer(call_args, DeleteSubmissionTask)
|
||||
|
||||
def _validate_if_bucket_supported(self, bucket):
|
||||
# s3 high level operations don't support some resources
|
||||
# (e.g. S3 Object Lambda); only direct API calls are available
|
||||
# for such resources
|
||||
if self.VALIDATE_SUPPORTED_BUCKET_VALUES:
|
||||
for resource, pattern in self._UNSUPPORTED_BUCKET_PATTERNS.items():
|
||||
match = pattern.match(bucket)
|
||||
if match:
|
||||
raise ValueError(
|
||||
f'TransferManager methods do not support {resource} '
|
||||
'resource. Use direct client calls instead.'
|
||||
)
|
||||
|
||||
def _validate_all_known_args(self, actual, allowed):
|
||||
for kwarg in actual:
|
||||
if kwarg not in allowed:
|
||||
raise ValueError(
|
||||
"Invalid extra_args key '{}', "
|
||||
"must be one of: {}".format(kwarg, ', '.join(allowed))
|
||||
)
|
||||
|
||||
def _add_operation_defaults(self, extra_args):
|
||||
if (
|
||||
self.client.meta.config.request_checksum_calculation
|
||||
== "when_supported"
|
||||
):
|
||||
set_default_checksum_algorithm(extra_args)
|
||||
|
||||
def _submit_transfer(
|
||||
self, call_args, submission_task_cls, extra_main_kwargs=None
|
||||
):
|
||||
if not extra_main_kwargs:
|
||||
extra_main_kwargs = {}
|
||||
|
||||
# Create a TransferFuture to return back to the user
|
||||
transfer_future, components = self._get_future_with_components(
|
||||
call_args
|
||||
)
|
||||
|
||||
# Add any provided done callbacks to the created transfer future
|
||||
# to be invoked on the transfer future being complete.
|
||||
for callback in get_callbacks(transfer_future, 'done'):
|
||||
components['coordinator'].add_done_callback(callback)
|
||||
|
||||
# Get the main kwargs needed to instantiate the submission task
|
||||
main_kwargs = self._get_submission_task_main_kwargs(
|
||||
transfer_future, extra_main_kwargs
|
||||
)
|
||||
|
||||
# Submit a SubmissionTask that will submit all of the necessary
|
||||
# tasks needed to complete the S3 transfer.
|
||||
self._submission_executor.submit(
|
||||
submission_task_cls(
|
||||
transfer_coordinator=components['coordinator'],
|
||||
main_kwargs=main_kwargs,
|
||||
)
|
||||
)
|
||||
|
||||
# Increment the unique id counter for future transfer requests
|
||||
self._id_counter += 1
|
||||
|
||||
return transfer_future
|
||||
|
||||
def _get_future_with_components(self, call_args):
|
||||
transfer_id = self._id_counter
|
||||
# Creates a new transfer future along with its components
|
||||
transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
|
||||
# Track the transfer coordinator for transfers to manage.
|
||||
self._coordinator_controller.add_transfer_coordinator(
|
||||
transfer_coordinator
|
||||
)
|
||||
# Also make sure that the transfer coordinator is removed once
|
||||
# the transfer completes so it does not stick around in memory.
|
||||
transfer_coordinator.add_done_callback(
|
||||
self._coordinator_controller.remove_transfer_coordinator,
|
||||
transfer_coordinator,
|
||||
)
|
||||
components = {
|
||||
'meta': TransferMeta(call_args, transfer_id=transfer_id),
|
||||
'coordinator': transfer_coordinator,
|
||||
}
|
||||
transfer_future = TransferFuture(**components)
|
||||
return transfer_future, components
|
||||
|
||||
def _get_submission_task_main_kwargs(
|
||||
self, transfer_future, extra_main_kwargs
|
||||
):
|
||||
main_kwargs = {
|
||||
'client': self._client,
|
||||
'config': self._config,
|
||||
'osutil': self._osutil,
|
||||
'request_executor': self._request_executor,
|
||||
'transfer_future': transfer_future,
|
||||
}
|
||||
main_kwargs.update(extra_main_kwargs)
|
||||
return main_kwargs
|
||||
|
||||
def _register_handlers(self):
|
||||
# Register handlers to enable/disable callbacks on uploads.
|
||||
event_name = 'request-created.s3'
|
||||
self._client.meta.events.register_first(
|
||||
event_name,
|
||||
signal_not_transferring,
|
||||
unique_id='s3upload-not-transferring',
|
||||
)
|
||||
self._client.meta.events.register_last(
|
||||
event_name, signal_transferring, unique_id='s3upload-transferring'
|
||||
)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, *args):
|
||||
cancel = False
|
||||
cancel_msg = ''
|
||||
cancel_exc_type = FatalError
|
||||
# If an exception was raised in the context manager, signal to cancel
|
||||
# all of the inprogress futures in the shutdown.
|
||||
if exc_type:
|
||||
cancel = True
|
||||
cancel_msg = str(exc_value)
|
||||
if not cancel_msg:
|
||||
cancel_msg = repr(exc_value)
|
||||
# If it was a KeyboardInterrupt, the cancellation was initiated
|
||||
# by the user.
|
||||
if isinstance(exc_value, KeyboardInterrupt):
|
||||
cancel_exc_type = CancelledError
|
||||
self._shutdown(cancel, cancel_msg, cancel_exc_type)
|
||||
|
||||
def shutdown(self, cancel=False, cancel_msg=''):
|
||||
"""Shutdown the TransferManager
|
||||
|
||||
It will wait till all transfers complete before it completely shuts
|
||||
down.
|
||||
|
||||
:type cancel: boolean
|
||||
:param cancel: If True, calls TransferFuture.cancel() for
|
||||
all in-progress transfers. This is useful if you want the
|
||||
shutdown to happen quicker.
|
||||
|
||||
:type cancel_msg: str
|
||||
:param cancel_msg: The message to specify if canceling all in-progress
|
||||
transfers.
|
||||
"""
|
||||
self._shutdown(cancel, cancel_msg)
|
||||
|
||||
def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
|
||||
if cancel:
|
||||
# Cancel all in-flight transfers if requested, before waiting
|
||||
# for them to complete.
|
||||
self._coordinator_controller.cancel(cancel_msg, exc_type)
|
||||
try:
|
||||
# Wait until there are no more in-progress transfers. This is
|
||||
# wrapped in a try statement because this can be interrupted
|
||||
# with a KeyboardInterrupt that needs to be caught.
|
||||
self._coordinator_controller.wait()
|
||||
except KeyboardInterrupt:
|
||||
# If no errors were raised in the try block, the cancel should
|
||||
# have no coordinators it needs to run cancel on. If there was
|
||||
# an error raised in the try statement we want to cancel all of
|
||||
# the inflight transfers before shutting down to speed that
|
||||
# process up.
|
||||
self._coordinator_controller.cancel('KeyboardInterrupt()')
|
||||
raise
|
||||
finally:
|
||||
# Shutdown all of the executors.
|
||||
self._submission_executor.shutdown()
|
||||
self._request_executor.shutdown()
|
||||
self._io_executor.shutdown()
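# Illustrative sketch with assumed names: __exit__ above cancels in-flight
# transfers when the managed block raises, while an explicitly created
# manager relies on shutdown(); shutdown(cancel=True) aborts instead of
# waiting for outstanding transfers.
import boto3

from s3transfer.manager import TransferManager

s3_client = boto3.client('s3', 'us-west-2')
manager = TransferManager(s3_client)
try:
    future = manager.download('example-bucket', 'example-key', '/tmp/myfile')
    future.result()
finally:
    manager.shutdown()  # waits for remaining transfers; cancel=True aborts them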
|
||||
|
||||
|
||||
class TransferCoordinatorController:
|
||||
def __init__(self):
|
||||
"""Abstraction to control all transfer coordinators
|
||||
|
||||
This abstraction allows the manager to wait for inprogress transfers
|
||||
to complete and cancel all inprogress transfers.
|
||||
"""
|
||||
self._lock = threading.Lock()
|
||||
self._tracked_transfer_coordinators = set()
|
||||
|
||||
@property
|
||||
def tracked_transfer_coordinators(self):
|
||||
"""The set of transfer coordinators being tracked"""
|
||||
with self._lock:
|
||||
# We return a copy because the set is mutable and if you were to
|
||||
# iterate over the set, it may be changing in length due to
|
||||
# additions and removals of transfer coordinators.
|
||||
return copy.copy(self._tracked_transfer_coordinators)
|
||||
|
||||
def add_transfer_coordinator(self, transfer_coordinator):
|
||||
"""Adds a transfer coordinator of a transfer to be canceled if needed
|
||||
|
||||
:type transfer_coordinator: s3transfer.futures.TransferCoordinator
|
||||
:param transfer_coordinator: The transfer coordinator for the
|
||||
particular transfer
|
||||
"""
|
||||
with self._lock:
|
||||
self._tracked_transfer_coordinators.add(transfer_coordinator)
|
||||
|
||||
def remove_transfer_coordinator(self, transfer_coordinator):
|
||||
"""Remove a transfer coordinator from cancellation consideration
|
||||
|
||||
Typically, this method is invoked by the transfer coordinator itself
|
||||
to remove itself when it completes its transfer.
|
||||
|
||||
:type transfer_coordinator: s3transfer.futures.TransferCoordinator
|
||||
:param transfer_coordinator: The transfer coordinator for the
|
||||
particular transfer
|
||||
"""
|
||||
with self._lock:
|
||||
self._tracked_transfer_coordinators.remove(transfer_coordinator)
|
||||
|
||||
def cancel(self, msg='', exc_type=CancelledError):
|
||||
"""Cancels all inprogress transfers
|
||||
|
||||
This cancels the inprogress transfers by calling cancel() on all
|
||||
tracked transfer coordinators.
|
||||
|
||||
:param msg: The message to pass on to each transfer coordinator that
|
||||
gets cancelled.
|
||||
|
||||
:param exc_type: The type of exception to set for the cancellation
|
||||
"""
|
||||
for transfer_coordinator in self.tracked_transfer_coordinators:
|
||||
transfer_coordinator.cancel(msg, exc_type)
|
||||
|
||||
def wait(self):
|
||||
"""Wait until there are no more inprogress transfers
|
||||
|
||||
This will not stop when failures are encountered nor propagate any
|
||||
of these errors from failed transfers, but it can be interrupted with
|
||||
a KeyboardInterrupt.
|
||||
"""
|
||||
try:
|
||||
transfer_coordinator = None
|
||||
for transfer_coordinator in self.tracked_transfer_coordinators:
|
||||
transfer_coordinator.result()
|
||||
except KeyboardInterrupt:
|
||||
logger.debug('Received KeyboardInterrupt in wait()')
|
||||
# If Keyboard interrupt is raised while waiting for
|
||||
# the result, then exit out of the wait and raise the
|
||||
# exception
|
||||
if transfer_coordinator:
|
||||
logger.debug(
|
||||
'On KeyboardInterrupt was waiting for %s',
|
||||
transfer_coordinator,
|
||||
)
|
||||
raise
|
||||
except Exception:
|
||||
# A general exception could have been thrown because
|
||||
# of result(). We just want to ignore this and continue
|
||||
# because we at least know that the transfer coordinator
|
||||
# has completed.
|
||||
pass
|
1009
cdk-env/lib/python3.12/site-packages/s3transfer/processpool.py
Normal file
1009
cdk-env/lib/python3.12/site-packages/s3transfer/processpool.py
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,94 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
from functools import lru_cache
|
||||
|
||||
from s3transfer.compat import accepts_kwargs
|
||||
from s3transfer.exceptions import InvalidSubscriberMethodError
|
||||
|
||||
|
||||
class BaseSubscriber:
|
||||
"""The base subscriber class
|
||||
|
||||
It is recommended that all subscriber implementations subclass and then
|
||||
override the subscription methods (i.e. the on_{subscribe_type}() methods).
|
||||
"""
|
||||
|
||||
VALID_SUBSCRIBER_TYPES = ['queued', 'progress', 'done']
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
cls._validate_subscriber_methods()
|
||||
return super().__new__(cls)
|
||||
|
||||
@classmethod
|
||||
@lru_cache
|
||||
def _validate_subscriber_methods(cls):
|
||||
for subscriber_type in cls.VALID_SUBSCRIBER_TYPES:
|
||||
subscriber_method = getattr(cls, 'on_' + subscriber_type)
|
||||
if not callable(subscriber_method):
|
||||
raise InvalidSubscriberMethodError(
|
||||
f'Subscriber method {subscriber_method} must be callable.'
|
||||
)
|
||||
|
||||
if not accepts_kwargs(subscriber_method):
|
||||
raise InvalidSubscriberMethodError(
|
||||
f'Subscriber method {subscriber_method} must accept keyword '
|
||||
'arguments (**kwargs)'
|
||||
)
|
||||
|
||||
def on_queued(self, future, **kwargs):
|
||||
"""Callback to be invoked when transfer request gets queued
|
||||
|
||||
This callback can be useful for:
|
||||
|
||||
* Keeping track of how many transfers have been requested
|
||||
* Providing the expected transfer size through
|
||||
future.meta.provide_transfer_size() so a HeadObject would not
|
||||
need to be made for copies and downloads.
|
||||
|
||||
:type future: s3transfer.futures.TransferFuture
|
||||
:param future: The TransferFuture representing the requested transfer.
|
||||
"""
|
||||
pass
|
||||
|
||||
def on_progress(self, future, bytes_transferred, **kwargs):
|
||||
"""Callback to be invoked when progress is made on transfer
|
||||
|
||||
This callback can be useful for:
|
||||
|
||||
* Recording and displaying progress
|
||||
|
||||
:type future: s3transfer.futures.TransferFuture
|
||||
:param future: The TransferFuture representing the requested transfer.
|
||||
|
||||
:type bytes_transferred: int
|
||||
:param bytes_transferred: The number of bytes transferred for that
|
||||
invocation of the callback. Note that a negative amount can be
|
||||
provided, which usually indicates that an in-progress request
|
||||
needed to be retried and thus progress was rewound.
|
||||
"""
|
||||
pass
|
||||
|
||||
def on_done(self, future, **kwargs):
|
||||
"""Callback to be invoked once a transfer is done
|
||||
|
||||
This callback can be useful for:
|
||||
|
||||
* Recording and displaying whether the transfer succeeded or
|
||||
failed using future.result()
|
||||
* Running some task after the transfer completed like changing
|
||||
the last modified time of a downloaded file.
|
||||
|
||||
:type future: s3transfer.futures.TransferFuture
|
||||
:param future: The TransferFuture representing the requested transfer.
|
||||
"""
|
||||
pass
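# Illustrative sketch of a custom subscriber (hypothetical class, not part
# of the module source): only the hooks of interest need to be overridden,
# and every hook must keep accepting **kwargs, as enforced by
# _validate_subscriber_methods() above.
from s3transfer.subscribers import BaseSubscriber


class PrintingSubscriber(BaseSubscriber):
    def on_progress(self, future, bytes_transferred, **kwargs):
        # bytes_transferred can be negative when a retry rewinds progress.
        print(f'{future.meta.call_args.key}: +{bytes_transferred} bytes')

    def on_done(self, future, **kwargs):
        print(f'{future.meta.call_args.key}: done')

# An instance is passed via the subscribers= argument of the TransferManager
# upload()/download()/copy()/delete() methods.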
|
390
cdk-env/lib/python3.12/site-packages/s3transfer/tasks.py
Normal file
390
cdk-env/lib/python3.12/site-packages/s3transfer/tasks.py
Normal file
@@ -0,0 +1,390 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import copy
|
||||
import logging
|
||||
|
||||
from s3transfer.utils import get_callbacks
|
||||
|
||||
try:
|
||||
from botocore.context import start_as_current_context
|
||||
except ImportError:
|
||||
from contextlib import nullcontext as start_as_current_context
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Task:
|
||||
"""A task associated to a TransferFuture request
|
||||
|
||||
This is a base class for other classes to subclass from. All subclassed
|
||||
classes must implement the _main() method.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
transfer_coordinator,
|
||||
main_kwargs=None,
|
||||
pending_main_kwargs=None,
|
||||
done_callbacks=None,
|
||||
is_final=False,
|
||||
):
|
||||
"""
|
||||
:type transfer_coordinator: s3transfer.futures.TransferCoordinator
|
||||
:param transfer_coordinator: The context associated to the
|
||||
TransferFuture for which this Task is associated with.
|
||||
|
||||
:type main_kwargs: dict
|
||||
:param main_kwargs: The keyword args that can be immediately supplied
|
||||
to the _main() method of the task
|
||||
|
||||
:type pending_main_kwargs: dict
|
||||
:param pending_main_kwargs: The keyword args whose values depend on
|
||||
the result of one or more dependent futures. The result returned by
|
||||
the future(s) will be used as the value for the keyword argument
|
||||
when _main() is called. The values for each key can be:
|
||||
* a single future - Once completed, its value will be the
|
||||
result of that single future
|
||||
* a list of futures - Once all of the futures complete, the
|
||||
value used will be a list of each completed future result
|
||||
value in order of when they were originally supplied.
|
||||
|
||||
:type done_callbacks: list of callbacks
|
||||
:param done_callbacks: A list of callbacks to call once the task is
|
||||
done completing. Each callback will be called with no arguments
|
||||
and will be called no matter if the task succeeds or an exception
|
||||
is raised.
|
||||
|
||||
:type is_final: boolean
|
||||
:param is_final: True, to indicate that this task is the final task
|
||||
for the TransferFuture request. By setting this value to True, it
|
||||
will set the result of the entire TransferFuture to the result
|
||||
returned by this task's main() method.
|
||||
"""
|
||||
self._transfer_coordinator = transfer_coordinator
|
||||
|
||||
self._main_kwargs = main_kwargs
|
||||
if self._main_kwargs is None:
|
||||
self._main_kwargs = {}
|
||||
|
||||
self._pending_main_kwargs = pending_main_kwargs
|
||||
if pending_main_kwargs is None:
|
||||
self._pending_main_kwargs = {}
|
||||
|
||||
self._done_callbacks = done_callbacks
|
||||
if self._done_callbacks is None:
|
||||
self._done_callbacks = []
|
||||
|
||||
self._is_final = is_final
|
||||
|
||||
def __repr__(self):
|
||||
# These are the general main_kwarg parameters that we want to
|
||||
# display in the repr.
|
||||
params_to_display = [
|
||||
'bucket',
|
||||
'key',
|
||||
'part_number',
|
||||
'final_filename',
|
||||
'transfer_future',
|
||||
'offset',
|
||||
'extra_args',
|
||||
]
|
||||
main_kwargs_to_display = self._get_kwargs_with_params_to_include(
|
||||
self._main_kwargs, params_to_display
|
||||
)
|
||||
return f'{self.__class__.__name__}(transfer_id={self._transfer_coordinator.transfer_id}, {main_kwargs_to_display})'
|
||||
|
||||
@property
|
||||
def transfer_id(self):
|
||||
"""The id for the transfer request that the task belongs to"""
|
||||
return self._transfer_coordinator.transfer_id
|
||||
|
||||
def _get_kwargs_with_params_to_include(self, kwargs, include):
|
||||
filtered_kwargs = {}
|
||||
for param in include:
|
||||
if param in kwargs:
|
||||
filtered_kwargs[param] = kwargs[param]
|
||||
return filtered_kwargs
|
||||
|
||||
def _get_kwargs_with_params_to_exclude(self, kwargs, exclude):
|
||||
filtered_kwargs = {}
|
||||
for param, value in kwargs.items():
|
||||
if param in exclude:
|
||||
continue
|
||||
filtered_kwargs[param] = value
|
||||
return filtered_kwargs
|
||||
|
||||
def __call__(self, ctx=None):
|
||||
"""The callable to use when submitting a Task to an executor"""
|
||||
with start_as_current_context(ctx):
|
||||
try:
|
||||
# Wait for all of the futures this task depends on.
|
||||
self._wait_on_dependent_futures()
|
||||
# Gather up all of the main keyword arguments for main().
|
||||
# This includes the immediately provided main_kwargs and
|
||||
# the values for pending_main_kwargs that source from the return
|
||||
# values from the task's dependent futures.
|
||||
kwargs = self._get_all_main_kwargs()
|
||||
# If the task is not done (really only if some other related
|
||||
# task to the TransferFuture had failed) then execute the task's
|
||||
# main() method.
|
||||
if not self._transfer_coordinator.done():
|
||||
return self._execute_main(kwargs)
|
||||
except Exception as e:
|
||||
self._log_and_set_exception(e)
|
||||
finally:
|
||||
# Run any done callbacks associated to the task no matter what.
|
||||
for done_callback in self._done_callbacks:
|
||||
done_callback()
|
||||
|
||||
if self._is_final:
|
||||
# If this is the final task announce that it is done if results
|
||||
# are waiting on its completion.
|
||||
self._transfer_coordinator.announce_done()
|
||||
|
||||
def _execute_main(self, kwargs):
|
||||
# Do not display keyword args that should not be printed, especially
|
||||
# if they are going to make the logs hard to follow.
|
||||
params_to_exclude = ['data']
|
||||
kwargs_to_display = self._get_kwargs_with_params_to_exclude(
|
||||
kwargs, params_to_exclude
|
||||
)
|
||||
# Log what is about to be executed.
|
||||
logger.debug(f"Executing task {self} with kwargs {kwargs_to_display}")
|
||||
|
||||
return_value = self._main(**kwargs)
|
||||
# If the task is the final task, then set the TransferFuture's
|
||||
# value to the return value from main().
|
||||
if self._is_final:
|
||||
self._transfer_coordinator.set_result(return_value)
|
||||
return return_value
|
||||
|
||||
def _log_and_set_exception(self, exception):
|
||||
# If an exception is ever thrown, then set the exception for the
|
||||
# entire TransferFuture.
|
||||
logger.debug("Exception raised.", exc_info=True)
|
||||
self._transfer_coordinator.set_exception(exception)
|
||||
|
||||
def _main(self, **kwargs):
|
||||
"""The method that will be ran in the executor
|
||||
|
||||
This method must be implemented by subclasses from Task. main() can
|
||||
be implemented with any arguments decided upon by the subclass.
|
||||
"""
|
||||
raise NotImplementedError('_main() must be implemented')
|
||||
|
||||
def _wait_on_dependent_futures(self):
|
||||
# Gather all of the futures that main() depends on.
|
||||
futures_to_wait_on = []
|
||||
for _, future in self._pending_main_kwargs.items():
|
||||
# If the pending main keyword arg is a list then extend the list.
|
||||
if isinstance(future, list):
|
||||
futures_to_wait_on.extend(future)
|
||||
# If the pending main keyword arg is a future append it to the list.
|
||||
else:
|
||||
futures_to_wait_on.append(future)
|
||||
# Now wait for all of the futures to complete.
|
||||
self._wait_until_all_complete(futures_to_wait_on)
|
||||
|
||||
def _wait_until_all_complete(self, futures):
|
||||
# This is a basic implementation of the concurrent.futures.wait()
|
||||
#
|
||||
# concurrent.futures.wait() is not used instead because of this
|
||||
# reported issue: https://bugs.python.org/issue20319.
|
||||
# The issue would occasionally cause multipart uploads to hang
|
||||
# when wait() was called. With this approach, it avoids the
|
||||
# concurrency bug by removing any association with concurrent.futures
|
||||
# implementation of waiters.
|
||||
logger.debug(
|
||||
'%s about to wait for the following futures %s', self, futures
|
||||
)
|
||||
for future in futures:
|
||||
try:
|
||||
logger.debug('%s about to wait for %s', self, future)
|
||||
future.result()
|
||||
except Exception:
|
||||
# result() can also produce exceptions. We want to ignore
|
||||
# these to be deferred to error handling down the road.
|
||||
pass
|
||||
logger.debug('%s done waiting for dependent futures', self)
|
||||
|
||||
def _get_all_main_kwargs(self):
|
||||
# Copy over all of the kwargs that we know is available.
|
||||
kwargs = copy.copy(self._main_kwargs)
|
||||
|
||||
# Iterate through the kwargs whose values are pending on the result
|
||||
# of a future.
|
||||
for key, pending_value in self._pending_main_kwargs.items():
|
||||
# If the value is a list of futures, iterate though the list
|
||||
# appending on the result from each future.
|
||||
if isinstance(pending_value, list):
|
||||
result = []
|
||||
for future in pending_value:
|
||||
result.append(future.result())
|
||||
# Otherwise if the pending_value is a future, just wait for it.
|
||||
else:
|
||||
result = pending_value.result()
|
||||
# Add the retrieved value to the kwargs to be sent to the
|
||||
# main() call.
|
||||
kwargs[key] = result
|
||||
return kwargs
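# Illustrative sketch of a Task subclass (hypothetical, not part of the
# module source): a task only implements _main(); main_kwargs supplies
# values that are known up front, while pending_main_kwargs names futures
# whose results are filled in before _main() runs.
from s3transfer.tasks import Task


class HeadObjectTask(Task):
    """Hypothetical task that returns an object's size."""

    def _main(self, client, bucket, key):
        # The return value is what dependent tasks receive when they list
        # this task's future in their pending_main_kwargs.
        response = client.head_object(Bucket=bucket, Key=key)
        return response['ContentLength']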
|
||||
|
||||
|
||||
class SubmissionTask(Task):
|
||||
"""A base class for any submission task
|
||||
|
||||
Submission tasks are the top-level task used to submit a series of tasks
|
||||
to execute a particular transfer.
|
||||
"""
|
||||
|
||||
def _main(self, transfer_future, **kwargs):
|
||||
"""
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future associated with the
|
||||
transfer request that tasks are being submitted for
|
||||
|
||||
:param kwargs: Any additional kwargs that you may want to pass
|
||||
to the _submit() method
|
||||
"""
|
||||
try:
|
||||
self._transfer_coordinator.set_status_to_queued()
|
||||
|
||||
# Before submitting any tasks, run all of the on_queued callbacks
|
||||
on_queued_callbacks = get_callbacks(transfer_future, 'queued')
|
||||
for on_queued_callback in on_queued_callbacks:
|
||||
on_queued_callback()
|
||||
|
||||
# Once callbacks have been ran set the status to running.
|
||||
self._transfer_coordinator.set_status_to_running()
|
||||
|
||||
# Call the submit method to start submitting tasks to execute the
|
||||
# transfer.
|
||||
self._submit(transfer_future=transfer_future, **kwargs)
|
||||
except BaseException as e:
|
||||
# If there was an exception raised during the submission of task
|
||||
# there is a chance that the final task that signals if a transfer
|
||||
# is done and to run the cleanup may never have been submitted in
|
||||
# the first place so we need to account accordingly.
|
||||
#
|
||||
# Note that BaseException is caught, instead of Exception, because
|
||||
# for some implementations of executors, specifically the serial
|
||||
# implementation, the SubmissionTask is directly exposed to
|
||||
# KeyboardInterrupts and so needs to clean up and signal done
|
||||
# for those as well.
|
||||
|
||||
# Set the exception, that caused the process to fail.
|
||||
self._log_and_set_exception(e)
|
||||
|
||||
# Wait for all possibly associated futures that may have spawned
|
||||
# from this submission task have finished before we announce the
|
||||
# transfer done.
|
||||
self._wait_for_all_submitted_futures_to_complete()
|
||||
|
||||
# Announce the transfer as done, which will run any cleanups
|
||||
# and done callbacks as well.
|
||||
self._transfer_coordinator.announce_done()
|
||||
|
||||
def _submit(self, transfer_future, **kwargs):
|
||||
"""The submission method to be implemented
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future associated with the
|
||||
transfer request that tasks are being submitted for
|
||||
|
||||
:param kwargs: Any additional keyword arguments you want to be passed
|
||||
in
|
||||
"""
|
||||
raise NotImplementedError('_submit() must be implemented')
|
||||
|
||||
def _wait_for_all_submitted_futures_to_complete(self):
|
||||
# We want to wait for all futures that were submitted to
|
||||
# complete as we do not want the cleanup callbacks or done callbacks
|
||||
# to be called too early. The main problem is any task that was
|
||||
# submitted may have submitted even more during its process and so
|
||||
# we need to account accordingly.
|
||||
|
||||
# First get all of the futures that were submitted up to this point.
|
||||
submitted_futures = self._transfer_coordinator.associated_futures
|
||||
while submitted_futures:
|
||||
# Wait for those futures to complete.
|
||||
self._wait_until_all_complete(submitted_futures)
|
||||
# However, more futures may have been submitted as we waited so
|
||||
# we need to check again for any more associated futures.
|
||||
possibly_more_submitted_futures = (
|
||||
self._transfer_coordinator.associated_futures
|
||||
)
|
||||
# If the current list of submitted futures is equal to the
|
||||
# list of associated futures after the wait completes,
|
||||
# we can ensure no more futures were submitted in waiting on
|
||||
# the current list of futures to complete ultimately meaning all
|
||||
# futures that may have spawned from the original submission task
|
||||
# have completed.
|
||||
if submitted_futures == possibly_more_submitted_futures:
|
||||
break
|
||||
submitted_futures = possibly_more_submitted_futures
|
||||
|
||||
|
||||
class CreateMultipartUploadTask(Task):
|
||||
"""Task to initiate a multipart upload"""
|
||||
|
||||
def _main(self, client, bucket, key, extra_args):
|
||||
"""
|
||||
:param client: The client to use when calling CreateMultipartUpload
|
||||
:param bucket: The name of the bucket to upload to
|
||||
:param key: The name of the key to upload to
|
||||
:param extra_args: A dictionary of any extra arguments that may be
|
||||
used in the initialization.
|
||||
|
||||
:returns: The upload id of the multipart upload
|
||||
"""
|
||||
# Create the multipart upload.
|
||||
response = client.create_multipart_upload(
|
||||
Bucket=bucket, Key=key, **extra_args
|
||||
)
|
||||
upload_id = response['UploadId']
|
||||
|
||||
# Add a cleanup if the multipart upload fails at any point.
|
||||
self._transfer_coordinator.add_failure_cleanup(
|
||||
client.abort_multipart_upload,
|
||||
Bucket=bucket,
|
||||
Key=key,
|
||||
UploadId=upload_id,
|
||||
)
|
||||
return upload_id
|
||||
|
||||
|
||||
class CompleteMultipartUploadTask(Task):
|
||||
"""Task to complete a multipart upload"""
|
||||
|
||||
def _main(self, client, bucket, key, upload_id, parts, extra_args):
|
||||
"""
|
||||
:param client: The client to use when calling CompleteMultipartUpload
|
||||
:param bucket: The name of the bucket to upload to
|
||||
:param key: The name of the key to upload to
|
||||
:param upload_id: The id of the upload
|
||||
:param parts: A list of parts to use to complete the multipart upload::
|
||||
|
||||
[{'Etag': etag_value, 'PartNumber': part_number}, ...]
|
||||
|
||||
Each element in the list consists of a return value from
|
||||
``UploadPartTask.main()``.
|
||||
:param extra_args: A dictionary of any extra arguments that may be
|
||||
used in completing the multipart transfer.
|
||||
"""
|
||||
client.complete_multipart_upload(
|
||||
Bucket=bucket,
|
||||
Key=key,
|
||||
UploadId=upload_id,
|
||||
MultipartUpload={'Parts': parts},
|
||||
**extra_args,
|
||||
)
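# Illustrative sketch with assumed names: the two tasks above wrap the plain
# S3 multipart API. Run sequentially, the same flow looks roughly like the
# following; the real submission task issues the upload_part calls
# concurrently through the request executor.
import boto3

s3_client = boto3.client('s3', 'us-west-2')
bucket, key = 'example-bucket', 'example-key'

upload_id = s3_client.create_multipart_upload(Bucket=bucket, Key=key)['UploadId']
parts = []
try:
    bodies = [b'a' * (5 * 1024 * 1024), b'final short part']
    for part_number, body in enumerate(bodies, start=1):
        response = s3_client.upload_part(
            Bucket=bucket, Key=key, UploadId=upload_id,
            PartNumber=part_number, Body=body,
        )
        parts.append({'ETag': response['ETag'], 'PartNumber': part_number})
    s3_client.complete_multipart_upload(
        Bucket=bucket, Key=key, UploadId=upload_id,
        MultipartUpload={'Parts': parts},
    )
except Exception:
    # Mirrors the failure cleanup registered by CreateMultipartUploadTask.
    s3_client.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
    raise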
|
840
cdk-env/lib/python3.12/site-packages/s3transfer/upload.py
Normal file
840
cdk-env/lib/python3.12/site-packages/s3transfer/upload.py
Normal file
@@ -0,0 +1,840 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import math
|
||||
from io import BytesIO
|
||||
|
||||
from s3transfer.compat import readable, seekable
|
||||
from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS
|
||||
from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
|
||||
from s3transfer.tasks import (
|
||||
CompleteMultipartUploadTask,
|
||||
CreateMultipartUploadTask,
|
||||
SubmissionTask,
|
||||
Task,
|
||||
)
|
||||
from s3transfer.utils import (
|
||||
ChunksizeAdjuster,
|
||||
DeferredOpenFile,
|
||||
get_callbacks,
|
||||
get_filtered_dict,
|
||||
)
|
||||
|
||||
|
||||
class AggregatedProgressCallback:
|
||||
def __init__(self, callbacks, threshold=1024 * 256):
|
||||
"""Aggregates progress updates for every provided progress callback
|
||||
|
||||
:type callbacks: A list of functions that accepts bytes_transferred
|
||||
as a single argument
|
||||
:param callbacks: The callbacks to invoke when threshold is reached
|
||||
|
||||
:type threshold: int
|
||||
:param threshold: The progress threshold in which to take the
|
||||
aggregated progress and invoke the progress callback with that
|
||||
aggregated progress total
|
||||
"""
|
||||
self._callbacks = callbacks
|
||||
self._threshold = threshold
|
||||
self._bytes_seen = 0
|
||||
|
||||
def __call__(self, bytes_transferred):
|
||||
self._bytes_seen += bytes_transferred
|
||||
if self._bytes_seen >= self._threshold:
|
||||
self._trigger_callbacks()
|
||||
|
||||
def flush(self):
|
||||
"""Flushes out any progress that has not been sent to its callbacks"""
|
||||
if self._bytes_seen > 0:
|
||||
self._trigger_callbacks()
|
||||
|
||||
def _trigger_callbacks(self):
|
||||
for callback in self._callbacks:
|
||||
callback(bytes_transferred=self._bytes_seen)
|
||||
self._bytes_seen = 0
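# Illustrative sketch: the aggregator above buffers byte counts and only
# invokes its callbacks once the threshold is crossed (or flush() is
# called), which keeps per-read callback overhead low.
from s3transfer.upload import AggregatedProgressCallback


def log_progress(bytes_transferred):
    print(f'+{bytes_transferred} bytes')


aggregated = AggregatedProgressCallback([log_progress], threshold=1024)
aggregated(512)     # below the threshold: buffered, no callback yet
aggregated(600)     # crosses it: log_progress(bytes_transferred=1112)
aggregated.flush()  # nothing buffered anymore, so nothing is emitted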
|
||||
|
||||
|
||||
class InterruptReader:
|
||||
"""Wrapper that can interrupt reading using an error
|
||||
|
||||
It uses a transfer coordinator to propagate an error: if the coordinator
|
||||
has an exception set while the file is being read, the read raises it.
|
||||
|
||||
:type fileobj: file-like obj
|
||||
:param fileobj: The file-like object to read from
|
||||
|
||||
:type transfer_coordinator: s3transfer.futures.TransferCoordinator
|
||||
:param transfer_coordinator: The transfer coordinator to use if the
|
||||
reader needs to be interrupted.
|
||||
"""
|
||||
|
||||
def __init__(self, fileobj, transfer_coordinator):
|
||||
self._fileobj = fileobj
|
||||
self._transfer_coordinator = transfer_coordinator
|
||||
|
||||
def read(self, amount=None):
|
||||
# If there is an exception, then raise the exception.
|
||||
# We raise an error instead of returning no bytes because for
|
||||
# requests where the content length and md5 were sent, it will
|
||||
# cause md5 mismatches and retries as there was no indication that
|
||||
# the stream being read from encountered any issues.
|
||||
if self._transfer_coordinator.exception:
|
||||
raise self._transfer_coordinator.exception
|
||||
return self._fileobj.read(amount)
|
||||
|
||||
def seek(self, where, whence=0):
|
||||
self._fileobj.seek(where, whence)
|
||||
|
||||
def tell(self):
|
||||
return self._fileobj.tell()
|
||||
|
||||
def close(self):
|
||||
self._fileobj.close()
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
self.close()
|
||||
|
||||
|
||||
class UploadInputManager:
|
||||
"""Base manager class for handling various types of files for uploads
|
||||
|
||||
This class is typically used for the UploadSubmissionTask class to help
|
||||
determine the following:
|
||||
|
||||
* How to determine the size of the file
|
||||
* How to determine if a multipart upload is required
|
||||
* How to retrieve the body for a PutObject
|
||||
* How to retrieve the bodies for a set of UploadParts
|
||||
|
||||
The answers/implementations differ for the various types of file inputs
|
||||
that may be accepted. All implementations must subclass and override
|
||||
public methods from this class.
|
||||
"""
|
||||
|
||||
def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
|
||||
self._osutil = osutil
|
||||
self._transfer_coordinator = transfer_coordinator
|
||||
self._bandwidth_limiter = bandwidth_limiter
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, upload_source):
|
||||
"""Determines if the source for the upload is compatible with manager
|
||||
|
||||
:param upload_source: The source for which the upload will pull data
|
||||
from.
|
||||
|
||||
:returns: True if the manager can handle the type of source specified
|
||||
otherwise returns False.
|
||||
"""
|
||||
raise NotImplementedError('must implement is_compatible()')
|
||||
|
||||
def stores_body_in_memory(self, operation_name):
|
||||
"""Whether the body it provides are stored in-memory
|
||||
|
||||
:type operation_name: str
|
||||
:param operation_name: The name of the client operation that the body
|
||||
is being used for. Valid operation_names are ``put_object`` and
|
||||
``upload_part``.
|
||||
|
||||
:rtype: boolean
|
||||
:returns: True if the body returned by the manager will be stored in
|
||||
memory. False if the manager will not directly store the body in
|
||||
memory.
|
||||
"""
|
||||
raise NotImplementedError('must implement stores_body_in_memory()')
|
||||
|
||||
def provide_transfer_size(self, transfer_future):
|
||||
"""Provides the transfer size of an upload
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The future associated with upload request
|
||||
"""
|
||||
raise NotImplementedError('must implement provide_transfer_size()')
|
||||
|
||||
def requires_multipart_upload(self, transfer_future, config):
|
||||
"""Determines where a multipart upload is required
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The future associated with upload request
|
||||
|
||||
:type config: s3transfer.manager.TransferConfig
|
||||
:param config: The config associated to the transfer manager
|
||||
|
||||
:rtype: boolean
|
||||
:returns: True, if the upload should be multipart based on
|
||||
configuration and size. False, otherwise.
|
||||
"""
|
||||
raise NotImplementedError('must implement requires_multipart_upload()')
|
||||
|
||||
def get_put_object_body(self, transfer_future):
|
||||
"""Returns the body to use for PutObject
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The future associated with upload request
|
||||
|
||||
|
||||
:rtype: s3transfer.utils.ReadFileChunk
|
||||
:returns: A ReadFileChunk including all progress callbacks
|
||||
associated with the transfer future.
|
||||
"""
|
||||
raise NotImplementedError('must implement get_put_object_body()')
|
||||
|
||||
def yield_upload_part_bodies(self, transfer_future, chunksize):
|
||||
"""Yields the part number and body to use for each UploadPart
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The future associated with upload request
|
||||
|
||||
:type chunksize: int
|
||||
:param chunksize: The chunksize to use for this upload.
|
||||
|
||||
:rtype: int, s3transfer.utils.ReadFileChunk
|
||||
:returns: Yields the part number and the ReadFileChunk including all
|
||||
progress callbacks associated with the transfer future for that
|
||||
specific yielded part.
|
||||
"""
|
||||
raise NotImplementedError('must implement yield_upload_part_bodies()')
|
||||
|
||||
def _wrap_fileobj(self, fileobj):
|
||||
fileobj = InterruptReader(fileobj, self._transfer_coordinator)
|
||||
if self._bandwidth_limiter:
|
||||
fileobj = self._bandwidth_limiter.get_bandwith_limited_stream(
|
||||
fileobj, self._transfer_coordinator, enabled=False
|
||||
)
|
||||
return fileobj
|
||||
|
||||
def _get_progress_callbacks(self, transfer_future):
|
||||
callbacks = get_callbacks(transfer_future, 'progress')
|
||||
# We only want to be wrapping the callbacks if there are callbacks to
|
||||
# invoke because we do not want to be doing any unnecessary work if
|
||||
# there are no callbacks to invoke.
|
||||
if callbacks:
|
||||
return [AggregatedProgressCallback(callbacks)]
|
||||
return []
|
||||
|
||||
def _get_close_callbacks(self, aggregated_progress_callbacks):
|
||||
return [callback.flush for callback in aggregated_progress_callbacks]
|
||||
|
||||
|
||||
class UploadFilenameInputManager(UploadInputManager):
|
||||
"""Upload utility for filenames"""
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, upload_source):
|
||||
return isinstance(upload_source, str)
|
||||
|
||||
def stores_body_in_memory(self, operation_name):
|
||||
return False
|
||||
|
||||
def provide_transfer_size(self, transfer_future):
|
||||
transfer_future.meta.provide_transfer_size(
|
||||
self._osutil.get_file_size(transfer_future.meta.call_args.fileobj)
|
||||
)
|
||||
|
||||
def requires_multipart_upload(self, transfer_future, config):
|
||||
return transfer_future.meta.size >= config.multipart_threshold
|
||||
|
||||
def get_put_object_body(self, transfer_future):
|
||||
# Get a file-like object for the given input
|
||||
fileobj, full_size = self._get_put_object_fileobj_with_full_size(
|
||||
transfer_future
|
||||
)
|
||||
|
||||
# Wrap fileobj with interrupt reader that will quickly cancel
|
||||
# uploads if needed instead of having to wait for the socket
|
||||
# to completely read all of the data.
|
||||
fileobj = self._wrap_fileobj(fileobj)
|
||||
|
||||
callbacks = self._get_progress_callbacks(transfer_future)
|
||||
close_callbacks = self._get_close_callbacks(callbacks)
|
||||
size = transfer_future.meta.size
|
||||
# Return the file-like object wrapped into a ReadFileChunk to get
|
||||
# progress.
|
||||
return self._osutil.open_file_chunk_reader_from_fileobj(
|
||||
fileobj=fileobj,
|
||||
chunk_size=size,
|
||||
full_file_size=full_size,
|
||||
callbacks=callbacks,
|
||||
close_callbacks=close_callbacks,
|
||||
)
|
||||
|
||||
def yield_upload_part_bodies(self, transfer_future, chunksize):
|
||||
full_file_size = transfer_future.meta.size
|
||||
num_parts = self._get_num_parts(transfer_future, chunksize)
|
||||
for part_number in range(1, num_parts + 1):
|
||||
callbacks = self._get_progress_callbacks(transfer_future)
|
||||
close_callbacks = self._get_close_callbacks(callbacks)
|
||||
start_byte = chunksize * (part_number - 1)
|
||||
# Get a file-like object for that part and the size of the full
|
||||
# file size for the associated file-like object for that part.
|
||||
fileobj, full_size = self._get_upload_part_fileobj_with_full_size(
|
||||
transfer_future.meta.call_args.fileobj,
|
||||
start_byte=start_byte,
|
||||
part_size=chunksize,
|
||||
full_file_size=full_file_size,
|
||||
)
|
||||
|
||||
# Wrap fileobj with interrupt reader that will quickly cancel
|
||||
# uploads if needed instead of having to wait for the socket
|
||||
# to completely read all of the data.
|
||||
fileobj = self._wrap_fileobj(fileobj)
|
||||
|
||||
# Wrap the file-like object into a ReadFileChunk to get progress.
|
||||
read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj(
|
||||
fileobj=fileobj,
|
||||
chunk_size=chunksize,
|
||||
full_file_size=full_size,
|
||||
callbacks=callbacks,
|
||||
close_callbacks=close_callbacks,
|
||||
)
|
||||
yield part_number, read_file_chunk
|
||||
|
||||
def _get_deferred_open_file(self, fileobj, start_byte):
|
||||
fileobj = DeferredOpenFile(
|
||||
fileobj, start_byte, open_function=self._osutil.open
|
||||
)
|
||||
return fileobj
|
||||
|
||||
def _get_put_object_fileobj_with_full_size(self, transfer_future):
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
size = transfer_future.meta.size
|
||||
return self._get_deferred_open_file(fileobj, 0), size
|
||||
|
||||
def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
|
||||
start_byte = kwargs['start_byte']
|
||||
full_size = kwargs['full_file_size']
|
||||
return self._get_deferred_open_file(fileobj, start_byte), full_size
|
||||
|
||||
def _get_num_parts(self, transfer_future, part_size):
|
||||
return int(math.ceil(transfer_future.meta.size / float(part_size)))
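# Illustrative sketch: for a filename-based upload the part boundaries are
# pure arithmetic over the chunksize, mirroring _get_num_parts() and the
# start_byte computation in yield_upload_part_bodies() above. The sizes
# below are assumed values.
import math

file_size = 50 * 1024 * 1024   # 50 MiB input
chunksize = 8 * 1024 * 1024    # 8 MiB parts
num_parts = int(math.ceil(file_size / float(chunksize)))                    # 7
start_bytes = [chunksize * (part - 1) for part in range(1, num_parts + 1)]
print(num_parts, start_bytes[:3])   # 7 [0, 8388608, 16777216]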
|
||||
|
||||
|
||||
class UploadSeekableInputManager(UploadFilenameInputManager):
|
||||
"""Upload utility for an open file object"""
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, upload_source):
|
||||
return readable(upload_source) and seekable(upload_source)
|
||||
|
||||
def stores_body_in_memory(self, operation_name):
|
||||
if operation_name == 'put_object':
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def provide_transfer_size(self, transfer_future):
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
# To determine size, first determine the starting position
|
||||
# Seek to the end and then find the difference in the length
|
||||
# between the end and start positions.
|
||||
start_position = fileobj.tell()
|
||||
fileobj.seek(0, 2)
|
||||
end_position = fileobj.tell()
|
||||
fileobj.seek(start_position)
|
||||
transfer_future.meta.provide_transfer_size(
|
||||
end_position - start_position
|
||||
)
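# Illustrative sketch: provide_transfer_size() above counts only the bytes
# remaining from the stream's current position, so a pre-seeked file object
# uploads just its tail.
from io import BytesIO

stream = BytesIO(b'0123456789')
stream.seek(4)                 # caller positioned the stream at byte 4
start_position = stream.tell()
stream.seek(0, 2)              # seek to the end to find the total length
size = stream.tell() - start_position   # 6 bytes left to upload
stream.seek(start_position)    # restore the caller's position
print(size)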
|
||||
|
||||
def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
|
||||
# Note: It is unfortunate that in order to do a multithreaded
|
||||
# multipart upload we cannot simply copy the filelike object
|
||||
# since there is not really a mechanism in python (i.e. os.dup
|
||||
# points to the same OS filehandle which causes concurrency
|
||||
# issues). So instead we need to read from the fileobj and
|
||||
# chunk the data out to separate file-like objects in memory.
|
||||
data = fileobj.read(kwargs['part_size'])
|
||||
# We return the length of the data instead of the full_file_size
|
||||
# because we partitioned the data into separate BytesIO objects
|
||||
# meaning the BytesIO object has no knowledge of its start position
|
||||
# relative to the input source nor access to the rest of the input
|
||||
# source. So we must treat it as its own standalone file.
|
||||
return BytesIO(data), len(data)
|
||||
|
||||
def _get_put_object_fileobj_with_full_size(self, transfer_future):
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
# The current position needs to be taken into account when retrieving
|
||||
# the full size of the file.
|
||||
size = fileobj.tell() + transfer_future.meta.size
|
||||
return fileobj, size
|
||||
|
||||
|
||||
class UploadNonSeekableInputManager(UploadInputManager):
|
||||
"""Upload utility for a file-like object that cannot seek."""
|
||||
|
||||
def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
|
||||
super().__init__(osutil, transfer_coordinator, bandwidth_limiter)
|
||||
self._initial_data = b''
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, upload_source):
|
||||
return readable(upload_source)
|
||||
|
||||
def stores_body_in_memory(self, operation_name):
|
||||
return True
|
||||
|
||||
def provide_transfer_size(self, transfer_future):
|
||||
# No-op because there is no way to do this short of reading the entire
|
||||
# body into memory.
|
||||
return
|
||||
|
||||
def requires_multipart_upload(self, transfer_future, config):
|
||||
# If the user has set the size, we can use that.
|
||||
if transfer_future.meta.size is not None:
|
||||
return transfer_future.meta.size >= config.multipart_threshold
|
||||
|
||||
# This is tricky to determine in this case because we can't know how
|
||||
# large the input is. So to figure it out, we read data into memory
|
||||
# up until the threshold and compare how much data was actually read
|
||||
# against the threshold.
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
threshold = config.multipart_threshold
|
||||
self._initial_data = self._read(fileobj, threshold, False)
|
||||
if len(self._initial_data) < threshold:
|
||||
return False
|
||||
else:
|
||||
return True
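# Illustrative sketch: with a non-seekable stream the size is unknown up
# front, so requires_multipart_upload() above probes by buffering up to the
# multipart threshold; the buffered bytes are replayed later by _read().
# The wrapper class and values below are assumptions for the sketch only.
from io import BytesIO


class OneWayStream:
    """Readable-but-not-seekable stand-in used only for this sketch."""

    def __init__(self, data):
        self._stream = BytesIO(data)

    def read(self, amount=-1):
        return self._stream.read(amount)


threshold = 8
stream = OneWayStream(b'0123456789')
initial_data = stream.read(threshold)               # buffer up to the threshold
needs_multipart = len(initial_data) >= threshold    # True: a full 8 bytes arrived
print(needs_multipart, initial_data)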
|
||||
|
||||
def get_put_object_body(self, transfer_future):
|
||||
callbacks = self._get_progress_callbacks(transfer_future)
|
||||
close_callbacks = self._get_close_callbacks(callbacks)
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
|
||||
body = self._wrap_data(
|
||||
self._initial_data + fileobj.read(), callbacks, close_callbacks
|
||||
)
|
||||
|
||||
# Zero out the stored data so we don't have additional copies
|
||||
# hanging around in memory.
|
||||
self._initial_data = None
|
||||
return body
|
||||
|
||||
def yield_upload_part_bodies(self, transfer_future, chunksize):
|
||||
file_object = transfer_future.meta.call_args.fileobj
|
||||
part_number = 0
|
||||
|
||||
# Continue reading parts from the file-like object until it is empty.
|
||||
while True:
|
||||
callbacks = self._get_progress_callbacks(transfer_future)
|
||||
close_callbacks = self._get_close_callbacks(callbacks)
|
||||
part_number += 1
|
||||
part_content = self._read(file_object, chunksize)
|
||||
if not part_content:
|
||||
break
|
||||
part_object = self._wrap_data(
|
||||
part_content, callbacks, close_callbacks
|
||||
)
|
||||
|
||||
# Zero out part_content to avoid hanging on to additional data.
|
||||
part_content = None
|
||||
yield part_number, part_object
|
||||
|
||||
def _read(self, fileobj, amount, truncate=True):
|
||||
"""
|
||||
Reads a specific amount of data from a stream and returns it. If there
|
||||
is any data in initial_data, that will be popped out first.
|
||||
|
||||
:type fileobj: A file-like object that implements read
|
||||
:param fileobj: The stream to read from.
|
||||
|
||||
:type amount: int
|
||||
:param amount: The number of bytes to read from the stream.
|
||||
|
||||
:type truncate: bool
|
||||
:param truncate: Whether or not to truncate initial_data after
|
||||
reading from it.
|
||||
|
||||
:return: The bytes read, taken from initial_data first and then the stream.
|
||||
"""
|
||||
# If the initial data is empty, we simply read from the fileobj
|
||||
if len(self._initial_data) == 0:
|
||||
return fileobj.read(amount)
|
||||
|
||||
# If the requested number of bytes is less than the amount of
|
||||
# initial data, pull entirely from initial data.
|
||||
if amount <= len(self._initial_data):
|
||||
data = self._initial_data[:amount]
|
||||
# Truncate initial data so we don't hang onto the data longer
|
||||
# than we need.
|
||||
if truncate:
|
||||
self._initial_data = self._initial_data[amount:]
|
||||
return data
|
||||
|
||||
# At this point there is some initial data left, but not enough to
|
||||
# satisfy the number of bytes requested. Pull out the remaining
|
||||
# initial data and read the rest from the fileobj.
|
||||
amount_to_read = amount - len(self._initial_data)
|
||||
data = self._initial_data + fileobj.read(amount_to_read)
|
||||
|
||||
# Zero out initial data so we don't hang onto the data any more.
|
||||
if truncate:
|
||||
self._initial_data = b''
|
||||
return data
|
||||
|
||||
def _wrap_data(self, data, callbacks, close_callbacks):
|
||||
"""
|
||||
Wraps data with the interrupt reader and the file chunk reader.
|
||||
|
||||
:type data: bytes
|
||||
:param data: The data to wrap.
|
||||
|
||||
:type callbacks: list
|
||||
:param callbacks: The callbacks associated with the transfer future.
|
||||
|
||||
:type close_callbacks: list
|
||||
:param close_callbacks: The callbacks to be called when closing the
|
||||
wrapper for the data.
|
||||
|
||||
:return: Fully wrapped data.
|
||||
"""
|
||||
fileobj = self._wrap_fileobj(BytesIO(data))
|
||||
return self._osutil.open_file_chunk_reader_from_fileobj(
|
||||
fileobj=fileobj,
|
||||
chunk_size=len(data),
|
||||
full_file_size=len(data),
|
||||
callbacks=callbacks,
|
||||
close_callbacks=close_callbacks,
|
||||
)
|
||||
|
||||
|
||||
class UploadSubmissionTask(SubmissionTask):
|
||||
"""Task for submitting tasks to execute an upload"""
|
||||
|
||||
PUT_OBJECT_BLOCKLIST = ["ChecksumType", "MpuObjectSize"]
|
||||
|
||||
CREATE_MULTIPART_BLOCKLIST = FULL_OBJECT_CHECKSUM_ARGS + ["MpuObjectSize"]
|
||||
|
||||
UPLOAD_PART_ARGS = [
|
||||
'ChecksumAlgorithm',
|
||||
'SSECustomerKey',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKeyMD5',
|
||||
'RequestPayer',
|
||||
'ExpectedBucketOwner',
|
||||
]
|
||||
|
||||
COMPLETE_MULTIPART_ARGS = [
|
||||
'SSECustomerKey',
|
||||
'SSECustomerAlgorithm',
|
||||
'SSECustomerKeyMD5',
|
||||
'RequestPayer',
|
||||
'ExpectedBucketOwner',
|
||||
'ChecksumType',
|
||||
'MpuObjectSize',
|
||||
] + FULL_OBJECT_CHECKSUM_ARGS
|
||||
|
||||
def _get_upload_input_manager_cls(self, transfer_future):
|
||||
"""Retrieves a class for managing input for an upload based on file type
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future for the request
|
||||
|
||||
:rtype: class of UploadInputManager
|
||||
:returns: The appropriate class to use for managing a specific type of
|
||||
input for uploads.
|
||||
"""
|
||||
upload_manager_resolver_chain = [
|
||||
UploadFilenameInputManager,
|
||||
UploadSeekableInputManager,
|
||||
UploadNonSeekableInputManager,
|
||||
]
|
||||
|
||||
fileobj = transfer_future.meta.call_args.fileobj
|
||||
for upload_manager_cls in upload_manager_resolver_chain:
|
||||
if upload_manager_cls.is_compatible(fileobj):
|
||||
return upload_manager_cls
|
||||
raise RuntimeError(
|
||||
f'Input {fileobj} of type: {type(fileobj)} is not supported.'
|
||||
)
|
||||
|
||||
def _submit(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
transfer_future,
|
||||
bandwidth_limiter=None,
|
||||
):
|
||||
"""
|
||||
:param client: The client associated with the transfer manager
|
||||
|
||||
:type config: s3transfer.manager.TransferConfig
|
||||
:param config: The transfer config associated with the transfer
|
||||
manager
|
||||
|
||||
:type osutil: s3transfer.utils.OSUtil
|
||||
:param osutil: The os utility associated to the transfer manager
|
||||
|
||||
:type request_executor: s3transfer.futures.BoundedExecutor
|
||||
:param request_executor: The request executor associated with the
|
||||
transfer manager
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future associated with the
|
||||
transfer request that tasks are being submitted for
|
||||
"""
|
||||
upload_input_manager = self._get_upload_input_manager_cls(
|
||||
transfer_future
|
||||
)(osutil, self._transfer_coordinator, bandwidth_limiter)
|
||||
|
||||
# Determine the size if it was not provided
|
||||
if transfer_future.meta.size is None:
|
||||
upload_input_manager.provide_transfer_size(transfer_future)
|
||||
|
||||
# Do a multipart upload if needed, otherwise do a regular put object.
|
||||
if not upload_input_manager.requires_multipart_upload(
|
||||
transfer_future, config
|
||||
):
|
||||
self._submit_upload_request(
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
transfer_future,
|
||||
upload_input_manager,
|
||||
)
|
||||
else:
|
||||
self._submit_multipart_request(
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
transfer_future,
|
||||
upload_input_manager,
|
||||
)
|
||||
|
||||
def _submit_upload_request(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
transfer_future,
|
||||
upload_input_manager,
|
||||
):
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
put_object_extra_args = self._extra_put_object_args(
|
||||
call_args.extra_args
|
||||
)
|
||||
|
||||
# Get any tags that need to be associated to the put object task
|
||||
put_object_tag = self._get_upload_task_tag(
|
||||
upload_input_manager, 'put_object'
|
||||
)
|
||||
|
||||
# Submit the request of a single upload.
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
PutObjectTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'fileobj': upload_input_manager.get_put_object_body(
|
||||
transfer_future
|
||||
),
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': put_object_extra_args,
|
||||
},
|
||||
is_final=True,
|
||||
),
|
||||
tag=put_object_tag,
|
||||
)
|
||||
|
||||
def _submit_multipart_request(
|
||||
self,
|
||||
client,
|
||||
config,
|
||||
osutil,
|
||||
request_executor,
|
||||
transfer_future,
|
||||
upload_input_manager,
|
||||
):
|
||||
call_args = transfer_future.meta.call_args
|
||||
|
||||
# When a user-provided full-object checksum is passed, set "ChecksumType" to "FULL_OBJECT"
|
||||
# and "ChecksumAlgorithm" to the related algorithm.
|
||||
for checksum in FULL_OBJECT_CHECKSUM_ARGS:
|
||||
if checksum in call_args.extra_args:
|
||||
call_args.extra_args["ChecksumType"] = "FULL_OBJECT"
|
||||
call_args.extra_args["ChecksumAlgorithm"] = checksum.replace(
|
||||
"Checksum", ""
|
||||
)
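# Editor's note, an illustrative sketch: assuming FULL_OBJECT_CHECKSUM_ARGS
# contains 'ChecksumCRC32', an extra_args of {'ChecksumCRC32': 'abcd=='}
# would be expanded here to
#   {'ChecksumCRC32': 'abcd==',
#    'ChecksumType': 'FULL_OBJECT',
#    'ChecksumAlgorithm': 'CRC32'}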
|
||||
|
||||
create_multipart_extra_args = self._extra_create_multipart_args(
|
||||
call_args.extra_args
|
||||
)
|
||||
|
||||
# Submit the request to create a multipart upload.
|
||||
create_multipart_future = self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
CreateMultipartUploadTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': create_multipart_extra_args,
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
# Submit requests to upload the parts of the file.
|
||||
part_futures = []
|
||||
extra_part_args = self._extra_upload_part_args(call_args.extra_args)
|
||||
|
||||
# Get any tags that need to be associated to the submitted task
|
||||
# for uploading the data
|
||||
upload_part_tag = self._get_upload_task_tag(
|
||||
upload_input_manager, 'upload_part'
|
||||
)
|
||||
|
||||
size = transfer_future.meta.size
|
||||
adjuster = ChunksizeAdjuster()
|
||||
chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size)
|
||||
part_iterator = upload_input_manager.yield_upload_part_bodies(
|
||||
transfer_future, chunksize
|
||||
)
|
||||
|
||||
for part_number, fileobj in part_iterator:
|
||||
part_futures.append(
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
UploadPartTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'fileobj': fileobj,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'part_number': part_number,
|
||||
'extra_args': extra_part_args,
|
||||
},
|
||||
pending_main_kwargs={
|
||||
'upload_id': create_multipart_future
|
||||
},
|
||||
),
|
||||
tag=upload_part_tag,
|
||||
)
|
||||
)
|
||||
|
||||
complete_multipart_extra_args = self._extra_complete_multipart_args(
|
||||
call_args.extra_args
|
||||
)
|
||||
# Submit the request to complete the multipart upload.
|
||||
self._transfer_coordinator.submit(
|
||||
request_executor,
|
||||
CompleteMultipartUploadTask(
|
||||
transfer_coordinator=self._transfer_coordinator,
|
||||
main_kwargs={
|
||||
'client': client,
|
||||
'bucket': call_args.bucket,
|
||||
'key': call_args.key,
|
||||
'extra_args': complete_multipart_extra_args,
|
||||
},
|
||||
pending_main_kwargs={
|
||||
'upload_id': create_multipart_future,
|
||||
'parts': part_futures,
|
||||
},
|
||||
is_final=True,
|
||||
),
|
||||
)
|
||||
|
||||
def _extra_upload_part_args(self, extra_args):
|
||||
# Only the args in UPLOAD_PART_ARGS actually need to be passed
|
||||
# onto the upload_part calls.
|
||||
return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS)
|
||||
|
||||
def _extra_complete_multipart_args(self, extra_args):
|
||||
return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
|
||||
|
||||
def _extra_create_multipart_args(self, extra_args):
|
||||
return get_filtered_dict(
|
||||
extra_args, blocklisted_keys=self.CREATE_MULTIPART_BLOCKLIST
|
||||
)
|
||||
|
||||
def _extra_put_object_args(self, extra_args):
|
||||
return get_filtered_dict(
|
||||
extra_args, blocklisted_keys=self.PUT_OBJECT_BLOCKLIST
|
||||
)
|
||||
|
||||
def _get_upload_task_tag(self, upload_input_manager, operation_name):
|
||||
tag = None
|
||||
if upload_input_manager.stores_body_in_memory(operation_name):
|
||||
tag = IN_MEMORY_UPLOAD_TAG
|
||||
return tag
|
||||
|
||||
|
||||
class PutObjectTask(Task):
|
||||
"""Task to do a nonmultipart upload"""
|
||||
|
||||
def _main(self, client, fileobj, bucket, key, extra_args):
|
||||
"""
|
||||
:param client: The client to use when calling PutObject
|
||||
:param fileobj: The file to upload.
|
||||
:param bucket: The name of the bucket to upload to
|
||||
:param key: The name of the key to upload to
|
||||
:param extra_args: A dictionary of any extra arguments that may be
|
||||
used in the upload.
|
||||
"""
|
||||
with fileobj as body:
|
||||
client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)
|
||||
|
||||
|
||||
class UploadPartTask(Task):
|
||||
"""Task to upload a part in a multipart upload"""
|
||||
|
||||
def _main(
|
||||
self, client, fileobj, bucket, key, upload_id, part_number, extra_args
|
||||
):
|
||||
"""
|
||||
:param client: The client to use when calling UploadPart
|
||||
:param fileobj: The file to upload.
|
||||
:param bucket: The name of the bucket to upload to
|
||||
:param key: The name of the key to upload to
|
||||
:param upload_id: The id of the upload
|
||||
:param part_number: The number representing the part of the multipart
|
||||
upload
|
||||
:param extra_args: A dictionary of any extra arguments that may be
|
||||
used in the upload.
|
||||
|
||||
:rtype: dict
|
||||
:returns: A dictionary representing a part::
|
||||
|
||||
{'ETag': etag_value, 'PartNumber': part_number}
|
||||
|
||||
This value can be appended to a list to be used to complete
|
||||
the multipart upload.
|
||||
"""
|
||||
with fileobj as body:
|
||||
response = client.upload_part(
|
||||
Bucket=bucket,
|
||||
Key=key,
|
||||
UploadId=upload_id,
|
||||
PartNumber=part_number,
|
||||
Body=body,
|
||||
**extra_args,
|
||||
)
|
||||
etag = response['ETag']
|
||||
part_metadata = {'ETag': etag, 'PartNumber': part_number}
|
||||
if 'ChecksumAlgorithm' in extra_args:
|
||||
algorithm_name = extra_args['ChecksumAlgorithm'].upper()
|
||||
checksum_member = f'Checksum{algorithm_name}'
|
||||
if checksum_member in response:
|
||||
part_metadata[checksum_member] = response[checksum_member]
|
||||
return part_metadata
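# Editor's sketch (hypothetical values): a two-part upload would accumulate
#   parts = [{'ETag': '"etag1"', 'PartNumber': 1},
#            {'ETag': '"etag2"', 'PartNumber': 2}]
# and CompleteMultipartUploadTask can then pass that list to the client as
# roughly:
#   client.complete_multipart_upload(
#       Bucket=bucket, Key=key, UploadId=upload_id,
#       MultipartUpload={'Parts': parts})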
|
833
cdk-env/lib/python3.12/site-packages/s3transfer/utils.py
Normal file
833
cdk-env/lib/python3.12/site-packages/s3transfer/utils.py
Normal file
@@ -0,0 +1,833 @@
|
||||
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||||
# may not use this file except in compliance with the License. A copy of
|
||||
# the License is located at
|
||||
#
|
||||
# http://aws.amazon.com/apache2.0/
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||||
# ANY KIND, either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
import functools
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import stat
|
||||
import string
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
|
||||
from botocore.exceptions import (
|
||||
IncompleteReadError,
|
||||
ReadTimeoutError,
|
||||
ResponseStreamingError,
|
||||
)
|
||||
from botocore.httpchecksum import DEFAULT_CHECKSUM_ALGORITHM, AwsChunkedWrapper
|
||||
from botocore.utils import is_s3express_bucket
|
||||
|
||||
from s3transfer.compat import SOCKET_ERROR, fallocate, rename_file
|
||||
from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS
|
||||
|
||||
MAX_PARTS = 10000
|
||||
# The maximum file size you can upload via S3 per request.
|
||||
# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html
|
||||
# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
|
||||
MAX_SINGLE_UPLOAD_SIZE = 5 * (1024**3)
|
||||
MIN_UPLOAD_CHUNKSIZE = 5 * (1024**2)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
S3_RETRYABLE_DOWNLOAD_ERRORS = (
|
||||
socket.timeout,
|
||||
SOCKET_ERROR,
|
||||
ReadTimeoutError,
|
||||
IncompleteReadError,
|
||||
ResponseStreamingError,
|
||||
)
|
||||
|
||||
|
||||
def random_file_extension(num_digits=8):
|
||||
return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
|
||||
|
||||
|
||||
def signal_not_transferring(request, operation_name, **kwargs):
|
||||
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
|
||||
request.body, 'signal_not_transferring'
|
||||
):
|
||||
request.body.signal_not_transferring()
|
||||
|
||||
|
||||
def signal_transferring(request, operation_name, **kwargs):
|
||||
if operation_name in ['PutObject', 'UploadPart']:
|
||||
body = request.body
|
||||
if isinstance(body, AwsChunkedWrapper):
|
||||
body = getattr(body, '_raw', None)
|
||||
if hasattr(body, 'signal_transferring'):
|
||||
body.signal_transferring()
|
||||
|
||||
|
||||
def calculate_num_parts(size, part_size):
|
||||
return int(math.ceil(size / float(part_size)))
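# Editor's sketch: a 100 MiB transfer with 8 MiB parts needs 13 parts (the
# last part is partial); an exact multiple needs no extra part.
assert calculate_num_parts(100 * 1024**2, 8 * 1024**2) == 13
assert calculate_num_parts(64 * 1024**2, 8 * 1024**2) == 8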
|
||||
|
||||
|
||||
def calculate_range_parameter(
|
||||
part_size, part_index, num_parts, total_size=None
|
||||
):
|
||||
"""Calculate the range parameter for multipart downloads/copies
|
||||
|
||||
:type part_size: int
|
||||
:param part_size: The size of the part
|
||||
|
||||
:type part_index: int
|
||||
:param part_index: The index of the part being transferred. This index starts
|
||||
at zero
|
||||
|
||||
:type num_parts: int
|
||||
:param num_parts: The total number of parts in the transfer
|
||||
|
||||
:returns: The value to use for the Range parameter on downloads or
|
||||
the CopySourceRange parameter for copies
|
||||
"""
|
||||
# Used to calculate the Range parameter
|
||||
start_range = part_index * part_size
|
||||
if part_index == num_parts - 1:
|
||||
end_range = ''
|
||||
if total_size is not None:
|
||||
end_range = str(total_size - 1)
|
||||
else:
|
||||
end_range = start_range + part_size - 1
|
||||
range_param = f'bytes={start_range}-{end_range}'
|
||||
return range_param
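# Editor's sketch: the generated ranges for a three-part transfer with
# 5 MiB parts. The final part is open-ended when total_size is not given.
assert calculate_range_parameter(5 * 1024**2, 0, 3) == 'bytes=0-5242879'
assert calculate_range_parameter(5 * 1024**2, 1, 3) == 'bytes=5242880-10485759'
assert calculate_range_parameter(5 * 1024**2, 2, 3) == 'bytes=10485760-'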
|
||||
|
||||
|
||||
def get_callbacks(transfer_future, callback_type):
|
||||
"""Retrieves callbacks from a subscriber
|
||||
|
||||
:type transfer_future: s3transfer.futures.TransferFuture
|
||||
:param transfer_future: The transfer future the subscriber is associated
|
||||
to.
|
||||
|
||||
:type callback_type: str
|
||||
:param callback_type: The type of callback to retrieve from the subscriber.
|
||||
Valid types include:
|
||||
* 'queued'
|
||||
* 'progress'
|
||||
* 'done'
|
||||
|
||||
:returns: A list of callbacks for the type specified. All callbacks are
|
||||
preinjected with the transfer future.
|
||||
"""
|
||||
callbacks = []
|
||||
for subscriber in transfer_future.meta.call_args.subscribers:
|
||||
callback_name = 'on_' + callback_type
|
||||
if hasattr(subscriber, callback_name):
|
||||
callbacks.append(
|
||||
functools.partial(
|
||||
getattr(subscriber, callback_name), future=transfer_future
|
||||
)
|
||||
)
|
||||
return callbacks
|
||||
|
||||
|
||||
def invoke_progress_callbacks(callbacks, bytes_transferred):
|
||||
"""Calls all progress callbacks
|
||||
|
||||
:param callbacks: A list of progress callbacks to invoke
|
||||
:param bytes_transferred: The number of bytes transferred. This is passed
|
||||
to the callbacks. If no bytes were transferred the callbacks will not
|
||||
be invoked because no progress was achieved. It is also possible
|
||||
to receive a negative amount which comes from retrying a transfer
|
||||
request.
|
||||
"""
|
||||
# Only invoke the callbacks if bytes were actually transferred.
|
||||
if bytes_transferred:
|
||||
for callback in callbacks:
|
||||
callback(bytes_transferred=bytes_transferred)
|
||||
|
||||
|
||||
def get_filtered_dict(
|
||||
original_dict, whitelisted_keys=None, blocklisted_keys=None
|
||||
):
|
||||
"""Gets a dictionary filtered by whitelisted and blocklisted keys.
|
||||
|
||||
:param original_dict: The original dictionary of arguments to source keys
|
||||
and values.
|
||||
:param whitelisted_keys: A list of keys to include in the filtered
|
||||
dictionary.
|
||||
:param blocklisted_keys: A list of keys to exclude from the filtered
|
||||
dictionary.
|
||||
|
||||
:returns: A dictionary containing key/values from the original dictionary
|
||||
whose key was included in the whitelist and/or not included in the
|
||||
blocklist.
|
||||
"""
|
||||
filtered_dict = {}
|
||||
for key, value in original_dict.items():
|
||||
if (whitelisted_keys and key in whitelisted_keys) or (
|
||||
blocklisted_keys and key not in blocklisted_keys
|
||||
):
|
||||
filtered_dict[key] = value
|
||||
return filtered_dict
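# Editor's sketch with hypothetical argument values: a whitelist keeps only
# the listed keys, while a blocklist drops them.
_example_args = {'ACL': 'private', 'Metadata': {'a': 'b'}, 'RequestPayer': 'requester'}
assert get_filtered_dict(_example_args, whitelisted_keys=['RequestPayer']) == {
    'RequestPayer': 'requester'
}
assert get_filtered_dict(_example_args, blocklisted_keys=['Metadata']) == {
    'ACL': 'private', 'RequestPayer': 'requester'
}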
|
||||
|
||||
|
||||
class CallArgs:
|
||||
def __init__(self, **kwargs):
|
||||
"""A class that records call arguments
|
||||
|
||||
The call arguments must be passed as keyword arguments. It will set
|
||||
each keyword argument as an attribute of the object along with its
|
||||
associated value.
|
||||
"""
|
||||
for arg, value in kwargs.items():
|
||||
setattr(self, arg, value)
|
||||
|
||||
|
||||
class FunctionContainer:
|
||||
"""An object that contains a function and any args or kwargs to call it
|
||||
|
||||
When called the provided function will be called with provided args
|
||||
and kwargs.
|
||||
"""
|
||||
|
||||
def __init__(self, func, *args, **kwargs):
|
||||
self._func = func
|
||||
self._args = args
|
||||
self._kwargs = kwargs
|
||||
|
||||
def __repr__(self):
|
||||
return f'Function: {self._func} with args {self._args} and kwargs {self._kwargs}'
|
||||
|
||||
def __call__(self):
|
||||
return self._func(*self._args, **self._kwargs)
|
||||
|
||||
|
||||
class CountCallbackInvoker:
|
||||
"""An abstraction to invoke a callback when a shared count reaches zero
|
||||
|
||||
:param callback: Callback invoke when finalized count reaches zero
|
||||
"""
|
||||
|
||||
def __init__(self, callback):
|
||||
self._lock = threading.Lock()
|
||||
self._callback = callback
|
||||
self._count = 0
|
||||
self._is_finalized = False
|
||||
|
||||
@property
|
||||
def current_count(self):
|
||||
with self._lock:
|
||||
return self._count
|
||||
|
||||
def increment(self):
|
||||
"""Increment the count by one"""
|
||||
with self._lock:
|
||||
if self._is_finalized:
|
||||
raise RuntimeError(
|
||||
'Counter has been finalized; it can no longer be '
|
||||
'incremented.'
|
||||
)
|
||||
self._count += 1
|
||||
|
||||
def decrement(self):
|
||||
"""Decrement the count by one"""
|
||||
with self._lock:
|
||||
if self._count == 0:
|
||||
raise RuntimeError(
|
||||
'Counter is at zero. It cannot dip below zero'
|
||||
)
|
||||
self._count -= 1
|
||||
if self._is_finalized and self._count == 0:
|
||||
self._callback()
|
||||
|
||||
def finalize(self):
|
||||
"""Finalize the counter
|
||||
|
||||
Once finalized, the counter can never be incremented and the callback
|
||||
can be invoked once the count reaches zero
|
||||
"""
|
||||
with self._lock:
|
||||
self._is_finalized = True
|
||||
if self._count == 0:
|
||||
self._callback()
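# Editor's sketch of the intended call pattern: the callback only fires once
# finalize() has been called and the count has drained back to zero.
#
#   invoker = CountCallbackInvoker(callback=lambda: print('all parts done'))
#   invoker.increment()   # part 1 submitted
#   invoker.increment()   # part 2 submitted
#   invoker.finalize()    # no more parts will be added
#   invoker.decrement()   # part 1 finished
#   invoker.decrement()   # part 2 finished -> callback fires here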
|
||||
|
||||
|
||||
class OSUtils:
|
||||
_MAX_FILENAME_LEN = 255
|
||||
|
||||
def get_file_size(self, filename):
|
||||
return os.path.getsize(filename)
|
||||
|
||||
def open_file_chunk_reader(self, filename, start_byte, size, callbacks):
|
||||
return ReadFileChunk.from_filename(
|
||||
filename, start_byte, size, callbacks, enable_callbacks=False
|
||||
)
|
||||
|
||||
def open_file_chunk_reader_from_fileobj(
|
||||
self,
|
||||
fileobj,
|
||||
chunk_size,
|
||||
full_file_size,
|
||||
callbacks,
|
||||
close_callbacks=None,
|
||||
):
|
||||
return ReadFileChunk(
|
||||
fileobj,
|
||||
chunk_size,
|
||||
full_file_size,
|
||||
callbacks=callbacks,
|
||||
enable_callbacks=False,
|
||||
close_callbacks=close_callbacks,
|
||||
)
|
||||
|
||||
def open(self, filename, mode):
|
||||
return open(filename, mode)
|
||||
|
||||
def remove_file(self, filename):
|
||||
"""Remove a file, noop if file does not exist."""
|
||||
# Unlike os.remove, if the file does not exist,
|
||||
# then this method does nothing.
|
||||
try:
|
||||
os.remove(filename)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def rename_file(self, current_filename, new_filename):
|
||||
rename_file(current_filename, new_filename)
|
||||
|
||||
def is_special_file(self, filename):
|
||||
"""Checks to see if a file is a special UNIX file.
|
||||
|
||||
It checks if the file is a character special device, block special
|
||||
device, FIFO, or socket.
|
||||
|
||||
:param filename: Name of the file
|
||||
|
||||
:returns: True if the file is a special file. False if it is not.
|
||||
"""
|
||||
# If it does not exist, it must be a new file so it cannot be
|
||||
# a special file.
|
||||
if not os.path.exists(filename):
|
||||
return False
|
||||
mode = os.stat(filename).st_mode
|
||||
# Character special device.
|
||||
if stat.S_ISCHR(mode):
|
||||
return True
|
||||
# Block special device
|
||||
if stat.S_ISBLK(mode):
|
||||
return True
|
||||
# Named pipe / FIFO
|
||||
if stat.S_ISFIFO(mode):
|
||||
return True
|
||||
# Socket.
|
||||
if stat.S_ISSOCK(mode):
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_temp_filename(self, filename):
|
||||
suffix = os.extsep + random_file_extension()
|
||||
path = os.path.dirname(filename)
|
||||
name = os.path.basename(filename)
|
||||
temp_filename = name[: self._MAX_FILENAME_LEN - len(suffix)] + suffix
|
||||
return os.path.join(path, temp_filename)
|
||||
|
||||
def allocate(self, filename, size):
|
||||
try:
|
||||
with self.open(filename, 'wb') as f:
|
||||
fallocate(f, size)
|
||||
except OSError:
|
||||
self.remove_file(filename)
|
||||
raise
|
||||
|
||||
|
||||
class DeferredOpenFile:
|
||||
def __init__(self, filename, start_byte=0, mode='rb', open_function=open):
|
||||
"""A class that defers the opening of a file till needed
|
||||
|
||||
This is useful for deferring opening of a file till it is needed
|
||||
in a separate thread, as there is a limit of how many open files
|
||||
there can be in a single thread for most operating systems. The
|
||||
file gets opened in the following methods: ``read()``, ``write()``, ``seek()``,
|
||||
and ``__enter__()``
|
||||
|
||||
:type filename: str
|
||||
:param filename: The name of the file to open
|
||||
|
||||
:type start_byte: int
|
||||
:param start_byte: The byte to seek to when the file is opened.
|
||||
|
||||
:type mode: str
|
||||
:param mode: The mode to use to open the file
|
||||
|
||||
:type open_function: function
|
||||
:param open_function: The function to use to open the file
|
||||
"""
|
||||
self._filename = filename
|
||||
self._fileobj = None
|
||||
self._start_byte = start_byte
|
||||
self._mode = mode
|
||||
self._open_function = open_function
|
||||
|
||||
def _open_if_needed(self):
|
||||
if self._fileobj is None:
|
||||
self._fileobj = self._open_function(self._filename, self._mode)
|
||||
if self._start_byte != 0:
|
||||
self._fileobj.seek(self._start_byte)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self._filename
|
||||
|
||||
def read(self, amount=None):
|
||||
self._open_if_needed()
|
||||
return self._fileobj.read(amount)
|
||||
|
||||
def write(self, data):
|
||||
self._open_if_needed()
|
||||
self._fileobj.write(data)
|
||||
|
||||
def seek(self, where, whence=0):
|
||||
self._open_if_needed()
|
||||
self._fileobj.seek(where, whence)
|
||||
|
||||
def tell(self):
|
||||
if self._fileobj is None:
|
||||
return self._start_byte
|
||||
return self._fileobj.tell()
|
||||
|
||||
def close(self):
|
||||
if self._fileobj:
|
||||
self._fileobj.close()
|
||||
|
||||
def __enter__(self):
|
||||
self._open_if_needed()
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
self.close()
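# Editor's sketch: DeferredOpenFile looks like a file but postpones the
# actual open() until the first read/write/seek/__enter__, which typically
# happens on a worker thread rather than on the submitting thread.
#
#   deferred = DeferredOpenFile('/tmp/myfile', start_byte=1024)
#   # ... no file handle exists yet ...
#   data = deferred.read(4096)   # the file is opened and seeked here
#   deferred.close()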
|
||||
|
||||
|
||||
class ReadFileChunk:
|
||||
def __init__(
|
||||
self,
|
||||
fileobj,
|
||||
chunk_size,
|
||||
full_file_size,
|
||||
callbacks=None,
|
||||
enable_callbacks=True,
|
||||
close_callbacks=None,
|
||||
):
|
||||
"""
|
||||
|
||||
Given a file object shown below::
|
||||
|
||||
|___________________________________________________|
|
||||
0 | | full_file_size
|
||||
|----chunk_size---|
|
||||
f.tell()
|
||||
|
||||
:type fileobj: file
|
||||
:param fileobj: File like object
|
||||
|
||||
:type chunk_size: int
|
||||
:param chunk_size: The max chunk size to read. Trying to read
|
||||
past the end of the chunk size will behave like you've
|
||||
reached the end of the file.
|
||||
|
||||
:type full_file_size: int
|
||||
:param full_file_size: The entire content length associated
|
||||
with ``fileobj``.
|
||||
|
||||
:type callbacks: A list of function(amount_read)
|
||||
:param callbacks: Called whenever data is read from this object in the
|
||||
order provided.
|
||||
|
||||
:type enable_callbacks: boolean
|
||||
:param enable_callbacks: True if to run callbacks. Otherwise, do not
|
||||
run callbacks
|
||||
|
||||
:type close_callbacks: A list of function()
|
||||
:param close_callbacks: Called when close is called. The function
|
||||
should take no arguments.
|
||||
"""
|
||||
self._fileobj = fileobj
|
||||
self._start_byte = self._fileobj.tell()
|
||||
self._size = self._calculate_file_size(
|
||||
self._fileobj,
|
||||
requested_size=chunk_size,
|
||||
start_byte=self._start_byte,
|
||||
actual_file_size=full_file_size,
|
||||
)
|
||||
# _amount_read represents the position in the chunk and may exceed
|
||||
# the chunk size, but won't allow reads out of bounds.
|
||||
self._amount_read = 0
|
||||
self._callbacks = callbacks
|
||||
if callbacks is None:
|
||||
self._callbacks = []
|
||||
self._callbacks_enabled = enable_callbacks
|
||||
self._close_callbacks = close_callbacks
|
||||
if close_callbacks is None:
|
||||
self._close_callbacks = []
|
||||
|
||||
@classmethod
|
||||
def from_filename(
|
||||
cls,
|
||||
filename,
|
||||
start_byte,
|
||||
chunk_size,
|
||||
callbacks=None,
|
||||
enable_callbacks=True,
|
||||
):
|
||||
"""Convenience factory function to create from a filename.
|
||||
|
||||
:type start_byte: int
|
||||
:param start_byte: The first byte from which to start reading.
|
||||
|
||||
:type chunk_size: int
|
||||
:param chunk_size: The max chunk size to read. Trying to read
|
||||
past the end of the chunk size will behave like you've
|
||||
reached the end of the file.
|
||||
|
||||
:type callbacks: function(amount_read)
|
||||
:param callbacks: Called whenever data is read from this object.
|
||||
|
||||
:type enable_callbacks: bool
|
||||
:param enable_callbacks: Indicate whether to invoke callback
|
||||
during read() calls.
|
||||
|
||||
:rtype: ``ReadFileChunk``
|
||||
:return: A new instance of ``ReadFileChunk``
|
||||
|
||||
"""
|
||||
f = open(filename, 'rb')
|
||||
f.seek(start_byte)
|
||||
file_size = os.fstat(f.fileno()).st_size
|
||||
return cls(f, chunk_size, file_size, callbacks, enable_callbacks)
|
||||
|
||||
def _calculate_file_size(
|
||||
self, fileobj, requested_size, start_byte, actual_file_size
|
||||
):
|
||||
max_chunk_size = actual_file_size - start_byte
|
||||
return min(max_chunk_size, requested_size)
|
||||
|
||||
def read(self, amount=None):
|
||||
amount_left = max(self._size - self._amount_read, 0)
|
||||
if amount is None:
|
||||
amount_to_read = amount_left
|
||||
else:
|
||||
amount_to_read = min(amount_left, amount)
|
||||
data = self._fileobj.read(amount_to_read)
|
||||
self._amount_read += len(data)
|
||||
if self._callbacks is not None and self._callbacks_enabled:
|
||||
invoke_progress_callbacks(self._callbacks, len(data))
|
||||
return data
|
||||
|
||||
def signal_transferring(self):
|
||||
self.enable_callback()
|
||||
if hasattr(self._fileobj, 'signal_transferring'):
|
||||
self._fileobj.signal_transferring()
|
||||
|
||||
def signal_not_transferring(self):
|
||||
self.disable_callback()
|
||||
if hasattr(self._fileobj, 'signal_not_transferring'):
|
||||
self._fileobj.signal_not_transferring()
|
||||
|
||||
def enable_callback(self):
|
||||
self._callbacks_enabled = True
|
||||
|
||||
def disable_callback(self):
|
||||
self._callbacks_enabled = False
|
||||
|
||||
def seek(self, where, whence=0):
|
||||
if whence not in (0, 1, 2):
|
||||
# Mimic io's error for invalid whence values
|
||||
raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
|
||||
|
||||
# Recalculate where based on chunk attributes so seek from file
|
||||
# start (whence=0) is always used
|
||||
where += self._start_byte
|
||||
if whence == 1:
|
||||
where += self._amount_read
|
||||
elif whence == 2:
|
||||
where += self._size
|
||||
|
||||
self._fileobj.seek(max(where, self._start_byte))
|
||||
if self._callbacks is not None and self._callbacks_enabled:
|
||||
# To also rewind the callback() for an accurate progress report
|
||||
bounded_where = max(min(where - self._start_byte, self._size), 0)
|
||||
bounded_amount_read = min(self._amount_read, self._size)
|
||||
amount = bounded_where - bounded_amount_read
|
||||
invoke_progress_callbacks(
|
||||
self._callbacks, bytes_transferred=amount
|
||||
)
|
||||
self._amount_read = max(where - self._start_byte, 0)
|
||||
|
||||
def close(self):
|
||||
if self._close_callbacks is not None and self._callbacks_enabled:
|
||||
for callback in self._close_callbacks:
|
||||
callback()
|
||||
self._fileobj.close()
|
||||
|
||||
def tell(self):
|
||||
return self._amount_read
|
||||
|
||||
def __len__(self):
|
||||
# __len__ is defined because requests will try to determine the length
|
||||
# of the stream to set a content length. In the normal case
|
||||
# of the file it will just stat the file, but we need to change that
|
||||
# behavior. By providing a __len__, requests will use that instead
|
||||
# of stat'ing the file.
|
||||
return self._size
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
self.close()
|
||||
|
||||
def __iter__(self):
|
||||
# This is a workaround for http://bugs.python.org/issue17575
|
||||
# Basically httplib will try to iterate over the contents, even
|
||||
# if its a file like object. This wasn't noticed because we've
|
||||
# already exhausted the stream so iterating over the file immediately
|
||||
# stops, which is what we're simulating here.
|
||||
return iter([])
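# Editor's sketch: ReadFileChunk exposes a bounded window over a larger
# file object, so reads never run past chunk_size bytes from the start
# position.
#
#   with open('/tmp/myfile', 'rb') as f:
#       f.seek(5 * 1024**2)
#       chunk = ReadFileChunk(f, chunk_size=5 * 1024**2,
#                             full_file_size=os.path.getsize('/tmp/myfile'))
#       chunk.read()   # returns at most 5 MiB, starting at the seek point
#       len(chunk)     # the bounded chunk size, not the whole file size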
|
||||
|
||||
|
||||
class StreamReaderProgress:
|
||||
"""Wrapper for a read only stream that adds progress callbacks."""
|
||||
|
||||
def __init__(self, stream, callbacks=None):
|
||||
self._stream = stream
|
||||
self._callbacks = callbacks
|
||||
if callbacks is None:
|
||||
self._callbacks = []
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
value = self._stream.read(*args, **kwargs)
|
||||
invoke_progress_callbacks(self._callbacks, len(value))
|
||||
return value
|
||||
|
||||
|
||||
class NoResourcesAvailable(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class TaskSemaphore:
|
||||
def __init__(self, count):
|
||||
"""A semaphore for the purpose of limiting the number of tasks
|
||||
|
||||
:param count: The size of semaphore
|
||||
"""
|
||||
self._semaphore = threading.Semaphore(count)
|
||||
|
||||
def acquire(self, tag, blocking=True):
|
||||
"""Acquire the semaphore
|
||||
|
||||
:param tag: A tag identifying what is acquiring the semaphore. Note
|
||||
that this is not really needed to directly use this class but is
|
||||
needed for API compatibility with the SlidingWindowSemaphore
|
||||
implementation.
|
||||
:param blocking: If True, block until it can be acquired. If False,
|
||||
do not block and raise an exception if cannot be acquired.
|
||||
|
||||
:returns: A token (can be None) to use when releasing the semaphore
|
||||
"""
|
||||
logger.debug("Acquiring %s", tag)
|
||||
if not self._semaphore.acquire(blocking):
|
||||
raise NoResourcesAvailable(f"Cannot acquire tag '{tag}'")
|
||||
|
||||
def release(self, tag, acquire_token):
|
||||
"""Release the semaphore
|
||||
|
||||
:param tag: A tag identifying what is releasing the semaphore
|
||||
:param acquire_token: The token returned from when the semaphore was
|
||||
acquired. Note that this is not really needed to directly use this
|
||||
class but is needed for API compatibility with the
|
||||
SlidingWindowSemaphore implementation.
|
||||
"""
|
||||
logger.debug(f"Releasing acquire {tag}/{acquire_token}")
|
||||
self._semaphore.release()
|
||||
|
||||
|
||||
class SlidingWindowSemaphore(TaskSemaphore):
|
||||
"""A semaphore used to coordinate sequential resource access.
|
||||
|
||||
This class is similar to the stdlib BoundedSemaphore:
|
||||
|
||||
* It's initialized with a count.
|
||||
* Each call to ``acquire()`` decrements the counter.
|
||||
* If the count is at zero, then ``acquire()`` will either block until the
|
||||
count increases, or if ``blocking=False``, then it will raise
|
||||
a NoResourcesAvailable exception indicating that it failed to acquire the
|
||||
semaphore.
|
||||
|
||||
The main difference is that this semaphore is used to limit
|
||||
access to a resource that requires sequential access. For example,
|
||||
if you want to access resource R that has 20 subresources R_0 - R_19,
|
||||
this semaphore can also enforce that you only have a max range of
|
||||
10 at any given point in time. You must also specify a tag name
|
||||
when you acquire the semaphore. The sliding window semantics apply
|
||||
on a per tag basis. The internal count will only be incremented
|
||||
when the minimum sequence number for a tag is released.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, count):
|
||||
self._count = count
|
||||
# Dict[tag, next_sequence_number].
|
||||
self._tag_sequences = defaultdict(int)
|
||||
self._lowest_sequence = {}
|
||||
self._lock = threading.Lock()
|
||||
self._condition = threading.Condition(self._lock)
|
||||
# Dict[tag, List[sequence_number]]
|
||||
self._pending_release = {}
|
||||
|
||||
def current_count(self):
|
||||
with self._lock:
|
||||
return self._count
|
||||
|
||||
def acquire(self, tag, blocking=True):
|
||||
logger.debug("Acquiring %s", tag)
|
||||
self._condition.acquire()
|
||||
try:
|
||||
if self._count == 0:
|
||||
if not blocking:
|
||||
raise NoResourcesAvailable(f"Cannot acquire tag '{tag}'")
|
||||
else:
|
||||
while self._count == 0:
|
||||
self._condition.wait()
|
||||
# self._count is no longer zero.
|
||||
# First, check if this is the first time we're seeing this tag.
|
||||
sequence_number = self._tag_sequences[tag]
|
||||
if sequence_number == 0:
|
||||
# First time seeing the tag, so record we're at 0.
|
||||
self._lowest_sequence[tag] = sequence_number
|
||||
self._tag_sequences[tag] += 1
|
||||
self._count -= 1
|
||||
return sequence_number
|
||||
finally:
|
||||
self._condition.release()
|
||||
|
||||
def release(self, tag, acquire_token):
|
||||
sequence_number = acquire_token
|
||||
logger.debug("Releasing acquire %s/%s", tag, sequence_number)
|
||||
self._condition.acquire()
|
||||
try:
|
||||
if tag not in self._tag_sequences:
|
||||
raise ValueError(f"Attempted to release unknown tag: {tag}")
|
||||
max_sequence = self._tag_sequences[tag]
|
||||
if self._lowest_sequence[tag] == sequence_number:
|
||||
# We can immediately process this request and free up
|
||||
# resources.
|
||||
self._lowest_sequence[tag] += 1
|
||||
self._count += 1
|
||||
self._condition.notify()
|
||||
queued = self._pending_release.get(tag, [])
|
||||
while queued:
|
||||
if self._lowest_sequence[tag] == queued[-1]:
|
||||
queued.pop()
|
||||
self._lowest_sequence[tag] += 1
|
||||
self._count += 1
|
||||
else:
|
||||
break
|
||||
elif self._lowest_sequence[tag] < sequence_number < max_sequence:
|
||||
# We can't do anything right now because we're still waiting
|
||||
# for the min sequence for the tag to be released. We have
|
||||
# to queue this for pending release.
|
||||
self._pending_release.setdefault(tag, []).append(
|
||||
sequence_number
|
||||
)
|
||||
self._pending_release[tag].sort(reverse=True)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Attempted to release unknown sequence number "
|
||||
f"{sequence_number} for tag: {tag}"
|
||||
)
|
||||
finally:
|
||||
self._condition.release()
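# Editor's sketch of the sliding-window behaviour (hypothetical values):
# out-of-order releases are parked until the lowest outstanding sequence
# number for the tag is released.
#
#   sem = SlidingWindowSemaphore(2)
#   first = sem.acquire('part')    # sequence 0
#   second = sem.acquire('part')   # sequence 1; count is now 0
#   sem.release('part', second)    # parked: sequence 0 is still outstanding
#   sem.release('part', first)     # frees sequence 0 and the parked 1;
#                                  # count returns to 2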
|
||||
|
||||
|
||||
class ChunksizeAdjuster:
|
||||
def __init__(
|
||||
self,
|
||||
max_size=MAX_SINGLE_UPLOAD_SIZE,
|
||||
min_size=MIN_UPLOAD_CHUNKSIZE,
|
||||
max_parts=MAX_PARTS,
|
||||
):
|
||||
self.max_size = max_size
|
||||
self.min_size = min_size
|
||||
self.max_parts = max_parts
|
||||
|
||||
def adjust_chunksize(self, current_chunksize, file_size=None):
|
||||
"""Get a chunksize close to current that fits within all S3 limits.
|
||||
|
||||
:type current_chunksize: int
|
||||
:param current_chunksize: The currently configured chunksize.
|
||||
|
||||
:type file_size: int or None
|
||||
:param file_size: The size of the file to upload. This might be None
|
||||
if the object being transferred has an unknown size.
|
||||
|
||||
:returns: A valid chunksize that fits within configured limits.
|
||||
"""
|
||||
chunksize = current_chunksize
|
||||
if file_size is not None:
|
||||
chunksize = self._adjust_for_max_parts(chunksize, file_size)
|
||||
return self._adjust_for_chunksize_limits(chunksize)
|
||||
|
||||
def _adjust_for_chunksize_limits(self, current_chunksize):
|
||||
if current_chunksize > self.max_size:
|
||||
logger.debug(
|
||||
"Chunksize greater than maximum chunksize. "
|
||||
f"Setting to {self.max_size} from {current_chunksize}."
|
||||
)
|
||||
return self.max_size
|
||||
elif current_chunksize < self.min_size:
|
||||
logger.debug(
|
||||
"Chunksize less than minimum chunksize. "
|
||||
f"Setting to {self.min_size} from {current_chunksize}."
|
||||
)
|
||||
return self.min_size
|
||||
else:
|
||||
return current_chunksize
|
||||
|
||||
def _adjust_for_max_parts(self, current_chunksize, file_size):
|
||||
chunksize = current_chunksize
|
||||
num_parts = int(math.ceil(file_size / float(chunksize)))
|
||||
|
||||
while num_parts > self.max_parts:
|
||||
chunksize *= 2
|
||||
num_parts = int(math.ceil(file_size / float(chunksize)))
|
||||
|
||||
if chunksize != current_chunksize:
|
||||
logger.debug(
|
||||
"Chunksize would result in the number of parts exceeding the "
|
||||
f"maximum. Setting to {chunksize} from {current_chunksize}."
|
||||
)
|
||||
|
||||
return chunksize
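# Editor's note, a worked example with the defaults above: an 8 MiB
# chunksize for a 100 GiB file would need 12,800 parts, which exceeds
# MAX_PARTS, so it is doubled once to 16 MiB (6,400 parts). A configured
# chunksize below MIN_UPLOAD_CHUNKSIZE is simply raised to the minimum.
#
#   ChunksizeAdjuster().adjust_chunksize(8 * 1024**2, 100 * 1024**3)
#   # -> 16777216  (16 MiB)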
|
||||
|
||||
|
||||
def add_s3express_defaults(bucket, extra_args):
|
||||
"""
|
||||
This function has been deprecated, but is kept for backwards compatibility.
|
||||
It is subject to removal in a future release.
|
||||
"""
|
||||
if is_s3express_bucket(bucket) and "ChecksumAlgorithm" not in extra_args:
|
||||
# Default Transfer Operations to S3Express to use CRC32
|
||||
extra_args["ChecksumAlgorithm"] = "crc32"
|
||||
|
||||
|
||||
def set_default_checksum_algorithm(extra_args):
|
||||
"""Set the default algorithm to CRC32 if not specified by the user."""
|
||||
if any(checksum in extra_args for checksum in FULL_OBJECT_CHECKSUM_ARGS):
|
||||
return
|
||||
extra_args.setdefault("ChecksumAlgorithm", DEFAULT_CHECKSUM_ALGORITHM)
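# Editor's note: callers that already pass one of the full-object checksum
# arguments (for example a precomputed ChecksumCRC32 value) keep their
# choice; otherwise the library-wide default algorithm is filled in.
#
#   args = {}
#   set_default_checksum_algorithm(args)
#   # args == {'ChecksumAlgorithm': DEFAULT_CHECKSUM_ALGORITHM}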
|
Reference in New Issue
Block a user