From abab571136e68018dbb7d131a93a21236291e0be Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Fri, 13 Sep 2024 15:51:16 +1000 Subject: [PATCH 1/9] initial commit --- classes/api.php | 243 +++++++++++++++++++++++++++++ lang/en/local_azureblobstorage.php | 5 + version.php | 23 +++ 3 files changed, 271 insertions(+) create mode 100644 classes/api.php create mode 100644 lang/en/local_azureblobstorage.php create mode 100644 version.php diff --git a/classes/api.php b/classes/api.php new file mode 100644 index 0000000..7a6c966 --- /dev/null +++ b/classes/api.php @@ -0,0 +1,243 @@ +account = $account; + $this->container = $container; + $this->sastoken = $sastoken; + $this->client = new Client(); + } + + /** + * URL for blob + * @param string $blobkey key of blob + * @return string + */ + private function build_blob_url(string $blobkey): string { + return 'https://' . $this->account . '.blob.core.windows.net/' . $this->container . '/' . $blobkey . '?' . $this->sastoken; + } + + /** + * Blob block URL. Blocks are 'pieces' of a blob. + * @param string $blobkey key of blob + * @param string $blockid id of block. TODO note format or docs link + * @return string + */ + private function build_blob_block_url(string $blobkey, string $blockid): string { + return $this->build_blob_url($blobkey) . '&comp=block&blockid=' . $blockid; + } + + /** + * Builds block list url. Block list of a list of blocks. + * @param string $blobkey key of blob + * @return string + */ + private function build_blocklist_url(string $blobkey): string { + return $this->build_blob_url($blobkey) . '&comp=blocklist'; + } + + /** + * Build blob properties URL. + * @param string $blobkey key of blob + * @return string + */ + private function build_blob_properties_url(string $blobkey): string { + return $this->build_blob_url($blobkey) . '&comp=properties'; + } + + /** + * Get blob. 
+ * @param string $key blob key + * @return PromiseInterface Promise that resolves a ResponseInterface value where the body is a stream of the blob contents. + */ + public function get_blob(string $key): PromiseInterface { + return $this->client->getAsync($this->build_blob_url($key)); + } + + /** + * Get blob properties. + * @param string $key blob key + * @return PromiseInterface Promise that resolves a ResponseInterface value where the properties are in the response headers. + */ + public function get_blob_properties(string $key): PromiseInterface { + return $this->client->headAsync($this->build_blob_url($key)); + } + + /** + * Put (create/update) blob. + * Note depending on the size of the stream, it may be uploaded via single or multipart upload. + * + * @param string $key blob key + * @param StreamInterface $contentstream the blob contents as a stream + * @param string $md5 binary md5 hash of file contents. You likely need to call hex2bin before passing in here. + * @return PromiseInterface Promise that resolves a ResponseInterface value. + */ + public function put_blob(string $key, StreamInterface $contentstream, string $md5): PromiseInterface { + if ($this->should_stream_upload_multipart($contentstream)) { + return $this->put_blob_multipart($key, $contentstream, $md5); + } else { + return $this->put_blob_single($key, $contentstream, $md5); + } + } + + /** + * Puts a blob using single upload. Suitable for small blobs. + * + * @param string $key blob key + * @param StreamInterface $contentstream the blob contents as a stream + * @param string $md5 binary md5 hash of file contents. You likely need to call hex2bin before passing in here. + * @return PromiseInterface Promise that resolves a ResponseInterface value. 
+ */ + public function put_blob_single(string $key, StreamInterface $contentstream, string $md5): PromiseInterface { + return $this->client->putAsync( + $this->build_blob_url($key), + [ + 'headers' => [ + 'x-ms-blob-type' => 'BlockBlob', + 'content-md5' => base64_encode($md5), + ], + 'body' => $contentstream, + ] + ); + } + + /** + * Puts a blob using multipart/block upload. Suitable for large blobs. + * This is done by splitting the blob into multiple blocks, and then combining them using a BlockList on the Azure side + * before finally setting the final md5 by setting the blob properties. + * + * @param string $key blob key + * @param StreamInterface $contentstream the blob contents as a stream + * @param string $md5 binary md5 hash of file contents. You likely need to call hex2bin before passing in here. + * @return PromiseInterface Promise that resolves when complete. + */ + public function put_blob_multipart(string $key, StreamInterface $contentstream, string $md5): PromiseInterface { + // We make multiple calls to the Azure API to do multipart uploads, so wrap the entire thing + // into a single promise. + $entirepromise = new Promise(function() use (&$entirepromise, $key, $contentstream, $md5) { + // Split into blocks. + $counter = 0; + $blockids = []; + $promises = []; + + while (true) { + $content = $contentstream->read(self::MULTIPART_BLOCK_SIZE); + + // Each block has its own md5 specific to itself. + $blockmd5 = base64_encode(hex2bin(md5($content))); + + // Finished reading, nothing more to upload. + if (empty($content)) { + break; + } + + // The block ID must be the same length regardles of the counter value. + // So pad them with zeros. 
+ $blockid = base64_encode( + str_pad($counter++, 6, '0', STR_PAD_LEFT) + ); + + $request = new Request('PUT', $this->build_blob_block_url($key, $blockid), ['content-md5' => $blockmd5], $content); + $promises[] = $this->client->sendAsync($request); + $blockids[] = $blockid; + }; + + if (count($blockids) > self::MAX_NUMBER_BLOCKS) { + // TODO different ex, handle better + throw new coding_exception("Max number of blocks reached, block size too small ?"); + } + + // Will throw exception if any fail - if any fail we want to abort early. + Utils::unwrap($promises); + + // Commit the blocks together into a single blob. + $body = $this->make_block_list_xml($blockids); + $bodymd5 = base64_encode(hex2bin(md5($body))); + $request = new Request('PUT', $this->build_blocklist_url($key), ['Content-Type' => 'application/xml', 'content-md5' => $bodymd5], $body); + $this->client->send($request); + + // Now it is combined, set the md5 on the completed blob. + $request = new Request('PUT', $this->build_blob_properties_url($key), ['x-ms-blob-content-md5' => base64_encode($md5)]); + $this->client->send($request); + + $entirepromise->resolve('fulfilled'); + }); + + return $entirepromise; + } + + /** + * If the stream should upload using multipart upload. + * @param StreamInterface $stream + * @return bool + */ + private function should_stream_upload_multipart(StreamInterface $stream): bool { + return $stream->getSize() > self::MULTIPART_THRESHOLD; + } + + /** + * Generates a blocklist XML. + * @see https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list#request-body + * @param array $blockidlist list of block ids. + * @return string blocklist xml string. + */ + private function make_block_list_xml(array $blockidlist): string { + // We use 'Latest' since we don't care about committing different + // blob block versions - we always want the latest. + $string = "\n"; + foreach($blockidlist as $blockid) { + $string .= "\n" . $blockid . 
''; + } + $string .= "\n"; + return $string; + } +} \ No newline at end of file diff --git a/lang/en/local_azureblobstorage.php b/lang/en/local_azureblobstorage.php new file mode 100644 index 0000000..7cc2100 --- /dev/null +++ b/lang/en/local_azureblobstorage.php @@ -0,0 +1,5 @@ +. + +defined('MOODLE_INTERNAL') || die(); + +$plugin->version = 2018072500; // The current plugin version (Date: YYYYMMDDXX). +$plugin->release = 2017102000; // Same as version. +$plugin->requires = 2013111811; +$plugin->component = "local_azureblobstorage"; +$plugin->maturity = MATURITY_STABLE; From 48b5531e4ed2aa01ec2704180b75c98db0ffbd2f Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Mon, 14 Oct 2024 10:47:08 +1000 Subject: [PATCH 2/9] feat: initial sdk --- README.md | 4 - classes/api.php | 133 ++++--- classes/privacy/provider.php | 40 +++ classes/stream_wrapper.php | 539 +++++++++++++++++++++++++++++ lang/en/local_azureblobstorage.php | 26 +- version.php | 17 +- 6 files changed, 710 insertions(+), 49 deletions(-) create mode 100644 classes/privacy/provider.php create mode 100644 classes/stream_wrapper.php diff --git a/README.md b/README.md index f189dd4..ec66707 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -> [!CAUTION] -> This plugin is under development and is currently not ready for general use. - # Azure Blob Storage SDK - Moodle Plugin A moodle plugin with functions to interact with the Microsoft Azure Blob Storage service. @@ -52,4 +49,3 @@ If you would like commercial support or would like to sponsor additional improve to this plugin please contact us: https://www.catalyst-au.net/contact-us - diff --git a/classes/api.php b/classes/api.php index 7a6c966..ca63952 100644 --- a/classes/api.php +++ b/classes/api.php @@ -1,35 +1,45 @@ . 
namespace local_azureblobstorage; -use core\exception\coding_exception; use GuzzleHttp\Client; use GuzzleHttp\Promise\Promise; use GuzzleHttp\Promise\PromiseInterface; use GuzzleHttp\Promise\Utils; use GuzzleHttp\Psr7\Request; use Psr\Http\Message\StreamInterface; +use coding_exception; +/** + * Azure blob storage API. + * + * This class is intended to generically implement basic blob storage operations (get,put,delete,etc...) + * which can then be referenced in other plugins. + * + * @package local_azureblobstorage + * @author Matthew Hilton + * @copyright 2024 Catalyst IT + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ class api { - /** - * @var string Storage account name - */ - private string $account; - - /** - * @var string Storage account container name - */ - private string $container; - - /** - * @var Shared Access Token (SAS) for authentication - */ - private string $sastoken; - /** * @var Client Guzzle HTTP client for making requests */ - private Client $client; + private readonly Client $client; /** * @var int Threshold before blob uploads using multipart upload. @@ -38,7 +48,7 @@ class api { /** * @var int Number of bytes per multipart block. - * + * * As of 2019-12-12 api version the max size is 4000MB. * @see https://learn.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs#about-block-blobs */ @@ -50,10 +60,31 @@ class api { */ const MAX_NUMBER_BLOCKS = 50000; - public function __construct(string $account, string $container, string $sastoken) { - $this->account = $account; - $this->container = $container; - $this->sastoken = $sastoken; + /** + * @var int Maximum block size. This is set by azure + * @see https://learn.microsoft.com/en-us/azure/storage/blobs/scalability-targets + */ + const MAX_BLOCK_SIZE = 50000 * 4000 * 1024; // 50,000 x 4000 MB blocks, approx 190 TB + + /** + * @var string the default content type if none is given. 
+ */ + const DEFAULT_CONTENT_TYPE = 'application/octet-stream'; + + /** + * Create a API + * @param string $account Azure storage account name + * @param string $container Azure storage container name (inside the given storage account). + * @param string $sastoken SAS (Shared access secret) token for authentication. + */ + public function __construct( + /** @var string Azure storage account name */ + readonly public string $account, + /** @var string Azure storage container name */ + readonly public string $container, + /** @var string SAS token for authentication */ + readonly public string $sastoken + ) { $this->client = new Client(); } @@ -69,7 +100,8 @@ private function build_blob_url(string $blobkey): string { /** * Blob block URL. Blocks are 'pieces' of a blob. * @param string $blobkey key of blob - * @param string $blockid id of block. TODO note format or docs link + * @param string $blockid id of block. Note, for each blob, every blockid must have the exact same length and is base64 encoded. + * @see https://learn.microsoft.com/en-us/rest/api/storageservices/put-block * @return string */ private function build_blob_block_url(string $blobkey, string $blockid): string { @@ -84,7 +116,7 @@ private function build_blob_block_url(string $blobkey, string $blockid): string private function build_blocklist_url(string $blobkey): string { return $this->build_blob_url($blobkey) . '&comp=blocklist'; } - + /** * Build blob properties URL. * @param string $blobkey key of blob @@ -99,7 +131,7 @@ private function build_blob_properties_url(string $blobkey): string { * @param string $key blob key * @return PromiseInterface Promise that resolves a ResponseInterface value where the body is a stream of the blob contents. 
*/ - public function get_blob(string $key): PromiseInterface { + public function get_blob_async(string $key): PromiseInterface { return $this->client->getAsync($this->build_blob_url($key)); } @@ -108,24 +140,35 @@ public function get_blob(string $key): PromiseInterface { * @param string $key blob key * @return PromiseInterface Promise that resolves a ResponseInterface value where the properties are in the response headers. */ - public function get_blob_properties(string $key): PromiseInterface { + public function get_blob_properties_async(string $key): PromiseInterface { return $this->client->headAsync($this->build_blob_url($key)); } + /** + * Deletes a given blob + * @param string $key blob key + * @return PromiseInterface Promise that resolves once the delete request succeeds. + */ + public function delete_blob_async(string $key): PromiseInterface { + return $this->client->deleteAsync($this->build_blob_url($key)); + } + /** * Put (create/update) blob. * Note depending on the size of the stream, it may be uploaded via single or multipart upload. - * + * * @param string $key blob key * @param StreamInterface $contentstream the blob contents as a stream * @param string $md5 binary md5 hash of file contents. You likely need to call hex2bin before passing in here. + * @param string $contenttype Content type to set for the file. * @return PromiseInterface Promise that resolves a ResponseInterface value. 
*/ - public function put_blob(string $key, StreamInterface $contentstream, string $md5): PromiseInterface { + public function put_blob_async(string $key, StreamInterface $contentstream, string $md5, + string $contenttype = self::DEFAULT_CONTENT_TYPE): PromiseInterface { if ($this->should_stream_upload_multipart($contentstream)) { - return $this->put_blob_multipart($key, $contentstream, $md5); + return $this->put_blob_multipart_async($key, $contentstream, $md5, $contenttype); } else { - return $this->put_blob_single($key, $contentstream, $md5); + return $this->put_blob_single_async($key, $contentstream, $md5, $contenttype); } } @@ -135,14 +178,17 @@ public function put_blob(string $key, StreamInterface $contentstream, string $md * @param string $key blob key * @param StreamInterface $contentstream the blob contents as a stream * @param string $md5 binary md5 hash of file contents. You likely need to call hex2bin before passing in here. + * @param string $contenttype Content type to set for the file. * @return PromiseInterface Promise that resolves a ResponseInterface value. */ - public function put_blob_single(string $key, StreamInterface $contentstream, string $md5): PromiseInterface { + public function put_blob_single_async(string $key, StreamInterface $contentstream, string $md5, + string $contenttype = self::DEFAULT_CONTENT_TYPE): PromiseInterface { return $this->client->putAsync( $this->build_blob_url($key), [ 'headers' => [ 'x-ms-blob-type' => 'BlockBlob', + 'x-ms-blob-content-type' => $contenttype, 'content-md5' => base64_encode($md5), ], 'body' => $contentstream, @@ -158,12 +204,15 @@ public function put_blob_single(string $key, StreamInterface $contentstream, str * @param string $key blob key * @param StreamInterface $contentstream the blob contents as a stream * @param string $md5 binary md5 hash of file contents. You likely need to call hex2bin before passing in here. - * @return PromiseInterface Promise that resolves when complete. 
+ * @param string $contenttype Content type to set for the file. + * @return PromiseInterface Promise that resolves when complete. Note the response is NOT available here, + * because this operation involves many separate requests. */ - public function put_blob_multipart(string $key, StreamInterface $contentstream, string $md5): PromiseInterface { + public function put_blob_multipart_async(string $key, StreamInterface $contentstream, string $md5, + string $contenttype = self::DEFAULT_CONTENT_TYPE): PromiseInterface { // We make multiple calls to the Azure API to do multipart uploads, so wrap the entire thing // into a single promise. - $entirepromise = new Promise(function() use (&$entirepromise, $key, $contentstream, $md5) { + $entirepromise = new Promise(function() use (&$entirepromise, $key, $contentstream, $md5, $contenttype) { // Split into blocks. $counter = 0; $blockids = []; @@ -192,7 +241,6 @@ public function put_blob_multipart(string $key, StreamInterface $contentstream, }; if (count($blockids) > self::MAX_NUMBER_BLOCKS) { - // TODO different ex, handle better throw new coding_exception("Max number of blocks reached, block size too small ?"); } @@ -202,13 +250,18 @@ public function put_blob_multipart(string $key, StreamInterface $contentstream, // Commit the blocks together into a single blob. $body = $this->make_block_list_xml($blockids); $bodymd5 = base64_encode(hex2bin(md5($body))); - $request = new Request('PUT', $this->build_blocklist_url($key), ['Content-Type' => 'application/xml', 'content-md5' => $bodymd5], $body); + $request = new Request('PUT', $this->build_blocklist_url($key), + ['Content-Type' => 'application/xml', 'content-md5' => $bodymd5], $body); $this->client->send($request); - // Now it is combined, set the md5 on the completed blob. - $request = new Request('PUT', $this->build_blob_properties_url($key), ['x-ms-blob-content-md5' => base64_encode($md5)]); + // Now it is combined, set the md5 and content type on the completed blob. 
+ $request = new Request('PUT', $this->build_blob_properties_url($key), [ + 'x-ms-blob-content-md5' => base64_encode($md5), + 'x-ms-blob-content-type' => $contenttype, + ]); $this->client->send($request); + // Done, resolve the entire promise. $entirepromise->resolve('fulfilled'); }); @@ -234,10 +287,10 @@ private function make_block_list_xml(array $blockidlist): string { // We use 'Latest' since we don't care about committing different // blob block versions - we always want the latest. $string = "\n"; - foreach($blockidlist as $blockid) { + foreach ($blockidlist as $blockid) { $string .= "\n" . $blockid . ''; } $string .= "\n"; return $string; } -} \ No newline at end of file +} diff --git a/classes/privacy/provider.php b/classes/privacy/provider.php new file mode 100644 index 0000000..9c07fcb --- /dev/null +++ b/classes/privacy/provider.php @@ -0,0 +1,40 @@ +. + +namespace local_azureblobstorage\privacy; + +/** + * Privacy system + * + * @package local_azureblobstorage + * @author Matthew Hilton + * @copyright 2024 Catalyst IT + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ +class provider implements \core_privacy\local\metadata\null_provider { + /** + * Get the language string identifier with the component's language + * file to explain why this plugin stores no data. + * + * This function is compatible with old php version. (Diff is the underscore '_' in the beginning) + * But the get_reason is still available because of the trait legacy_polyfill. + * + * @return string + */ + public static function get_reason(): string { + return 'privacy:no_data_reason'; + } +} diff --git a/classes/stream_wrapper.php b/classes/stream_wrapper.php new file mode 100644 index 0000000..305aba9 --- /dev/null +++ b/classes/stream_wrapper.php @@ -0,0 +1,539 @@ +. 
+ +namespace local_azureblobstorage; + +use GuzzleHttp\Exception\ClientException; +use GuzzleHttp\Psr7\CachingStream; +use GuzzleHttp\Psr7\Utils; +use local_azureblobstorage\api; +use HashContext; +use Psr\Http\Message\StreamInterface; + +/** + * Azure Blob Storage stream wrapper to use "blob:///" files with PHP. + * + * Implementation references, + * https://github.com/aws/aws-sdk-php/blob/master/src/S3/StreamWrapper.php + * https://phpazure.codeplex.com/SourceControl/latest#trunk/library/Microsoft/WindowsAzure/Storage/Blob/Stream.php + * + * @package local_azureblobstorage + * @author Matthew Hilton + * @copyright Catalyst IT + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ +class stream_wrapper { + + /** @var resource|null Stream context (this is set by PHP) */ + public $context; + + /** @var StreamInterface Underlying stream resource */ + private $body; + + /** @var int Size of the body that is opened */ + private $size; + + /** @var string Mode in which the stream was opened */ + private $mode; + + /** @var string The opened protocol (e.g. "blob") */ + private $protocol = 'blob'; + + /** @var HashContext Hash resource that is sent when flushing the file to Azure. */ + private $hash; + + /** @var bool records whether the file was readable when validating the stream_handle */ + private $readable = true; + + /** @var string The key extracted from the path from when the stream was opened */ + private $key = null; + + /** + * Register the blob://' stream wrapper + * + * @param api $client Client to use with the stream wrapper + * @param string $protocol Protocol to register as. 
+ */ + public static function register(api $client, $protocol = 'blob') { + if (in_array($protocol, stream_get_wrappers())) { + stream_wrapper_unregister($protocol); + } + + stream_wrapper_register($protocol, get_called_class(), STREAM_IS_URL); + $default = stream_context_get_options(stream_context_get_default()); + $default[$protocol]['client'] = $client; + stream_context_set_default($default); + } + + /** + * Does not support casting, always returns false. + * @param mixed $castas + * @return bool + */ + public function stream_cast($castas): bool { + return false; + } + + /** + * Closes the stream + */ + public function stream_close() { + $this->body = null; + $this->hash = null; + } + + /** + * Opens the stream, depending on the mode. + * @param mixed $path filepath + * @param mixed $mode file mode constants, see fopen + * @param mixed $options Additional flags + * @param mixed $openedpath + * @return bool True if successful, else false + * @see https://www.php.net/manual/en/function.fopen.php + */ + public function stream_open($path, $mode, $options, &$openedpath): bool { + // Set the protocol. + $this->initProtocol($path); + $this->key = $this->get_key_from_path($path); + + // Trim 'b' and 't' off end. + // these are line ending flags used to handle Unix/Windows line endings. + // We don't care about these, so we just remove them. + $this->mode = rtrim($mode, 'bt'); + + // Check mode is valid for the given path. + if ($errors = $this->validate($path, $this->mode)) { + return $this->triggerError($errors); + } + + $this->hash = hash_init('md5'); + + // Call stream based on the mode. + return $this->boolCall(function() use ($path) { + switch ($this->mode) { + case 'r': + return $this->open_read_stream(); + case 'a': + return $this->open_append_stream(); + default: + return $this->open_write_stream(); + } + }); + } + + /** + * Has stream reached end of file ? 
+ * @return bool + */ + public function stream_eof(): bool { + return $this->body->eof(); + } + + /** + * Flushes (closes) the stream. This is where files are uploaded to Azure. + * @return bool + */ + public function stream_flush() { + // Cannot write in readonly mode, exit. + if ($this->mode == 'r') { + return false; + } + + // Return to start of stream. + if ($this->body->isSeekable()) { + $this->body->seek(0); + } + + // Get the hash of the file, used as a checksum in Azure. + // Azure will reject it on the server side if the given MD5 + // does not match what they receive. + $md5 = hex2bin(hash_final($this->hash)); + + // Upload the blob. Under the hood this may be a multipart upload if the file is large enough. + $this->get_client()->put_blob_async($this->key, $this->body, $md5)->wait(); + return true; + } + + /** + * Reads the stream + * @param int $count Number of bytes to read + * @return string data returned from stream. + */ + public function stream_read($count) { + // If the file isn't readable, we need to return no content. Azure can emit XML here otherwise. + return $this->readable ? $this->body->read($count) : ''; + } + + /** + * Go to a position in the stream + * @param int $offset + * @param int $whence + * @return bool if successful. + */ + public function stream_seek($offset, $whence = SEEK_SET) { + // Cannot seek if underlying body is not seekable. + if (!$this->body->isSeekable()) { + return false; + } + + return $this->boolCall(function () use ($offset, $whence) { + $this->body->seek($offset, $whence); + return true; + }); + } + + /** + * Returns current position of stream. + * @return bool + */ + public function stream_tell() { + return $this->boolCall(function() { + return $this->body->tell(); + }); + } + + /** + * Write to the stream. + * @param string $data + * @return int Number of bytes successfully written. 
+ */ + public function stream_write($data) { + // Update the md5 hash as we go along, + // it is used for verification when uploading to Azure. + hash_update($this->hash, $data); + return $this->body->write($data); + } + + /** + * Get stats about the stream + * @return array + */ + public function stream_stat() { + $stat = $this->getStatTemplate(); + $stat[7] = $stat['size'] = $this->get_size(); + $stat[2] = $stat['mode'] = $this->mode; + + return $stat; + } + + /** + * url_stat + * + * Provides information for is_dir, is_file, filesize, etc. Works on + * buckets, keys, and prefixes. + * @link http://www.php.net/manual/en/streamwrapper.url-stat.php + * + * @param string $path + * @param mixed $flags + * + * @return mixed + */ + public function url_stat($path, $flags) { + $stat = $this->getStatTemplate(); + + try { + $key = $this->get_key_from_path($path); + $res = $this->get_client()->get_blob_properties_async($key)->wait(); + + $contentlength = current($res->getHeader('Content-Length')); + $lastmodified = strtotime(current($res->getHeader('Last-Modified'))); + + $stat['size'] = $stat[7] = $contentlength; + $stat['mtime'] = $stat[9] = $lastmodified; + $stat['ctime'] = $stat[10] = $lastmodified; + + // Regular file with 0777 access - see "man 2 stat". + $stat['mode'] = $stat[2] = 0100777; + + return $stat; + + // ClientException is thrown on 4xx errors e.g. 404. + } catch (ClientException $ex) { + // The specified blob does not exist. + return false; + } + } + + /** + * Unlinks (deletes) a given file. + * @param string $path + * @return bool if successful + */ + public function unlink(string $path): bool { + return $this->boolcall(function() use ($path) { + $client = $this->get_client(); + $key = $this->get_key_from_path($path); + $client->delete_blob_async($key)->wait(); + return true; + }); + } + + /** + * Parse the protocol out of the given path. 
+ * + * @param string $path + */ + private function initprotocol($path) { + $parts = explode('://', $path, 2); + $this->protocol = $parts[0] ?: 'blob'; + } + + /** + * Extracts the blob key from the given filepath (filepath is usually blob://key) + * @param string $path + * @return string|null + */ + private function get_key_from_path(string $path): ?string { + // Remove the protocol. + $parts = explode('://', $path); + return $parts[1] ?: null; + } + + /** + * Validates the provided stream arguments for fopen + * @param string $path + * @param string $mode + * @return array of error messages, or empty if ok. + */ + private function validate($path, $mode): array { + $errors = []; + + // Ensure the key is correctly set in the options. + // it might not have been parsed correctly. + if (!$this->key) { + $errors[] = 'Could not parse the filepath. You must specify a path in the ' + . 'form of blob://container/key'; + } + + // Ensure mode is valid, we don't support every mode. + if (!in_array($mode, ['r', 'w', 'a', 'x'])) { + $errors[] = "Mode not supported: {$mode}. " + . "Use one 'r', 'w', 'a', or 'x'."; + } + + $key = $this->get_key_from_path($path); + $blobexists = $this->blob_exists($key); + + // When using mode "x" validate if the file exists before attempting to read. + if ($mode == 'x' && $blobexists) { + $errors[] = "{$path} already exists on Azure Blob Storage"; + } + + // When using mode 'r' we should validate the file exists before opening a handle on it. + if ($mode == 'r' && !$blobexists) { + $errors[] = "{$path} does not exist on Azure Blob Storage"; + $this->readable = false; + } + + return $errors; + } + + /** + * Determines if a blob exists in azure. + * @param string $key + * @return bool true if exists, else false. + */ + private function blob_exists(string $key): bool { + try { + $this->get_client()->get_blob_properties_async($key)->wait(); + + // No exception, blob exists. 
+ return true; + } catch (ClientException $e) { + // Exception was 404 indicating it connected, but the blob did not exist. + if ($e->getResponse()->getStatusCode() == 404) { + return false; + } + + // Else another error ocurred, re-throw. + throw $e; + } + } + + /** + * Get the stream context options available to the current stream + * @return array + */ + private function get_options(): array { + // Context is not set when doing things like stat. + if ($this->context === null) { + $options = []; + } else { + $options = stream_context_get_options($this->context); + $options = isset($options[$this->protocol]) + ? $options[$this->protocol] + : []; + } + + $default = stream_context_get_options(stream_context_get_default()); + $default = isset($default[$this->protocol]) + ? $default[$this->protocol] + : []; + $result = $options + $default; + + return $result; + } + + /** + * Get a specific stream context option + * + * @param string $name Name of the option to retrieve + * + * @return mixed|null + */ + private function get_option($name) { + $options = $this->get_options(); + return isset($options[$name]) ? $options[$name] : null; + } + + /** + * Gets the client. + * + * @return api + * @throws \RuntimeException if no client has been configured + */ + private function get_client() { + if (!$client = $this->get_option('client')) { + throw new \RuntimeException('No client in stream context'); + } + + return $client; + } + + /** + * Opens a readable stream. + * @return bool True if successful, else false. + */ + private function open_read_stream() { + $client = $this->get_client(); + + try { + $res = $client->get_blob_async($this->key)->wait(); + $this->body = $res->getBody(); + } catch (ClientException $e) { + // Could not open stream. + return false; + } + + // Wrap the body in a caching entity body if seeking is allowed. 
+ if ($this->get_option('seekable') && !$this->body->isSeekable()) { + $this->body = new CachingStream($this->body); + } + + return true; + } + + /** + * Opens a stream for writing. + * @return bool True if successfull. + */ + private function open_write_stream() { + // A writeable stream is actually just a stream to a temp file. + // the actual Azure upload only takes place once the stream is flushed (i.e. closed). + $this->body = Utils::streamFor(fopen('php://temp', 'r+')); + return true; + } + + /** + * Opens a stream to append to a file. + * @return bool + */ + private function open_append_stream(): bool { + try { + // Get the body of the object and seek to the end of the stream. + $client = $this->get_client(); + $this->body = $client->get_blob_async($this->key)->wait()->getBody(); + $this->body->seek(0, SEEK_END); + return true; + + // Client exceptions are thrown on 4xx errors, e.g. 404. + } catch (ClientException $e) { + // The object does not exist, so use a simple write stream. + return $this->open_write_stream(); + } + } + + /** + * Gets a URL stat template with default values + * These are returned in both numeric and associative values + * @see https://www.php.net/manual/en/function.stat.php + * @return array + */ + private function getstattemplate() { + return [ + 0 => 0, 'dev' => 0, + 1 => 0, 'ino' => 0, + 2 => 0, 'mode' => 0, + 3 => 0, 'nlink' => 0, + 4 => 0, 'uid' => 0, + 5 => 0, 'gid' => 0, + 6 => -1, 'rdev' => -1, + 7 => 0, 'size' => 0, + 8 => 0, 'atime' => 0, + 9 => 0, 'mtime' => 0, + 10 => 0, 'ctime' => 0, + 11 => -1, 'blksize' => -1, + 12 => -1, 'blocks' => -1, + ]; + } + + /** + * Invokes a callable and triggers an error if an exception occurs while + * calling the function. 
+ * @param callable $fn + * @param int $flags + * @return bool + */ + private function boolcall(callable $fn, $flags = null): bool { + try { + return $fn(); + } catch (\Exception $e) { + return $this->triggerError($e->getMessage(), $flags); + } + } + + /** + * Trigger one or more errors + * + * @param string|array $errors Errors to trigger + * @param mixed $flags If set to STREAM_URL_STAT_QUIET, then no + * error or exception occurs + * @return bool Returns false + * @throws \RuntimeException if throw_errors is true + */ + private function triggererror($errors, $flags = null): bool { + // This is triggered with things like file_exists(). + if ($flags & STREAM_URL_STAT_QUIET) { + return $flags & STREAM_URL_STAT_LINK + // This is triggered for things like is_link(). + ? $this->getStatTemplate() + : false; + } + + // This is triggered when doing things like lstat() or stat(). + trigger_error(implode("\n", (array) $errors), E_USER_WARNING); + return false; + } + + /** + * Returns the size of the opened object body. + * @return int|null + */ + private function get_size(): ?int { + $size = $this->body->getSize(); + return $size !== null ? $size : $this->size; + } +} diff --git a/lang/en/local_azureblobstorage.php b/lang/en/local_azureblobstorage.php index 7cc2100..eb5443b 100644 --- a/lang/en/local_azureblobstorage.php +++ b/lang/en/local_azureblobstorage.php @@ -1,5 +1,29 @@ . 
+ +/** + * Azure blob storage lang strings + * + * @package local_azureblobstorage + * @author Matthew Hilton + * @copyright 2024 Catalyst IT + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ defined('MOODLE_INTERNAL') || die(); -$string['pluginname'] = 'Azure Blob Storage interface for PHP'; \ No newline at end of file +$string['pluginname'] = 'Azure Blob Storage interface for PHP'; +$string['privacy:no_data_reason'] = 'No user data is stored'; diff --git a/version.php b/version.php index d0cc1cf..870246e 100644 --- a/version.php +++ b/version.php @@ -14,10 +14,19 @@ // You should have received a copy of the GNU General Public License // along with Moodle. If not, see <http://www.gnu.org/licenses/>. +/** + * Azure blob storage API + * + * @package local_azureblobstorage + * @author Matthew Hilton + * @copyright 2024 Catalyst IT + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ + defined('MOODLE_INTERNAL') || die(); -$plugin->version = 2018072500; // The current plugin version (Date: YYYYMMDDXX). -$plugin->release = 2017102000; // Same as version. -$plugin->requires = 2013111811; +$plugin->version = 2024101400; // The current plugin version (Date: YYYYMMDDXX). +$plugin->release = 2024101400; // Same as version.
+$plugin->requires = 2024042200; // 4.4.0, PHP 8.1.0+ $plugin->component = "local_azureblobstorage"; -$plugin->maturity = MATURITY_STABLE; +$plugin->maturity = MATURITY_ALPHA; From 4350d8f6f6681e85429d8ef9ba21804993b17362 Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Tue, 22 Oct 2024 16:01:21 +1000 Subject: [PATCH 3/9] feat: redact SAS token from error messages --- classes/stream_wrapper.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/classes/stream_wrapper.php b/classes/stream_wrapper.php index 305aba9..bd1b830 100644 --- a/classes/stream_wrapper.php +++ b/classes/stream_wrapper.php @@ -523,8 +523,13 @@ private function triggererror($errors, $flags = null): bool { : false; } + // Redact the SAS token from the error to avoid accidental leakage. + $errormsg = implode("\n", (array) $errors); + $sastoken = $this->get_client()->sastoken; + $errormsg = str_replace($sastoken, 'SAS_TOKEN_REDACTED', $errormsg); + // This is triggered when doing things like lstat() or stat(). 
- trigger_error(implode("\n", (array) $errors), E_USER_WARNING); + trigger_error($errormsg, E_USER_WARNING); return false; } From f8a16404e3627ff1dc1063e5a716abc832571395 Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Mon, 28 Oct 2024 14:14:13 +1000 Subject: [PATCH 4/9] ci: add ci --- .github/workflows/ci.yml | 8 ++++++++ version.php | 1 + 2 files changed, 9 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..75a7a00 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,8 @@ +# .github/workflows/ci.yml +name: ci + +on: [push, pull_request] + +jobs: + ci: + uses: catalyst/catalyst-moodle-workflows/.github/workflows/ci.yml@main \ No newline at end of file diff --git a/version.php b/version.php index 870246e..f386c56 100644 --- a/version.php +++ b/version.php @@ -30,3 +30,4 @@ $plugin->requires = 2024042200; // 4.4.0, PHP 8.1.0+ $plugin->component = "local_azureblobstorage"; $plugin->maturity = MATURITY_ALPHA; +$plugin->supported = [404, 405]; From 0385564fd668f7d770f87591a677f56ec07e4484 Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Mon, 28 Oct 2024 14:33:06 +1000 Subject: [PATCH 5/9] ci: phpcs fixup --- classes/api.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/classes/api.php b/classes/api.php index ca63952..62751e5 100644 --- a/classes/api.php +++ b/classes/api.php @@ -79,11 +79,11 @@ class api { */ public function __construct( /** @var string Azure storage account name */ - readonly public string $account, + public readonly string $account, /** @var string Azure storage container name */ - readonly public string $container, + public readonly string $container, /** @var string SAS token for authentication */ - readonly public string $sastoken + public readonly string $sastoken ) { $this->client = new Client(); } From b4905d4358d72ae404e4c4986e93f23a435dd71c Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Mon, 28 
Oct 2024 14:44:25 +1000 Subject: [PATCH 6/9] ci: disable phpunit --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75a7a00..ecad698 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,4 +5,6 @@ on: [push, pull_request] jobs: ci: - uses: catalyst/catalyst-moodle-workflows/.github/workflows/ci.yml@main \ No newline at end of file + uses: catalyst/catalyst-moodle-workflows/.github/workflows/ci.yml@main + with: + disable_phpunit: true # There are no phpunit tests, and this breaks the Moodle CI if phpunit runs and there are no tests. \ No newline at end of file From dc631e65569dadde627e89a8b8619665b2b54a3c Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Mon, 28 Oct 2024 16:06:39 +1000 Subject: [PATCH 7/9] bugfix: stream reading blobs --- classes/api.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/classes/api.php b/classes/api.php index 62751e5..7ae1d42 100644 --- a/classes/api.php +++ b/classes/api.php @@ -132,7 +132,8 @@ private function build_blob_properties_url(string $blobkey): string { * @return PromiseInterface Promise that resolves a ResponseInterface value where the body is a stream of the blob contents. */ public function get_blob_async(string $key): PromiseInterface { - return $this->client->getAsync($this->build_blob_url($key)); + // Enable streaming response, useful for large files e.g. videos. 
+ return $this->client->getAsync($this->build_blob_url($key), ['stream' => true]); } /** From bbe04be0ace86be250f7cd5b5a102755968d2817 Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Thu, 31 Oct 2024 14:14:37 +1000 Subject: [PATCH 8/9] bugfix: fix sas token redaction --- classes/api.php | 48 +++++++++++++++++++++++++++----------- classes/stream_wrapper.php | 6 ++--- version.php | 4 ++-- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/classes/api.php b/classes/api.php index 7ae1d42..0540336 100644 --- a/classes/api.php +++ b/classes/api.php @@ -23,6 +23,7 @@ use GuzzleHttp\Psr7\Request; use Psr\Http\Message\StreamInterface; use coding_exception; +use GuzzleHttp\Exception\RequestException; /** * Azure blob storage API. @@ -39,7 +40,7 @@ class api { /** * @var Client Guzzle HTTP client for making requests */ - private readonly Client $client; + private Client $client; /** * @var int Threshold before blob uploads using multipart upload. @@ -76,14 +77,17 @@ class api { * @param string $account Azure storage account name * @param string $container Azure storage container name (inside the given storage account). * @param string $sastoken SAS (Shared access secret) token for authentication. + * @param bool $redactsastoken Whether to redact the SAS token from error messages to avoid accidental leakage.
*/ public function __construct( /** @var string Azure storage account name */ - public readonly string $account, + public string $account, /** @var string Azure storage container name */ - public readonly string $container, + public string $container, /** @var string SAS token for authentication */ - public readonly string $sastoken + public string $sastoken, + /** @var bool If should redact SAS token from error messages to avoid accidental leakage */ + public bool $redactsastoken = true ) { $this->client = new Client(); } @@ -133,7 +137,8 @@ private function build_blob_properties_url(string $blobkey): string { */ public function get_blob_async(string $key): PromiseInterface { // Enable streaming response, useful for large files e.g. videos. - return $this->client->getAsync($this->build_blob_url($key), ['stream' => true]); + return $this->client->getAsync($this->build_blob_url($key), ['stream' => true]) + ->then(null, $this->clean_exception_sas_if_needed()); } /** @@ -142,7 +147,7 @@ public function get_blob_async(string $key): PromiseInterface { * @return PromiseInterface Promise that resolves a ResponseInterface value where the properties are in the response headers. */ public function get_blob_properties_async(string $key): PromiseInterface { - return $this->client->headAsync($this->build_blob_url($key)); + return $this->client->headAsync($this->build_blob_url($key))->then(null, $this->clean_exception_sas_if_needed()); } /** @@ -151,7 +156,7 @@ public function get_blob_properties_async(string $key): PromiseInterface { * @return PromiseInterface Promise that resolves once the delete request succeeds. 
*/ public function delete_blob_async(string $key): PromiseInterface { - return $this->client->deleteAsync($this->build_blob_url($key)); + return $this->client->deleteAsync($this->build_blob_url($key))->then(null, $this->clean_exception_sas_if_needed()); } /** @@ -194,7 +199,7 @@ public function put_blob_single_async(string $key, StreamInterface $contentstrea ], 'body' => $contentstream, ] - ); + )->then(null, $this->clean_exception_sas_if_needed()); } /** @@ -222,14 +227,14 @@ public function put_blob_multipart_async(string $key, StreamInterface $contentst while (true) { $content = $contentstream->read(self::MULTIPART_BLOCK_SIZE); - // Each block has its own md5 specific to itself. - $blockmd5 = base64_encode(hex2bin(md5($content))); - // Finished reading, nothing more to upload. if (empty($content)) { break; } + // Each block has its own md5 specific to itself. + $blockmd5 = base64_encode(hex2bin(md5($content))); + // The block ID must be the same length regardles of the counter value. // So pad them with zeros. $blockid = base64_encode( @@ -237,7 +242,7 @@ public function put_blob_multipart_async(string $key, StreamInterface $contentst ); $request = new Request('PUT', $this->build_blob_block_url($key, $blockid), ['content-md5' => $blockmd5], $content); - $promises[] = $this->client->sendAsync($request); + $promises[] = $this->client->sendAsync($request)->then(null, $this->clean_exception_sas_if_needed()); $blockids[] = $blockid; }; @@ -253,14 +258,14 @@ public function put_blob_multipart_async(string $key, StreamInterface $contentst $bodymd5 = base64_encode(hex2bin(md5($body))); $request = new Request('PUT', $this->build_blocklist_url($key), ['Content-Type' => 'application/xml', 'content-md5' => $bodymd5], $body); - $this->client->send($request); + $this->client->sendAsync($request)->then(null, $this->clean_exception_sas_if_needed())->wait(); // Now it is combined, set the md5 and content type on the completed blob. 
$request = new Request('PUT', $this->build_blob_properties_url($key), [ 'x-ms-blob-content-md5' => base64_encode($md5), 'x-ms-blob-content-type' => $contenttype, ]); - $this->client->send($request); + $this->client->sendAsync($request)->then(null, $this->clean_exception_sas_if_needed())->wait(); // Done, resolve the entire promise. $entirepromise->resolve('fulfilled'); @@ -294,4 +299,19 @@ private function make_block_list_xml(array $blockidlist): string { $string .= "\n"; return $string; } + + /** + * Returns a request exception handling function that redacts the SAS token from error messages if needed. + * @return callable + */ + private function clean_exception_sas_if_needed(): callable { + return function(RequestException $ex) { + if ($this->redactsastoken) { + $newmsg = str_replace($this->sastoken, '[SAS TOKEN REDACTED]', $ex->getMessage()); + $exceptiontype = get_class($ex); + throw new $exceptiontype($newmsg, $ex->getRequest(), $ex->getResponse(), $ex, $ex->getHandlerContext()); + } + throw $ex; + }; + } } diff --git a/classes/stream_wrapper.php b/classes/stream_wrapper.php index bd1b830..b565713 100644 --- a/classes/stream_wrapper.php +++ b/classes/stream_wrapper.php @@ -197,12 +197,10 @@ public function stream_seek($offset, $whence = SEEK_SET) { /** * Returns current position of stream. - * @return bool + * @return int */ public function stream_tell() { - return $this->boolCall(function() { - return $this->body->tell(); - }); + return $this->body->tell(); } /** diff --git a/version.php b/version.php index f386c56..0028dda 100644 --- a/version.php +++ b/version.php @@ -27,7 +27,7 @@ $plugin->version = 2024101400; // The current plugin version (Date: YYYYMMDDXX). $plugin->release = 2024101400; // Same as version. 
-$plugin->requires = 2024042200; // 4.4.0, PHP 8.1.0+ +$plugin->requires = 2023042400; // 4.2.0, PHP 8.0.0+ $plugin->component = "local_azureblobstorage"; $plugin->maturity = MATURITY_ALPHA; -$plugin->supported = [404, 405]; +$plugin->supported = [402, 405]; From 0ea03eff4f38bea256c67bc0d18dcd6f3e3b663a Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Thu, 31 Oct 2024 14:18:07 +1000 Subject: [PATCH 9/9] docs: fixup readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ec66707..c37e901 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This is mainly used as a dependency when using Azure storage with tool_objectfs, | Branch | Version support | PHP Version | | ---------------- | --------------- | ------------ | -| MOODLE_44_STABLE | 4.4 + | 8.1.0+ | +| MOODLE_402_STABLE | 4.2 + | 8.0.0+ | ## Installation