From ad847c7f6266a200e46b5167de892a4665948e57 Mon Sep 17 00:00:00 2001 From: Ondra Urban Date: Thu, 15 Aug 2019 14:55:08 +0200 Subject: [PATCH] Improve docs, bump package versions --- CHANGELOG.md | 4 ++++ docs/api/RequestQueue.md | 3 +++ docs/api/puppeteer.md | 10 ++++++---- docs/api/utils.md | 11 ++++++++++- package.json | 8 ++++---- src/enqueue_links/click_elements.js | 10 ++++++---- src/enqueue_links/enqueue_links.js | 13 +++++++++++++ src/request_queue.js | 3 +++ 8 files changed, 49 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 149d71809c00..d505c3ac806f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +0.15.5 / 2019-08-15 +==================== +- This release just updates some dependencies (not Puppeteer). + 0.15.4 / 2019-08-02 ==================== - **DEPRECATED**: `dataset.delete()`, `keyValueStore.delete()` and `requestQueue.delete()` methods have been diff --git a/docs/api/RequestQueue.md b/docs/api/RequestQueue.md index 8a42e98cccb5..b9f71ae8c996 100644 --- a/docs/api/RequestQueue.md +++ b/docs/api/RequestQueue.md @@ -84,6 +84,9 @@ Adds a request to the queue. If a request with the same `uniqueKey` property is already present in the queue, it will not be updated. You can find out whether this happened from the resulting [`QueueOperationInfo`](../typedefs/queueoperationinfo) object. +To add multiple requests to the queue by extracting links from a webpage, see the [`Apify.utils.enqueueLinks()`](utils#utils.enqueueLinks) helper +function. + diff --git a/docs/api/puppeteer.md b/docs/api/puppeteer.md index cc7410ac3231..668b15278215 100644 --- a/docs/api/puppeteer.md +++ b/docs/api/puppeteer.md @@ -135,10 +135,12 @@ objects. a combination of url, method and payload which enables crawling of websites that navigate using form submits (POST requests).

Example:

-
  function transformRequestFunction(request) {
-      request.userData.foo = 'bar';
-      request.useExtendedUniqueKey = true;
-      return request;
+
  {
+      transformRequestFunction: (request) => {
+          request.userData.foo = 'bar';
+          request.useExtendedUniqueKey = true;
+          return request;
+      }
   }
diff --git a/docs/api/utils.md b/docs/api/utils.md index 0847a43fad9a..09ef351e44b5 100644 --- a/docs/api/utils.md +++ b/docs/api/utils.md @@ -118,12 +118,21 @@ objects. -
[options.waitForPageIdleSecs] number (default: 1)
[options.transformRequestFunction] function

Just before a new Request is constructed and enqueued to the RequestQueue, this function can be used +

Signature: (Request): Request

+

Just before a new Request is constructed and enqueued to the RequestQueue, this function can be used to remove it or modify its contents such as userData, payload or, most importantly, uniqueKey. This is useful when you need to enqueue multiple Requests to the queue that share the same URL, but differ in methods or payloads, or to dynamically update or create userData.

For example: by adding keepUrlFragment: true to the request object, URL fragments will not be removed when uniqueKey is computed.

+

Example:

+
  {
+      transformRequestFunction: (request) => {
+          request.userData.foo = 'bar';
+          request.keepUrlFragment = true;
+          return request;
+      }
+  }
diff --git a/package.json b/package.json index 0683fe569e35..d4681a2f6656 100644 --- a/package.json +++ b/package.json @@ -49,11 +49,11 @@ "lint:fix": "./node_modules/.bin/eslint ./src ./test --ext .js,.jsx --fix" }, "dependencies": { - "@apify/http-request": "^1.0.5", + "@apify/http-request": "^1.0.6", "@apify/ps-tree": "^1.1.3", - "apify-client": "^0.5.24", - "apify-shared": "^0.1.55", - "cheerio": "^1.0.0-rc.2", + "apify-client": "^0.5.25", + "apify-shared": "^0.1.56", + "cheerio": "^1.0.0-rc.3", "content-type": "^1.0.4", "express": "^4.17.1", "fs-extra": "^8.1.0", diff --git a/src/enqueue_links/click_elements.js b/src/enqueue_links/click_elements.js index 01cc78f2914c..3451c1f7e43c 100644 --- a/src/enqueue_links/click_elements.js +++ b/src/enqueue_links/click_elements.js @@ -87,10 +87,12 @@ const STARTING_Z_INDEX = 2147400000; * * **Example:** * ```javascript - * function transformRequestFunction(request) { - * request.userData.foo = 'bar'; - * request.useExtendedUniqueKey = true; - * return request; + * { + * transformRequestFunction: (request) => { + * request.userData.foo = 'bar'; + * request.useExtendedUniqueKey = true; + * return request; + * } * } * ``` * @param {number} [options.waitForPageIdleSecs=1] diff --git a/src/enqueue_links/enqueue_links.js b/src/enqueue_links/enqueue_links.js index a5f457642417..fec56dc3d319 100644 --- a/src/enqueue_links/enqueue_links.js +++ b/src/enqueue_links/enqueue_links.js @@ -63,6 +63,8 @@ import { constructPseudoUrlInstances, createRequests, addRequestsToQueueInBatche * If `pseudoUrls` is an empty array, `null` or `undefined`, then the function * enqueues all links found on the page. * @param {Function} [options.transformRequestFunction] + * **Signature:** ({@link Request}): {@link Request} + * * Just before a new {@link Request} is constructed and enqueued to the {@link RequestQueue}, this function can be used * to remove it or modify its contents such as `userData`, `payload` or, most importantly `uniqueKey`. 
This is useful * when you need to enqueue multiple `Requests` to the queue that share the same URL, but differ in methods or payloads, @@ -70,6 +72,17 @@ import { constructPseudoUrlInstances, createRequests, addRequestsToQueueInBatche * * For example: by adding `keepUrlFragment: true` to the `request` object, URL fragments will not be removed * when `uniqueKey` is computed. + * + * **Example:** + * ```javascript + * { + * transformRequestFunction: (request) => { + * request.userData.foo = 'bar'; + * request.keepUrlFragment = true; + * return request; + * } + * } + * ``` * @return {Promise} * Promise that resolves to an array of {@link QueueOperationInfo} objects. * @memberOf utils diff --git a/src/request_queue.js b/src/request_queue.js index 66ba2fd0c40e..397a0fa773a3 100644 --- a/src/request_queue.js +++ b/src/request_queue.js @@ -247,6 +247,9 @@ export class RequestQueue { * it will not be updated. You can find out whether this happened from the resulting * {@link QueueOperationInfo} object. * + * To add multiple requests to the queue by extracting links from a webpage, + * see the [`Apify.utils.enqueueLinks()`](utils#utils.enqueueLinks) helper function. + * * @param {Request|Object} request {@link Request} object or vanilla object with request data. * Note that the function sets the `uniqueKey` and `id` fields to the passed object. * @param {Object} [options]