From bccb2319bc564984648239cf1f1aa138a62b8b66 Mon Sep 17 00:00:00 2001 From: philip-ellis-sp Date: Fri, 21 Oct 2022 10:49:53 -0400 Subject: [PATCH] modified crawler to index site more effectively --- algolia/config.json | 193 +++++++++++++++++++++++++++++++--------- algolia/dev_config.json | 165 ++++++++++++++++++++++++++++++++++ 2 files changed, 316 insertions(+), 42 deletions(-) create mode 100644 algolia/dev_config.json diff --git a/algolia/config.json b/algolia/config.json index e4c685c7d..4b3af064f 100644 --- a/algolia/config.json +++ b/algolia/config.json @@ -1,29 +1,113 @@ { - "index_name": "prod_DEVELOPER_SAILPOINT_COM", - "start_urls": [ - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/getting-started/", - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/event-triggers/getting-started", - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/saas-configuration/", - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/saas-connectivity", - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/transforms", - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/api/v3", - "https://sailpoint-oss.github.io/developer.sailpoint.com/idn/api/beta", - "https://sailpoint-oss.github.io/developer.sailpoint.com/iiq/api" + "index_name": "prod_DEVELOPER_SAILPOINT_COM", + "start_urls": [ + { + "url": "https://developer.sailpoint.com/idn/docs/transforms", + "tags": ["IDN Documentation", "Transforms"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/rules", + "tags": ["IDN Documentation", "Rules"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/event-triggers", + "tags": ["IDN Documentation", "Event Triggers"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/saas-configuration", + "tags": ["IDN Documentation", "SaaS Configuration"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/saas-connectivity", + "tags": ["IDN Documentation", "SaaS Connectivity"] 
+ }, + { + "url": "https://developer.sailpoint.com/idn/docs/", + "tags": ["IDN Documentation"] + }, - ], - "sitemap_urls": [ - "https://sailpoint-oss.github.io/developer.sailpoint.com/sitemap.xml" - ], - "sitemap_alternate_links": true, - "stop_urls": [], - "selectors": { - "lvl0": { + + + { + "url": "https://developer.sailpoint.com/idn/api/getting-started", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/authentication", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/standard-collection-parameters", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/rate-limit", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + + + + { + "url": "https://developer.sailpoint.com/idn/api/v3", + "selectors_key": "api_v3", + "tags": ["IDN V3 APIs"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/beta", + "selectors_key": "api_v3", + "tags": ["IDN Beta APIs"] + }, + { + "url": "https://developer.sailpoint.com/iiq/api", + "selectors_key": "api_iiq", + "tags": ["IIQ APIs"] + } + ], + "js_render": false, + "sitemap_urls": [ + "https://developer.sailpoint.com/sitemap.xml" + ], + "sitemap_alternate_links": true, + "stop_urls": [], + "selectors": { + "default" : { + "lvl0" : { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]", + "type": "xpath", + "global": true, + "default_value": "IDN Documentation" + }, + "lvl1": { "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", "type": "xpath", "global": 
true, - "default_value": "Documentation" + "default_value": "IDN Documentation" + }, + "lvl2": "header h1", + "lvl3": "article h2", + "lvl4": "article h3", + "lvl5": "article h4", + "lvl6": "article h5, article td:first-child", + "lvl7": "article h6", + "text": "article p, article li, article td:last-child" + }, + "api_v3": { + "lvl0" : { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]", + "type": "xpath", + "global": true, + "default_value": "IDN API Documentation" + }, + "lvl1": { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", + "type": "xpath", + "global": true, + "default_value": "IDN API Documentation" }, - "lvl1": "header h1", "lvl2": "article h2", "lvl3": "article h3", "lvl4": "article h4", @@ -31,26 +115,51 @@ "lvl6": "article h6", "text": "article p, article li, article td:last-child" }, - "strip_chars": " .,;:#", - "custom_settings": { - "separatorsToIndex": "_", - "attributesForFaceting": [ - "language", - "version", - "type", - "docusaurus_tag" - ], - "attributesToRetrieve": [ - "hierarchy", - "content", - "anchor", - "url", - "url_without_anchor", - "type" - ] - }, - "conversation_id": [ - "1090805758" + "api_iiq": { + "lvl0" : { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]", + "type": "xpath", + "global": true, + "default_value": "IIQ API Documentation" + }, + "lvl1": { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | 
//nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", + "type": "xpath", + "global": true, + "default_value": "IIQ API Documentation" + }, + "lvl2": "article h2", + "lvl3": "article h3", + "lvl4": "article h4", + "lvl5": "article h5, article td:first-child", + "lvl6": "article h6", + "text": "article p, article li, article td:last-child" + } + + + }, + "strip_chars": " .,;:#", + "custom_settings": { + "separatorsToIndex": "_", + "attributesForFaceting": [ + "language", + "version", + "type", + "docusaurus_tag", + "tags" ], - "nb_hits": 8687 - } \ No newline at end of file + "attributesToRetrieve": [ + "hierarchy", + "content", + "anchor", + "url", + "tags", + "url_without_anchor", + "type" + ] + }, + "conversation_id": [ + "1090805758" + ], + "nb_hits": 8687 +} \ No newline at end of file diff --git a/algolia/dev_config.json b/algolia/dev_config.json new file mode 100644 index 000000000..d3ae7f064 --- /dev/null +++ b/algolia/dev_config.json @@ -0,0 +1,165 @@ +{ + "index_name": "dev_DEVELOPER_SAILPOINT_COM", + "start_urls": [ + { + "url": "https://developer.sailpoint.com/idn/docs/transforms", + "tags": ["IDN Documentation", "Transforms"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/rules", + "tags": ["IDN Documentation", "Rules"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/event-triggers", + "tags": ["IDN Documentation", "Event Triggers"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/saas-configuration", + "tags": ["IDN Documentation", "SaaS Configuration"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/saas-connectivity", + "tags": ["IDN Documentation", "SaaS Connectivity"] + }, + { + "url": "https://developer.sailpoint.com/idn/docs/", + "tags": ["IDN Documentation"] + }, + + + + { + "url": "https://developer.sailpoint.com/idn/api/getting-started", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + { + "url": 
"https://developer.sailpoint.com/idn/api/authentication", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/standard-collection-parameters", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/rate-limit", + "selectors_key": "api_v3", + "tags": ["IDN API Documentation"] + }, + + + + { + "url": "https://developer.sailpoint.com/idn/api/v3", + "selectors_key": "api_v3", + "tags": ["IDN V3 APIs"] + }, + { + "url": "https://developer.sailpoint.com/idn/api/beta", + "selectors_key": "api_v3", + "tags": ["IDN Beta APIs"] + }, + { + "url": "https://developer.sailpoint.com/iiq/api", + "selectors_key": "api_iiq", + "tags": ["IIQ APIs"] + } + ], + "js_render": false, + "sitemap_urls": [ + "https://developer.sailpoint.com/sitemap.xml" + ], + "sitemap_alternate_links": true, + "stop_urls": [], + "selectors": { + "default" : { + "lvl0" : { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]", + "type": "xpath", + "global": true, + "default_value": "IDN Documentation" + }, + "lvl1": { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", + "type": "xpath", + "global": true, + "default_value": "IDN Documentation" + }, + "lvl2": "header h1", + "lvl3": "article h2", + "lvl4": "article h3", + "lvl5": "article h4", + "lvl6": "article h5, article td:first-child", + "lvl7": "article h6", + "text": "article p, article li, article td:last-child" + }, + "api_v3": { + "lvl0" : { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist 
menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]", + "type": "xpath", + "global": true, + "default_value": "IDN API Documentation" + }, + "lvl1": { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", + "type": "xpath", + "global": true, + "default_value": "IDN API Documentation" + }, + "lvl2": "article h2", + "lvl3": "article h3", + "lvl4": "article h4", + "lvl5": "article h5, article td:first-child", + "lvl6": "article h6", + "text": "article p, article li, article td:last-child" + }, + "api_iiq": { + "lvl0" : { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]", + "type": "xpath", + "global": true, + "default_value": "IIQ API Documentation" + }, + "lvl1": { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", + "type": "xpath", + "global": true, + "default_value": "IIQ API Documentation" + }, + "lvl2": "article h2", + "lvl3": "article h3", + "lvl4": "article h4", + "lvl5": "article h5, article td:first-child", + "lvl6": "article h6", + "text": "article p, article li, article td:last-child" + } + + + }, + "strip_chars": " .,;:#", + "custom_settings": { + "separatorsToIndex": "_", + "attributesForFaceting": [ + "language", + "version", + "type", + "docusaurus_tag", + "tags" + ], + "attributesToRetrieve": [ + "hierarchy", + "content", + "anchor", + "url", + "tags", + "url_without_anchor", + "type" + ] + }, + "conversation_id": [ + "1090805758" + ], + "nb_hits": 8687 +} \ 
No newline at end of file