modified crawler to index site more effectively

This commit is contained in:
philip-ellis-sp
2022-10-21 10:49:53 -04:00
parent 67dfeb3314
commit bccb2319bc
2 changed files with 316 additions and 42 deletions

View File

@@ -1,35 +1,142 @@
{
"index_name": "prod_DEVELOPER_SAILPOINT_COM",
"start_urls": [
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/getting-started/",
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/event-triggers/getting-started",
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/saas-configuration/",
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/saas-connectivity",
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/docs/transforms",
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/api/v3",
"https://sailpoint-oss.github.io/developer.sailpoint.com/idn/api/beta",
"https://sailpoint-oss.github.io/developer.sailpoint.com/iiq/api"
{
"url": "https://developer.sailpoint.com/idn/docs/transforms",
"tags": ["IDN Documentation", "Transforms"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/rules",
"tags": ["IDN Documentation", "Rules"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/event-triggers",
"tags": ["IDN Documentation", "Event Triggers"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/saas-configuration",
"tags": ["IDN Documentation", "SaaS Configuration"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/saas-connectivity",
"tags": ["IDN Documentation", "SaaS Connectivity"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/",
"tags": ["IDN Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/getting-started",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/authentication",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/standard-collection-parameters",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/rate-limit",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/v3",
"selectors_key": "api_v3",
"tags": ["IDN V3 APIs"]
},
{
"url": "https://developer.sailpoint.com/idn/api/beta",
"selectors_key": "api_v3",
"tags": ["IDN Beta APIs"]
},
{
"url": "https://developer.sailpoint.com/iiq/api",
"selectors_key": "api_iiq",
"tags": ["IIQ APIs"]
}
],
"js_render": false,
"sitemap_urls": [
"https://sailpoint-oss.github.io/developer.sailpoint.com/sitemap.xml"
"https://developer.sailpoint.com/sitemap.xml"
],
"sitemap_alternate_links": true,
"stop_urls": [],
"selectors": {
"default" : {
"lvl0" : {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]",
"type": "xpath",
"global": true,
"default_value": "IDN Documentation"
},
"lvl1": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "Documentation"
"default_value": "IDN Documentation"
},
"lvl2": "header h1",
"lvl3": "article h2",
"lvl4": "article h3",
"lvl5": "article h4",
"lvl6": "article h5, article td:first-child",
"lvl7": "article h6",
"text": "article p, article li, article td:last-child"
},
"api_v3": {
"lvl0" : {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]",
"type": "xpath",
"global": true,
"default_value": "IDN API Documentation"
},
"lvl1": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "IDN API Documentation"
},
"lvl1": "header h1",
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
},
"api_iiq": {
"lvl0" : {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]",
"type": "xpath",
"global": true,
"default_value": "IIQ API Documentation"
},
"lvl1": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "IIQ API Documentation"
},
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
}
},
"strip_chars": " .,;:#",
"custom_settings": {
@@ -38,13 +145,15 @@
"language",
"version",
"type",
"docusaurus_tag"
"docusaurus_tag",
"tags"
],
"attributesToRetrieve": [
"hierarchy",
"content",
"anchor",
"url",
"tags",
"url_without_anchor",
"type"
]

165
algolia/dev_config.json Normal file
View File

@@ -0,0 +1,165 @@
{
"index_name": "dev_DEVELOPER_SAILPOINT_COM",
"start_urls": [
{
"url": "https://developer.sailpoint.com/idn/docs/transforms",
"tags": ["IDN Documentation", "Transforms"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/rules",
"tags": ["IDN Documentation", "Rules"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/event-triggers",
"tags": ["IDN Documentation", "Event Triggers"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/saas-configuration",
"tags": ["IDN Documentation", "SaaS Configuration"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/saas-connectivity",
"tags": ["IDN Documentation", "SaaS Connectivity"]
},
{
"url": "https://developer.sailpoint.com/idn/docs/",
"tags": ["IDN Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/getting-started",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/authentication",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/standard-collection-parameters",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/rate-limit",
"selectors_key": "api_v3",
"tags": ["IDN API Documentation"]
},
{
"url": "https://developer.sailpoint.com/idn/api/v3",
"selectors_key": "api_v3",
"tags": ["IDN V3 APIs"]
},
{
"url": "https://developer.sailpoint.com/idn/api/beta",
"selectors_key": "api_v3",
"tags": ["IDN Beta APIs"]
},
{
"url": "https://developer.sailpoint.com/iiq/api",
"selectors_key": "api_iiq",
"tags": ["IIQ APIs"]
}
],
"js_render": false,
"sitemap_urls": [
"https://developer.sailpoint.com/sitemap.xml"
],
"sitemap_alternate_links": true,
"stop_urls": [],
"selectors": {
"default" : {
"lvl0" : {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]",
"type": "xpath",
"global": true,
"default_value": "IDN Documentation"
},
"lvl1": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "IDN Documentation"
},
"lvl2": "header h1",
"lvl3": "article h2",
"lvl4": "article h3",
"lvl5": "article h4",
"lvl6": "article h5, article td:first-child",
"lvl7": "article h6",
"text": "article p, article li, article td:last-child"
},
"api_v3": {
"lvl0" : {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]",
"type": "xpath",
"global": true,
"default_value": "IDN API Documentation"
},
"lvl1": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "IDN API Documentation"
},
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
},
"api_iiq": {
"lvl0" : {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[1]",
"type": "xpath",
"global": true,
"default_value": "IIQ API Documentation"
},
"lvl1": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "IIQ API Documentation"
},
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
}
},
"strip_chars": " .,;:#",
"custom_settings": {
"separatorsToIndex": "_",
"attributesForFaceting": [
"language",
"version",
"type",
"docusaurus_tag",
"tags"
],
"attributesToRetrieve": [
"hierarchy",
"content",
"anchor",
"url",
"tags",
"url_without_anchor",
"type"
]
},
"conversation_id": [
"1090805758"
],
"nb_hits": 8687
}