From 7a01ef1a59f6c5d22764f82d8bbbdf3112e764aa Mon Sep 17 00:00:00 2001
From: Dale Phurrough <dale@hidale.com>
Date: Fri, 13 Jan 2023 19:08:33 -0800
Subject: [PATCH] add options for custom `lunr` Liquid and JS code (#1068)

This is a prototype for review and discussion. My use and testing of this PR is on top of 6d9d41359c46882d9b64a446d5a83fac5b3e20a7. The changes are trivial to rebase onto `main` and I'm happy to do so if this prototype moves forward.


* Feature request details in linked issue, fixes just-the-docs/just-the-docs#1067
* I welcome feedback and all discussion
* A draft doc site of mine using this PR is at https://docs.hidale.com/

To use the prototype, the two include files need to be customized. Here are mine from the draft website https://github.com/diablodale/dp.docs/commit/9c0d836408af2e72dbce115a01ce6627137e66dd

Co-authored-by: Matt Wang <matt@matthewwang.me>
---
 _includes/lunr/custom-data.json |  0
 _includes/lunr/custom-index.js  |  0
 assets/js/just-the-docs.js      |  1 +
 assets/js/zzzz-search-data.json |  2 ++
 docs/search.md                  | 34 +++++++++++++++++++++++++++++++++
 lib/tasks/search.rake           |  2 ++
 6 files changed, 39 insertions(+)
 create mode 100644 _includes/lunr/custom-data.json
 create mode 100644 _includes/lunr/custom-index.js

diff --git a/_includes/lunr/custom-data.json b/_includes/lunr/custom-data.json
new file mode 100644
index 00000000..e69de29b
diff --git a/_includes/lunr/custom-index.js b/_includes/lunr/custom-index.js
new file mode 100644
index 00000000..e69de29b
diff --git a/assets/js/just-the-docs.js b/assets/js/just-the-docs.js
index f243f07e..07d4f076 100644
--- a/assets/js/just-the-docs.js
+++ b/assets/js/just-the-docs.js
@@ -87,6 +87,7 @@ function initSearch() {
         this.metadataWhitelist = ['position']
 
         for (var i in docs) {
+          {% include lunr/custom-index.js %}
           this.add({
             id: i,
             title: docs[i].title,
diff --git a/assets/js/zzzz-search-data.json b/assets/js/zzzz-search-data.json
index 74dd0bad..370dbe50 100644
--- a/assets/js/zzzz-search-data.json
+++ b/assets/js/zzzz-search-data.json
@@ -51,6 +51,7 @@ permalink: /assets/js/search-data.json
     "title": {{ title | jsonify }},
     "content": {{ content | replace: '</h', ' . </h' | replace: '<hr', ' . <hr' | replace: '</p', ' . </p' | replace: '<ul', ' . <ul' | replace: '</ul', ' . </ul' | replace: '<ol', ' . <ol' | replace: '</ol', ' . </ol' | replace: '</tr', ' . </tr' | replace: '<li', ' | <li' | replace: '</li', ' | </li' | replace: '</td', ' | </td' | replace: '<td', ' | <td' | replace: '</th', ' | </th' | replace: '<th', ' | <th' | strip_html | remove: 'Table of contents' | normalize_whitespace | replace: '. . .', '.' | replace: '. .', '.' | replace: '| |', '|' | append: ' ' | jsonify }},
     "url": "{{ url | relative_url }}",
+    {% include lunr/custom-data.json page=page %}
     "relUrl": "{{ url }}"
   }
         {%- assign i = i | plus: 1 -%}
@@ -62,6 +63,7 @@ permalink: /assets/js/search-data.json
     "title": {{ page.title | jsonify }},
     "content": {{ parts[0] | replace: '</h', ' . </h' | replace: '<hr', ' . <hr' | replace: '</p', ' . </p' | replace: '<ul', ' . <ul' | replace: '</ul', ' . </ul' | replace: '<ol', ' . <ol' | replace: '</ol', ' . </ol' | replace: '</tr', ' . </tr' | replace: '<li', ' | <li' | replace: '</li', ' | </li' | replace: '</td', ' | </td' | replace: '<td', ' | <td' | replace: '</th', ' | </th' | replace: '<th', ' | <th' | strip_html | remove: 'Table of contents' | normalize_whitespace | replace: '. . .', '.' | replace: '. .', '.' | replace: '| |', '|' | append: ' ' | jsonify }},
     "url": "{{ page.url | relative_url }}",
+    {% include lunr/custom-data.json page=page %}
     "relUrl": "{{ page.url }}"
   }
         {%- assign i = i | plus: 1 -%}
diff --git a/docs/search.md b/docs/search.md
index 9ea67c44..b9b9d62e 100644
--- a/docs/search.md
+++ b/docs/search.md
@@ -125,3 +125,37 @@ $ bundle exec just-the-docs rake search:init
 
 This command creates the `assets/js/zzzz-search-data.json` file that Jekyll uses to create your search index.
 Alternatively, you can create the file manually with [this content]({{ site.github.repository_url }}/blob/main/assets/js/zzzz-search-data.json).
+
+## Custom content for search index
+
+By default, the search feature indexes a page's `.content`, `.title`, and *some* headers within the `.content`.
+Other data (e.g., front matter, files in `_data`, `assets`) is not indexed. To index additional data, users can customize what `lunr` indexes.
+
+{: .warning }
+> Customizing search indices is an advanced feature that requires JavaScript and Liquid knowledge.
+
+1. First, ensure that `assets/js/zzzz-search-data.json` is up to date; it can be regenerated with `rake` or manually (see ["Generate search index when used as a gem"](#generate-search-index-when-used-as-a-gem)).
+2. To add Liquid/Jekyll-based data: create a new include at the path `_includes/lunr/custom-data.json`. Insert custom Liquid code that reads various data (e.g., `include.page`, `site.data`, `site.static_files`) and generates valid [JSON](https://www.json.org/json-en.html) fields to add to the index. Because the include is inserted in the middle of a JSON object, every field it emits, including the last one, must end with a comma. Verify the fields in the generated `assets/js/search-data.json`.
+3. For all custom data (Liquid, JavaScript, or external): create a new include in your site at the path `_includes/lunr/custom-index.js`. Add valid JavaScript that adds the relevant fields to the index. You may want to inspect `assets/js/just-the-docs.js` to better understand the code structure. **This step is required: custom data only becomes searchable once this code adds it to the index.**
+
+#### Example
+
+`_includes/lunr/custom-data.json`: this example reads each page's `usage` and `examples` front matter fields, normalizes the text, and writes it to the custom `myusage` and `myexamples` fields of the search data.
+
+{% raw %}
+```liquid
+{%- capture newline %}
+{% endcapture -%}
+"myusage": {{ include.page.usage | markdownify | replace:newline,' ' | strip_html | normalize_whitespace | strip | jsonify }},
+"myexamples": {{ include.page.examples | markdownify | replace:newline,' ' | strip_html | normalize_whitespace | strip | jsonify }},
+```
+{% endraw %}
+
+`_includes/lunr/custom-index.js`: this custom code runs inside a JavaScript loop over all documents. All custom
+fields are accessed as properties of `docs[i]`, such as `docs[i].myusage`.
+Finally, append your custom fields onto the already existing `docs[i].content`.
+
+```javascript
+const content_to_merge = [docs[i].content, docs[i].myusage, docs[i].myexamples];
+docs[i].content = content_to_merge.join(' ');
+```
diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake
index 55d012a9..d1f9fd47 100644
--- a/lib/tasks/search.rake
+++ b/lib/tasks/search.rake
@@ -61,6 +61,7 @@ permalink: /assets/js/search-data.json
     "title": {{ title | jsonify }},
     "content": {{ content | replace: \'</h\', \' . </h\' | replace: \'<hr\', \' . <hr\' | replace: \'</p\', \' . </p\' | replace: \'<ul\', \' . <ul\' | replace: \'</ul\', \' . </ul\' | replace: \'<ol\', \' . <ol\' | replace: \'</ol\', \' . </ol\' | replace: \'</tr\', \' . </tr\' | replace: \'<li\', \' | <li\' | replace: \'</li\', \' | </li\' | replace: \'</td\', \' | </td\' | replace: \'<td\', \' | <td\' | replace: \'</th\', \' | </th\' | replace: \'<th\', \' | <th\' | strip_html | remove: \'Table of contents\' | normalize_whitespace | replace: \'. . .\', \'.\' | replace: \'. .\', \'.\' | replace: \'| |\', \'|\' | append: \' \' | jsonify }},
     "url": "{{ url | relative_url }}",
+    {% include lunr/custom-data.json page=page %}
     "relUrl": "{{ url }}"
   }
         {%- assign i = i | plus: 1 -%}
@@ -72,6 +73,7 @@ permalink: /assets/js/search-data.json
     "title": {{ page.title | jsonify }},
     "content": {{ parts[0] | replace: \'</h\', \' . </h\' | replace: \'<hr\', \' . <hr\' | replace: \'</p\', \' . </p\' | replace: \'<ul\', \' . <ul\' | replace: \'</ul\', \' . </ul\' | replace: \'<ol\', \' . <ol\' | replace: \'</ol\', \' . </ol\' | replace: \'</tr\', \' . </tr\' | replace: \'<li\', \' | <li\' | replace: \'</li\', \' | </li\' | replace: \'</td\', \' | </td\' | replace: \'<td\', \' | <td\' | replace: \'</th\', \' | </th\' | replace: \'<th\', \' | <th\' | strip_html | remove: \'Table of contents\' | normalize_whitespace | replace: \'. . .\', \'.\' | replace: \'. .\', \'.\' | replace: \'| |\', \'|\' | append: \' \' | jsonify }},
     "url": "{{ page.url | relative_url }}",
+    {% include lunr/custom-data.json page=page %}
     "relUrl": "{{ page.url }}"
   }
         {%- assign i = i | plus: 1 -%}
-- 
GitLab