Commit 6dab519a authored by Nacim Goura

optimize search

parent 1ef75d2e
node_modules/ node_modules/
.idea/* .idea/*
.deploy/*
...@@ -18,7 +18,7 @@ standard-minifier-js@2.1.0 # JS minifier run for production mode ...@@ -18,7 +18,7 @@ standard-minifier-js@2.1.0 # JS minifier run for production mode
es5-shim@4.6.15 # ECMAScript 5 compatibility for older browsers. es5-shim@4.6.15 # ECMAScript 5 compatibility for older browsers.
ecmascript@0.8.0 # Enable ECMAScript2015+ syntax in app code ecmascript@0.8.0 # Enable ECMAScript2015+ syntax in app code
fourseven:scss # Compile scss to css fourseven:scss # Compile scss to css
fortawesome:fontawesome fortawesome:fontawesome # icon
# package for view # package for view
kadira:flow-router # FlowRouter is a very simple router for Meteor kadira:flow-router # FlowRouter is a very simple router for Meteor
......
import { Meteor } from 'meteor/meteor'; import { Meteor } from 'meteor/meteor';
import indexationElastic from '../../libs/elasticsearch/elasticsearch'; import indexationElastic from '../../libs/elasticsearch/elasticsearch';
...@@ -8,8 +7,6 @@ const esType = Meteor.settings.private.elasticsearch.esType; ...@@ -8,8 +7,6 @@ const esType = Meteor.settings.private.elasticsearch.esType;
export default class IndexGeneric { export default class IndexGeneric {
indexByBulk(data) { indexByBulk(data) {
console.log('indexation');
console.log(data);
return indexationElastic.bulk(data); return indexationElastic.bulk(data);
} }
...@@ -42,26 +39,34 @@ export default class IndexGeneric { ...@@ -42,26 +39,34 @@ export default class IndexGeneric {
'lorsqu', 'puisqu', 'lorsqu', 'puisqu',
], ],
}, },
french_stop: { french_synonym: {
type: 'stop', type: 'synonym',
stopwords: '_french_', ignore_case: true,
expand: true,
synonyms: ['gosse, enfant'],
}, },
french_stemmer: { french_stemmer: {
type: 'stemmer', type: 'stemmer',
language: 'french', language: 'light_french',
}, },
}, },
analyzer: { analyzer: {
analyzer_french: { french_heavy: {
tokenizer: 'standard', tokenizer: 'icu_tokenizer',
char_filter: ['html_strip'],
filter: [ filter: [
'french_elision', 'french_elision',
'lowercase', 'icu_folding',
'french_stop', 'french_synonym',
'french_stemmer', 'french_stemmer',
], ],
}, },
french_light: {
tokenizer: 'icu_tokenizer',
filter: [
'french_elision',
'icu_folding',
],
},
}, },
}, },
}, },
...@@ -74,20 +79,38 @@ export default class IndexGeneric { ...@@ -74,20 +79,38 @@ export default class IndexGeneric {
}, },
title: { title: {
type: 'text', type: 'text',
analyzer: 'analyzer_french', analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
}, },
title_suggest: { title_suggest: {
type: 'completion', type: 'completion',
analyzer: 'analyzer_french', analyzer: 'french_light',
max_input_length: 100, max_input_length: 100,
}, },
description: { description: {
type: 'text', type: 'text',
analyzer: 'analyzer_french', analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
}, },
body: { body: {
type: 'text', type: 'text',
analyzer: 'analyzer_french', analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
}, },
html: { html: {
type: 'text', type: 'text',
...@@ -99,15 +122,15 @@ export default class IndexGeneric { ...@@ -99,15 +122,15 @@ export default class IndexGeneric {
}, },
h1: { h1: {
type: 'text', type: 'text',
analyzer: 'analyzer_french', analyzer: 'french_light',
}, },
h2: { h2: {
type: 'text', type: 'text',
analyzer: 'analyzer_french', analyzer: 'french_light',
}, },
breadcrumb: { breadcrumb: {
type: 'text', type: 'text',
analyzer: 'analyzer_french', analyzer: 'french_light',
}, },
createdAt: { createdAt: {
type: 'date', type: 'date',
...@@ -125,15 +148,61 @@ export default class IndexGeneric { ...@@ -125,15 +148,61 @@ export default class IndexGeneric {
* @returns {promise} * @returns {promise}
*/ */
search(term) { search(term) {
const params = {}; const params = {
from: 0,
size: 10,
};
if (term) { if (term) {
/**
*
* common ( sépare les tokens les plus présents dans l’index des autres, et ne les utilise que pour améliorer la pertinence )
* fuzziness (permet une recherche même avec des fautes)
*/
params.query = { params.query = {
match: { bool: {
title: { must: [
query: term, {
slop: 50, common: {
}, 'body.stemmed': {
query: term,
},
},
},
{
multi_match: {
query: term,
fuzziness: 'AUTO',
fields: [
'description',
'description.stemmed',
'body',
'body.stemmed',
'url',
],
},
},
],
should: [
{
multi_match: {
query: term,
fuzziness: 'AUTO',
fields: [
'title',
'title.stemmed',
'url',
'breadcrumb',
'h1',
'h2',
'html',
],
},
},
],
minimum_should_match: 1,
boost: 2.0,
}, },
}; };
} }
......
...@@ -79,6 +79,7 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -79,6 +79,7 @@ export default class IndexWebsite extends IndexGeneric {
*/ */
parseData($, url) { parseData($, url) {
const body = $('body'); const body = $('body');
body.html(checkData.cleanHtml(body.html()));
const title = $('title').text(); const title = $('title').text();
const dataForIndex = { const dataForIndex = {
tag: 'site', tag: 'site',
...@@ -87,8 +88,8 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -87,8 +88,8 @@ export default class IndexWebsite extends IndexGeneric {
input: title, input: title,
}, },
description: $('meta[name=description]').attr('content'), description: $('meta[name=description]').attr('content'),
body: checkData.cleanText(body.text()), body: checkData.slugText(checkData.cleanText(body.text())),
html: checkData.cleanHtml(body.html()), html: checkData.cleanText(body.html()),
url: decodeURI(url), url: decodeURI(url),
createdAt: new Date(), createdAt: new Date(),
}; };
...@@ -96,7 +97,7 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -96,7 +97,7 @@ export default class IndexWebsite extends IndexGeneric {
console.log(title); console.log(title);
if ($('div').hasClass('.breadcrumb')) { if ($('div').hasClass('.breadcrumb')) {
dataForIndex.menu = $('.breadcrumb').text(); dataForIndex.breadcrumb = $('.breadcrumb').text();
} }
if ($('h1').length) { if ($('h1').length) {
......
...@@ -38,7 +38,7 @@ Meteor.methods({ ...@@ -38,7 +38,7 @@ Meteor.methods({
return index.autoComplete(term) return index.autoComplete(term)
.then((result) => { .then((result) => {
if (result.suggest) { if (result.suggest) {
return _.map(result.suggest[0].options, 'text'); return _.take(_.map(result.suggest[0].options, 'text'), 5);
} }
return []; return [];
}); });
......
...@@ -13,6 +13,11 @@ ...@@ -13,6 +13,11 @@
<button type="submit" class="btn btn-success">Valider</button> <button type="submit" class="btn btn-success">Valider</button>
</span> </span>
</div> </div>
<h4 class="hidden">
<i class="fa fa-cog fa-spin fa-2x fa-fw"></i>
<span class="sr-only">Loading...</span>
Indexation en cours...
</h4>
</form> </form>
</div> </div>
</div> </div>
......
...@@ -16,8 +16,12 @@ Template.siteIndexationTpl.events({ ...@@ -16,8 +16,12 @@ Template.siteIndexationTpl.events({
}); });
const url = event.target.urlSite.value; const url = event.target.urlSite.value;
$(event.target).find('h4').removeClass('hidden');
Meteor.callPromise('indexWebsite', url) Meteor.callPromise('indexWebsite', url)
.then(() => { .then(() => {
$(event.target).find('h4').addClass('hidden');
displayNotif({ displayNotif({
type: 'success', type: 'success',
title: 'Indexation : ', title: 'Indexation : ',
...@@ -25,6 +29,7 @@ Template.siteIndexationTpl.events({ ...@@ -25,6 +29,7 @@ Template.siteIndexationTpl.events({
save: true, save: true,
}); });
}).catch((error) => { }).catch((error) => {
$(event.target).find('h4').addClass('hidden');
console.log(error); console.log(error);
displayNotif({ displayNotif({
type: 'error', type: 'error',
......
...@@ -2,6 +2,18 @@ ...@@ -2,6 +2,18 @@
import SimpleSchema from 'simpl-schema'; import SimpleSchema from 'simpl-schema';
import sanitizeHtml from 'sanitize-html'; import sanitizeHtml from 'sanitize-html';
import _ from 'lodash'; import _ from 'lodash';
import slug from 'slug';
slug.defaults.modes.pretty = {
replacement: ' ',
symbols: true,
remove: null,
lower: true,
charmap: slug.charmap,
multicharmap: {
'&&': 'et', '||': 'ou',
},
};
const checkData = { const checkData = {
...@@ -15,36 +27,31 @@ const checkData = { ...@@ -15,36 +27,31 @@ const checkData = {
}, },
cleanText(str) { cleanText(str) {
return _.trim(_.replace(str, /[^\S]{2,}/gm, ' ')); // enleve saut de ligne et slash
let cleanStr = _.replace(str, /[\n\\/]/gm, ' ');
// met un espace avant une majuscule
cleanStr = _.replace(cleanStr, /([A-Z])/gm, ' $1');
// enleve les multiples espace
cleanStr = _.replace(cleanStr, /[^\S]{2,}/gm, ' ');
return _.trim(cleanStr);
},
slugText(str) {
return str;
}, },
cleanHtml(html) { cleanHtml(html) {
return sanitizeHtml(html, { return sanitizeHtml(html, {
allowedTags: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'p', allowedTags: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'p', 'a', 'ul', 'ol',
'nl', 'b', 'i', 'strong', 'em', 'strike', 'div', 'nl', 'li', 'b', 'i', 'strong', 'em', 'strike', 'code', 'hr', 'br', 'div',
'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'pre'], 'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'pre'],
allowedAttributes: ['class'], allowedAttributes: [],
selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'], selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'],
transformTags: { allowedSchemes: ['http', 'https', 'ftp', 'mailto'],
a: () => ({ allowedSchemesByTag: {},
tagName: 'a', allowProtocolRelative: true,
text: '',
}),
li: () => ({
tagName: 'li',
text: '',
}),
footer: () => ({
tagName: 'footer',
text: '',
}),
nav: () => ({
tagName: 'nav',
text: '',
}),
},
}); });
} },
}; };
export default checkData; export default checkData;
...@@ -5,8 +5,7 @@ ...@@ -5,8 +5,7 @@
"scripts": { "scripts": {
"start": "meteor --settings settings.json", "start": "meteor --settings settings.json",
"test": "meteor test --driver-package practicalmeteor:mocha --port 3001", "test": "meteor test --driver-package practicalmeteor:mocha --port 3001",
"eslint": "eslint .; exit 0", "eslint": "eslint .; exit 0"
"stylelint": "stylelint './client/main.scss'; exit 0"
}, },
"dependencies": { "dependencies": {
"babel-runtime": "^6.23.0", "babel-runtime": "^6.23.0",
...@@ -14,13 +13,13 @@ ...@@ -14,13 +13,13 @@
"bootstrap-sass": "^3.3.7", "bootstrap-sass": "^3.3.7",
"crawler": "^1.0.5", "crawler": "^1.0.5",
"elasticsearch": "^13.0.1", "elasticsearch": "^13.0.1",
"eslint-plugin-lodash": "^2.4.2",
"izitoast": "^1.1.1", "izitoast": "^1.1.1",
"lodash": "^4.17.4", "lodash": "^4.17.4",
"meteor-node-stubs": "~0.2.11", "meteor-node-stubs": "~0.2.11",
"sanitize-html": "^1.14.1", "sanitize-html": "^1.14.1",
"simpl-schema": "^0.3.0", "simpl-schema": "^0.3.0",
"sitemapper": "^2.1.13", "sitemapper": "^2.1.13",
"slug": "^0.9.1",
"sweetalert2": "^6.6.5" "sweetalert2": "^6.6.5"
}, },
"devDependencies": { "devDependencies": {
...@@ -31,6 +30,7 @@ ...@@ -31,6 +30,7 @@
"eslint-import-resolver-meteor": "^0.4.0", "eslint-import-resolver-meteor": "^0.4.0",
"eslint-plugin-import": "^2.3.0", "eslint-plugin-import": "^2.3.0",
"eslint-plugin-jsx-a11y": "^5.0.3", "eslint-plugin-jsx-a11y": "^5.0.3",
"eslint-plugin-lodash": "^2.4.2",
"eslint-plugin-meteor": "^4.0.1", "eslint-plugin-meteor": "^4.0.1",
"eslint-plugin-promise": "^3.5.0", "eslint-plugin-promise": "^3.5.0",
"eslint-plugin-react": "^7.0.1" "eslint-plugin-react": "^7.0.1"
......
...@@ -2561,6 +2561,12 @@ slice-ansi@0.0.4: ...@@ -2561,6 +2561,12 @@ slice-ansi@0.0.4:
version "0.0.4" version "0.0.4"
resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-0.0.4.tgz#edbf8903f66f7ce2f8eafd6ceed65e264c831b35" resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-0.0.4.tgz#edbf8903f66f7ce2f8eafd6ceed65e264c831b35"
slug@^0.9.1:
version "0.9.1"
resolved "https://registry.yarnpkg.com/slug/-/slug-0.9.1.tgz#af08f608a7c11516b61778aa800dce84c518cfda"
dependencies:
unicode ">= 0.3.1"
sntp@1.x.x: sntp@1.x.x:
version "1.0.9" version "1.0.9"
resolved "https://registry.yarnpkg.com/sntp/-/sntp-1.0.9.tgz#6541184cc90aeea6c6e7b35e2659082443c66198" resolved "https://registry.yarnpkg.com/sntp/-/sntp-1.0.9.tgz#6541184cc90aeea6c6e7b35e2659082443c66198"
...@@ -2803,6 +2809,10 @@ underscore@1.8.3: ...@@ -2803,6 +2809,10 @@ underscore@1.8.3:
version "1.8.3" version "1.8.3"
resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.8.3.tgz#4f3fb53b106e6097fcf9cb4109f2a5e9bdfa5022" resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.8.3.tgz#4f3fb53b106e6097fcf9cb4109f2a5e9bdfa5022"
"unicode@>= 0.3.1":
version "9.0.1"
resolved "https://registry.yarnpkg.com/unicode/-/unicode-9.0.1.tgz#104706272c6464c574801be1b086f7245cf25158"
unpipe@~1.0.0: unpipe@~1.0.0:
version "1.0.0" version "1.0.0"
resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec" resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment