Commit 6dab519a authored by Nacim Goura

optimize search

parent 1ef75d2e
node_modules/
.idea/*
.deploy/*
......@@ -18,7 +18,7 @@ standard-minifier-js@2.1.0 # JS minifier run for production mode
es5-shim@4.6.15 # ECMAScript 5 compatibility for older browsers.
ecmascript@0.8.0 # Enable ECMAScript2015+ syntax in app code
fourseven:scss # Compile scss to css
fortawesome:fontawesome
fortawesome:fontawesome # icon
# package for view
kadira:flow-router # FlowRouter is a very simple router for Meteor
......
import { Meteor } from 'meteor/meteor';
import indexationElastic from '../../libs/elasticsearch/elasticsearch';
......@@ -8,8 +7,6 @@ const esType = Meteor.settings.private.elasticsearch.esType;
export default class IndexGeneric {
indexByBulk(data) {
console.log('indexation');
console.log(data);
return indexationElastic.bulk(data);
}
......@@ -42,26 +39,34 @@ export default class IndexGeneric {
'lorsqu', 'puisqu',
],
},
french_stop: {
type: 'stop',
stopwords: '_french_',
french_synonym: {
type: 'synonym',
ignore_case: true,
expand: true,
synonyms: ['gosse, enfant'],
},
french_stemmer: {
type: 'stemmer',
language: 'french',
language: 'light_french',
},
},
analyzer: {
analyzer_french: {
tokenizer: 'standard',
char_filter: ['html_strip'],
french_heavy: {
tokenizer: 'icu_tokenizer',
filter: [
'french_elision',
'lowercase',
'french_stop',
'icu_folding',
'french_synonym',
'french_stemmer',
],
},
french_light: {
tokenizer: 'icu_tokenizer',
filter: [
'french_elision',
'icu_folding',
],
},
},
},
},
......@@ -74,20 +79,38 @@ export default class IndexGeneric {
},
title: {
type: 'text',
analyzer: 'analyzer_french',
analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
},
title_suggest: {
type: 'completion',
analyzer: 'analyzer_french',
analyzer: 'french_light',
max_input_length: 100,
},
description: {
type: 'text',
analyzer: 'analyzer_french',
analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
},
body: {
type: 'text',
analyzer: 'analyzer_french',
analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
},
html: {
type: 'text',
......@@ -99,15 +122,15 @@ export default class IndexGeneric {
},
h1: {
type: 'text',
analyzer: 'analyzer_french',
analyzer: 'french_light',
},
h2: {
type: 'text',
analyzer: 'analyzer_french',
analyzer: 'french_light',
},
breadcrumb: {
type: 'text',
analyzer: 'analyzer_french',
analyzer: 'french_light',
},
createdAt: {
type: 'date',
......@@ -125,15 +148,61 @@ export default class IndexGeneric {
* @returns {promise}
*/
search(term) {
const params = {};
const params = {
from: 0,
size: 10,
};
if (term) {
/**
*
* common ( sépare les tokens les plus présents dans l’index des autres, et ne les utilise que pour améliorer la pertinence )
* fuzziness (permet une recherche même avec des fautes)
*/
params.query = {
match: {
title: {
bool: {
must: [
{
common: {
'body.stemmed': {
query: term,
},
},
},
{
multi_match: {
query: term,
slop: 50,
fuzziness: 'AUTO',
fields: [
'description',
'description.stemmed',
'body',
'body.stemmed',
'url',
],
},
},
],
should: [
{
multi_match: {
query: term,
fuzziness: 'AUTO',
fields: [
'title',
'title.stemmed',
'url',
'breadcrumb',
'h1',
'h2',
'html',
],
},
},
],
minimum_should_match: 1,
boost: 2.0,
},
};
}
......
......@@ -79,6 +79,7 @@ export default class IndexWebsite extends IndexGeneric {
*/
parseData($, url) {
const body = $('body');
body.html(checkData.cleanHtml(body.html()));
const title = $('title').text();
const dataForIndex = {
tag: 'site',
......@@ -87,8 +88,8 @@ export default class IndexWebsite extends IndexGeneric {
input: title,
},
description: $('meta[name=description]').attr('content'),
body: checkData.cleanText(body.text()),
html: checkData.cleanHtml(body.html()),
body: checkData.slugText(checkData.cleanText(body.text())),
html: checkData.cleanText(body.html()),
url: decodeURI(url),
createdAt: new Date(),
};
......@@ -96,7 +97,7 @@ export default class IndexWebsite extends IndexGeneric {
console.log(title);
if ($('div').hasClass('.breadcrumb')) {
dataForIndex.menu = $('.breadcrumb').text();
dataForIndex.breadcrumb = $('.breadcrumb').text();
}
if ($('h1').length) {
......
......@@ -38,7 +38,7 @@ Meteor.methods({
return index.autoComplete(term)
.then((result) => {
if (result.suggest) {
return _.map(result.suggest[0].options, 'text');
return _.take(_.map(result.suggest[0].options, 'text'), 5);
}
return [];
});
......
......@@ -13,6 +13,11 @@
<button type="submit" class="btn btn-success">Valider</button>
</span>
</div>
<h4 class="hidden">
<i class="fa fa-cog fa-spin fa-2x fa-fw"></i>
<span class="sr-only">Loading...</span>
Indexation en cours...
</h4>
</form>
</div>
</div>
......
......@@ -16,8 +16,12 @@ Template.siteIndexationTpl.events({
});
const url = event.target.urlSite.value;
$(event.target).find('h4').removeClass('hidden');
Meteor.callPromise('indexWebsite', url)
.then(() => {
$(event.target).find('h4').addClass('hidden');
displayNotif({
type: 'success',
title: 'Indexation : ',
......@@ -25,6 +29,7 @@ Template.siteIndexationTpl.events({
save: true,
});
}).catch((error) => {
$(event.target).find('h4').addClass('hidden');
console.log(error);
displayNotif({
type: 'error',
......
......@@ -2,6 +2,18 @@
import SimpleSchema from 'simpl-schema';
import sanitizeHtml from 'sanitize-html';
import _ from 'lodash';
import slug from 'slug';
// Replace slug's "pretty" mode with project-specific options: a space as the
// replacement string, symbols enabled, no removal pattern, lower-casing on,
// the library's stock charmap, and French words for the "&&" / "||" pairs.
Object.assign(slug.defaults.modes, {
  pretty: {
    replacement: ' ',
    symbols: true,
    remove: null,
    lower: true,
    charmap: slug.charmap,
    multicharmap: {
      '&&': 'et',
      '||': 'ou',
    },
  },
});
const checkData = {
......@@ -15,36 +27,31 @@ const checkData = {
},
cleanText(str) {
return _.trim(_.replace(str, /[^\S]{2,}/gm, ' '));
// enleve saut de ligne et slash
let cleanStr = _.replace(str, /[\n\\/]/gm, ' ');
// met un espace avant une majuscule
cleanStr = _.replace(cleanStr, /([A-Z])/gm, ' $1');
// enleve les multiples espace
cleanStr = _.replace(cleanStr, /[^\S]{2,}/gm, ' ');
return _.trim(cleanStr);
},
/**
 * Currently a pass-through: returns its argument unchanged.
 *
 * NOTE(review): the name suggests slugification, and this file imports and
 * configures `slug`, yet nothing in this method calls it — confirm whether
 * the no-op is intentional.
 *
 * @param {*} str - value to hand back
 * @returns {*} the same value that was passed in
 */
slugText(str) {
return str;
},
/**
 * Passes an HTML string to `sanitizeHtml` with a fixed set of options
 * (allowed tags, allowed attributes, self-closing tags, tag transforms and
 * URL schemes) and returns the result.
 *
 * NOTE(review): as shown here the options object lists `allowedTags` and
 * `allowedAttributes` twice (the first `allowedTags` array is never closed)
 * and the method has two closing lines. This looks like the before and after
 * lines of a change displayed together — confirm against the real file which
 * variant is current before relying on either list.
 *
 * @param {string} html - markup to sanitize (presumably a string — confirm)
 * @returns {*} whatever `sanitizeHtml` returns for these options
 */
cleanHtml(html) {
return sanitizeHtml(html, {
allowedTags: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'p',
'nl', 'b', 'i', 'strong', 'em', 'strike', 'div',
allowedTags: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'p', 'a', 'ul', 'ol',
'nl', 'li', 'b', 'i', 'strong', 'em', 'strike', 'code', 'hr', 'br', 'div',
'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'pre'],
allowedAttributes: ['class'],
allowedAttributes: [],
selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'],
// Each transform below keeps the tag name and sets `text` to an empty string
// (presumably to drop link, list, footer and navigation text — confirm).
transformTags: {
a: () => ({
tagName: 'a',
text: '',
}),
li: () => ({
tagName: 'li',
text: '',
}),
footer: () => ({
tagName: 'footer',
text: '',
}),
nav: () => ({
tagName: 'nav',
text: '',
}),
},
allowedSchemes: ['http', 'https', 'ftp', 'mailto'],
allowedSchemesByTag: {},
allowProtocolRelative: true,
});
}
},
};
export default checkData;
......@@ -5,8 +5,7 @@
"scripts": {
"start": "meteor --settings settings.json",
"test": "meteor test --driver-package practicalmeteor:mocha --port 3001",
"eslint": "eslint .; exit 0",
"stylelint": "stylelint './client/main.scss'; exit 0"
"eslint": "eslint .; exit 0"
},
"dependencies": {
"babel-runtime": "^6.23.0",
......@@ -14,13 +13,13 @@
"bootstrap-sass": "^3.3.7",
"crawler": "^1.0.5",
"elasticsearch": "^13.0.1",
"eslint-plugin-lodash": "^2.4.2",
"izitoast": "^1.1.1",
"lodash": "^4.17.4",
"meteor-node-stubs": "~0.2.11",
"sanitize-html": "^1.14.1",
"simpl-schema": "^0.3.0",
"sitemapper": "^2.1.13",
"slug": "^0.9.1",
"sweetalert2": "^6.6.5"
},
"devDependencies": {
......@@ -31,6 +30,7 @@
"eslint-import-resolver-meteor": "^0.4.0",
"eslint-plugin-import": "^2.3.0",
"eslint-plugin-jsx-a11y": "^5.0.3",
"eslint-plugin-lodash": "^2.4.2",
"eslint-plugin-meteor": "^4.0.1",
"eslint-plugin-promise": "^3.5.0",
"eslint-plugin-react": "^7.0.1"
......
......@@ -2561,6 +2561,12 @@ slice-ansi@0.0.4:
version "0.0.4"
resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-0.0.4.tgz#edbf8903f66f7ce2f8eafd6ceed65e264c831b35"
slug@^0.9.1:
version "0.9.1"
resolved "https://registry.yarnpkg.com/slug/-/slug-0.9.1.tgz#af08f608a7c11516b61778aa800dce84c518cfda"
dependencies:
unicode ">= 0.3.1"
sntp@1.x.x:
version "1.0.9"
resolved "https://registry.yarnpkg.com/sntp/-/sntp-1.0.9.tgz#6541184cc90aeea6c6e7b35e2659082443c66198"
......@@ -2803,6 +2809,10 @@ underscore@1.8.3:
version "1.8.3"
resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.8.3.tgz#4f3fb53b106e6097fcf9cb4109f2a5e9bdfa5022"
"unicode@>= 0.3.1":
version "9.0.1"
resolved "https://registry.yarnpkg.com/unicode/-/unicode-9.0.1.tgz#104706272c6464c574801be1b086f7245cf25158"
unpipe@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment