Commit c2422588 authored by Nacim Goura

add autocompletion and fix bug

parent 148d1046
......@@ -48,7 +48,7 @@ html-tools@1.0.11
htmljs@1.0.11
http@1.2.12
id-map@1.0.9
johanbrook:publication-collector@1.0.9
johanbrook:publication-collector@1.0.10
jquery@1.11.10
kadira:blaze-layout@2.3.0
kadira:flow-router@2.12.1
......
......@@ -48,7 +48,7 @@ export default class crawlWebsite extends CrawlGeneric {
console.log(`${urls.length} à parser!`);
const crawl = new Crawler({
retries: 5,
retries: 10,
skipDuplicates: true,
rotateUA: true,
userAgent: [
......@@ -141,7 +141,8 @@ export default class crawlWebsite extends CrawlGeneric {
domain: this.config.domain,
title,
title_suggest: {
input: title,
// replace - and _ and multiple space for autocompletion
input: title.replace(/[-_]/g, ' ').replace(/[^\S]{2,}/, ' ').trim(),
},
description: $('meta[name=description]').attr('content'),
body: checkData.cleanText(body.text()),
......
// Crawl http://www.anap.fr/ with simplecrawler and print the URL of every
// queue item reported by the 'fetchcomplete' event.
const Crawler = require('simplecrawler');

const crawler = new Crawler('http://www.anap.fr/');

// Handler for 'fetchcomplete': only the queue item's URL is logged.
function logFetchedUrl(item) {
  console.log(item.url);
}

crawler.on('fetchcomplete', logFetchedUrl);
crawler.start();
......@@ -52,7 +52,7 @@ export async function searchDocument(data, userId) {
}
export async function searchAll(data) {
const userId = Meteor.userId().toLowerCase();
const userId = Meteor.userId();
try {
return {
website: await searchWebsite(data, userId),
......
......@@ -5,8 +5,9 @@ import indexationElastic from '/imports/libs/elasticsearch/elasticsearch';
export default class Search {
constructor(userId) {
this.esIndex = userId;
constructor(userId, config) {
this.esIndex = userId.toLowerCase();
this.config = config;
}
/**
......@@ -29,15 +30,22 @@ export default class Search {
params._source = ["title", "url"];
params.query = {
"bool": {
"must_not": [
{
"match_phrase": {
"url": "page",
},
},
],
"must": [
{
"multi_match": {
"query": term,
"fuzziness": "AUTO",
"fields": [
"title.stemmed",
"urlText.stemmed",
"description.stemmed",
"url",
],
"boost": 3,
},
......@@ -47,15 +55,12 @@ export default class Search {
{
"multi_match": {
"query": term,
"fuzziness": "AUTO",
"fields": [
"title",
"description",
"urlText",
"title",
"h1",
"breadcrumb",
"urlText",
"urlText.stemmed",
],
"boost": 2,
},
......@@ -63,18 +68,18 @@ export default class Search {
{
"multi_match": {
"query": term,
"fuzziness": "AUTO",
"fields": [
"body.stemmed",
"body",
"h2",
"html",
"listPdf",
],
"boost": 1,
},
},
],
"minimum_should_match": 1,
"minimum_should_match": 2,
},
};
}
......@@ -89,6 +94,7 @@ export default class Search {
};
if (term) {
params._source = ["title", "url"];
params.query = {
bool: {
must: [{
......@@ -181,15 +187,16 @@ export default class Search {
* @returns {promise}
*/
autoComplete(term) {
let params = {};
const params = {};
if (term) {
params = {
suggest: {
prefix: term,
completion: {
field: 'title_suggest',
size: 25,
params.suggest = {
text: term,
completion: {
field: 'title_suggest',
size: 25,
fuzzy: {
fuzziness: 2,
},
},
};
......
......@@ -12,14 +12,17 @@ export function addTest(newTest) {
if (!newTest.userId) {
newTest.userId = Meteor.userId();
}
newTest.searchTerm = newTest.searchTerm.trim();
newTest.createdAt = moment().toDate();
testSearchCollection.simpleSchema().validate(newTest);
newTest.urlExpected = newTest.urlExpected.replace('https', 'http').replace(/\/$/, '').trim();
// call search
searchWebsite({ searchTerm: newTest.searchTerm })
searchWebsite({ searchTerm: newTest.searchTerm }, newTest.userId)
.then((results) => {
_.forEach(results.list, (result, index) => {
if (newTest.urlExpected === result.url) {
if (newTest.urlExpected.toLowerCase().trim() === result.url.toLowerCase().trim()) {
newTest.urlPosition = index + 1;
}
});
......
......@@ -9,19 +9,9 @@ exports.analyser = {
type: 'elision',
articles_case: true,
articles: [
'l',
'm',
't',
'qu',
'n',
's',
'j',
'd',
'c',
'jusqu',
'quoiqu',
'lorsqu',
'puisqu',
'l', 'm', 't', 'qu', 'n', 's',
'j', 'd', 'c', 'jusqu', 'quoiqu',
'lorsqu', 'puisqu',
],
},
// synonyme
......@@ -30,8 +20,13 @@ exports.analyser = {
ignore_case: true,
expand: true,
synonyms: [
'gosse, enfant',
'pmi, protection maternelle et infantile',
'psycho, psychiatrie',
'anap, Agence nationale d\'appui à la performance des établissements de santé et médico-sociaux',
'ars, agences régionales de santé',
'c dans l\'oxygene, c dans l\'air',
'+, plus',
'%, pour cent',
],
},
// radical des mots
......@@ -39,6 +34,18 @@ exports.analyser = {
type: 'stemmer',
language: 'light_french',
},
// phonetic matching (an alternative to fuzziness for handling spelling mistakes)
phonetic_filter: {
type: 'phonetic',
encoder: 'metaphone',
replace: false,
},
},
tokenizer: {
tokeniser_url_mail: {
type: 'uax_url_email',
max_token_length: 5,
},
},
analyzer: {
// français elevé
......@@ -50,7 +57,7 @@ exports.analyser = {
'french_synonym',
'french_stemmer',
'lowercase',
'asciifolding',
'phonetic_filter',
],
},
// français léger
......@@ -63,16 +70,19 @@ exports.analyser = {
'french_elision',
'icu_folding',
'lowercase',
'asciifolding',
'phonetic_filter',
],
},
// analyzer for url
url_analyzer: {
tokenizer: 'uax_url_email',
tokenizer: 'tokeniser_url_mail',
filter: [
'french_synonym',
'french_stemmer',
'french_elision',
'lowercase',
'icu_folding',
'lowercase',
'phonetic_filter',
],
},
},
......
......@@ -65,3 +65,14 @@ adminSection.route('/notif', {
});
},
});
// Register the 'testSearch' route at '/test' on adminSection. Its action
// renders 'mainLayoutTpl' with the search-test template in the main region.
adminSection.route('/test', {
  name: 'testSearch',
  action() {
    // Region name -> template name passed to the layout.
    const regions = {
      sidebar: 'sidebarLayoutTpl',
      main: 'testSearchTpl',
      navbar: 'navbarLayoutTpl',
    };
    BlazeLayout.render('mainLayoutTpl', regions);
  },
});
......@@ -18,6 +18,7 @@ import '/imports/ui/pages/admin/indexation/indexation';
import '/imports/ui/pages/admin/account/account';
import '/imports/ui/pages/admin/statistique/statistique';
import '/imports/ui/pages/notif/notif';
import '/imports/ui/components/testSearch/testSearch';
// Set up all routes in the app
import './adminRoutes';
......
......@@ -19,4 +19,6 @@ SyncedCron.add({
},
});
SyncedCron.start();
// Start the SyncedCron scheduler when Meteor reports a production environment.
if (Meteor.isProduction) {
SyncedCron.start();
}
......@@ -12,7 +12,6 @@
</button>
</span>
</div>
<!--
{{#if listAutoCompleteResults}}
<div class="autocomplete-dropdown">
{{#each result in listAutoCompleteResults}}
......@@ -22,7 +21,6 @@
{{/each}}
</div>
{{/if}}
-->
{{/autoForm}}
</div>
......
......@@ -14,23 +14,25 @@ Template.searchTpl.helpers({
});
Template.searchTpl.events({
/* 'input #searchResult': (event) => {
'input input[name$=searchTerm]': (event) => {
event.preventDefault();
const term = event.target.value;
Meteor.callPromise('autoCompletion', term)
.then((results) => {
Session.set('autoCompleteResults', results);
});
}, */
if (term && term.length > 2) {
Meteor.callPromise('autoCompletion', term)
.then((results) => {
Session.set('autoCompleteResults', results);
});
}
},
'click .autocomplete-title': (event) => {
event.preventDefault();
const term = event.target.textContent;
$('#searchResult').val(term);
$('input[name$=searchTerm]').val(term);
Session.set('autoCompleteResults', null);
},
......
......@@ -10,12 +10,15 @@
<li>
<a href="/admin"><i class="fa fa-fw fa-etsy fa-20px" aria-hidden="true"></i> Indexation</a>
</li>
<li>
<a href="/admin/test"><i class="fa fa-fw fa-gear fa-20px" aria-hidden="true"></i> Tests</a>
</li>
{{#if grantedAccessAdmin}}
<li>
<a href="/admin/account"><i class="fa fa-fw fa-user fa-20px" aria-hidden="true"></i> Comptes</a>
</li>
<li>
<a href="/admin/stat"><i class="fa fa-fw fa-bar-chart-o fa-20px" aria-hidden="true"></i> Statistiques</a>
<a href="/admin/stat"><i class="fa fa-fw fa-bar-chart-o fa-20px" aria-hidden="true"></i> Statistiques</a>
</li>
{{/if}}
<li>
......
......@@ -5,10 +5,22 @@
<div class="panel panel-default wrapper">
<div class="panel-body">
<h4 class="text-center">Gestion de l'indexation en général</h4>
<button class="btn btn-danger" id="initElastic">Initialisation ElasticSearch</button>
<!--<button class="btn btn-primary btn-reindex" id="websiteReindexation">Réindexer sites web</button>
<button class="btn btn-primary btn-reindex" id="apiReindexation">Réindexer API</button>
<button class="btn btn-primary btn-reindex" id="documentReindexation">Réindexer documents</button>-->
<div class="row">
<button class="btn btn-danger" id="initElastic">Initialisation ElasticSearch</button>
<!--<button class="btn btn-primary btn-reindex" id="websiteReindexation">Réindexer sites web</button>
<button class="btn btn-primary btn-reindex" id="apiReindexation">Réindexer API</button>
<button class="btn btn-primary btn-reindex" id="documentReindexation">Réindexer documents</button>-->
</div>
<hr>
<div class="row">
<label>Liste des synonymes : </label>
<textarea class="form-control" name="synonym" id="" cols="30" rows="10"></textarea>
<div class="alert alert-warning">
<strong>Attention!</strong> Changer les synonymes nécessite une réindexation!
</div>
</div>
</div>
</div>
</template>
......@@ -6,6 +6,10 @@ import displayNotif from '/imports/api/notif/notifClient';
import './main.html';
// Registers a `rendered` hook on mainIndexationTpl; the hook body is empty,
// so it currently does nothing.
Template.mainIndexationTpl.hooks({
rendered() {},
});
Template.mainIndexationTpl.events({
'click #initElastic': (event) => {
event.preventDefault();
......@@ -33,7 +37,7 @@ Template.mainIndexationTpl.events({
message: error.message,
save: true,
});
});
});
});
},
'click .btn-reindex': (event) => {
......
......@@ -4,6 +4,4 @@
{{> searchTpl ""}}
{{> testSearchTpl ""}}
</template>
......@@ -70,7 +70,7 @@ const checkData = {
// url for crawl
checkCrawlUrl(url) {
return !/\.(jpeg|jpg|gif|png|js|css|ico|eot|svg|woff|ttf|zip|rar|tar|pdf|xml|xlsx|docx)/.test(url);
return !/\.(jpeg|jpg|gif|png|js|css|ico|eot|svg|woff|ttf|zip|rar|tar|pdf|xml|xlsx|docx|mp3)/.test(url);
},
// clean url
......
pmi, protection maternelle et infantile
psycho, psychiatrie
anap, Agence nationale d'appui à la performance des établissements de santé et médico-sociaux
ars, agences régionales de santé
c dans l'oxygene, c dans l'air
+, plus
%, pour cent
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment