Commit 230545f8 authored by Nacim Goura

Add settings (réglages) for the crawl

parent 81979ffb
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
"import/extensions": ["off", "never"], "import/extensions": ["off", "never"],
"import/no-extraneous-dependencies": "off", "import/no-extraneous-dependencies": "off",
"no-underscore-dangle": "off", "no-underscore-dangle": "off",
"class-methods-use-this": "off" "class-methods-use-this": "off",
"no-param-reassign": "off",
"meteor/no-session": "off"
} }
} }
...@@ -45,3 +45,4 @@ practicalmeteor:chai ...@@ -45,3 +45,4 @@ practicalmeteor:chai
deanius:promise deanius:promise
dynamic-import dynamic-import
aldeed:tabular aldeed:tabular
ajduke:bootstrap-tagsinput
accounts-base@1.3.0 accounts-base@1.3.0
accounts-password@1.3.6 accounts-password@1.3.6
ajduke:bootstrap-tagsinput@0.7.1
alanning:roles@1.2.16 alanning:roles@1.2.16
aldeed:autoform@6.2.0 aldeed:autoform@6.2.0
aldeed:collection2-core@2.0.1 aldeed:collection2-core@2.0.1
...@@ -99,6 +100,7 @@ templating-tools@1.1.2 ...@@ -99,6 +100,7 @@ templating-tools@1.1.2
tmeasday:check-npm-versions@0.3.1 tmeasday:check-npm-versions@0.3.1
tmeasday:test-reporter-helpers@0.2.1 tmeasday:test-reporter-helpers@0.2.1
tracker@1.1.3 tracker@1.1.3
twbs:bootstrap@3.3.6
ui@1.0.13 ui@1.0.13
underscore@1.0.10 underscore@1.0.10
url@1.1.0 url@1.1.0
......
,ngoura,ngoura,15.06.2017 09:13,file:///home/ngoura/.config/libreoffice/4;
\ No newline at end of file
import SimpleSchema from 'simpl-schema';
import { check } from 'meteor/check'; import { check } from 'meteor/check';
import { Meteor } from 'meteor/meteor'; import { Meteor } from 'meteor/meteor';
import { Accounts } from 'meteor/accounts-base'; import { Accounts } from 'meteor/accounts-base';
Meteor.methods({ Meteor.methods({
// add account
addAccount(user) { addAccount(user) {
check(user, Object); // validate user data
new SimpleSchema({
email: {
type: String,
regEx: SimpleSchema.RegEx.Email,
},
username: String,
password: String,
}).validate(user);
const id = Accounts.createUser(user); const id = Accounts.createUser(user);
if (!id) { if (!id) {
throw new Meteor.Error('Error', 'Impossible de se connecter'); throw new Meteor.Error('Error', 'Impossible de se connecter');
} }
}, },
// delete account
deleteAccount(id) { deleteAccount(id) {
check(id, String); check(id, String);
Meteor.users.remove({ _id: id }); Meteor.users.remove({ _id: id });
......
import { check } from 'meteor/check';
import { Meteor } from 'meteor/meteor';
import configCollection from '../../collections/configCollection';
Meteor.methods({
  /**
   * Create or update the crawl configuration of the current user.
   *
   * `config.forbiddenWord` may be a comma-separated string; it is stored as
   * an array of words. `config.domain` has a leading `https:` scheme rewritten
   * to `http:`. When a configuration already exists for the user, only
   * `domain` and `forbiddenWord` are updated and any field missing from
   * `config` keeps its stored value.
   *
   * @param {Object} config
   * @param {string} [config.domain]
   * @param {string|string[]} [config.forbiddenWord]
   */
  defineConfig(config) {
    check(config, Object);
    const userId = Meteor.userId();
    // findOne returns the document itself (or undefined), so the fallbacks
    // below read the stored values instead of properties of an array.
    const oldConfig = configCollection.findOne({ userId });

    if (typeof config.forbiddenWord === 'string' && config.forbiddenWord) {
      // Drop blank entries ("a,,b", trailing comma) and surrounding spaces.
      config.forbiddenWord = config.forbiddenWord
        .split(',')
        .map(word => word.trim())
        .filter(word => word.length > 0);
    }
    if (typeof config.domain === 'string' && config.domain) {
      // Only rewrite the scheme, never an "https" found later in the value.
      config.domain = config.domain.replace(/^https:/i, 'http:');
    }

    if (oldConfig) {
      // A configuration already exists: replace old values with new ones.
      configCollection.update(oldConfig._id, {
        $set: {
          domain: config.domain || oldConfig.domain,
          forbiddenWord: config.forbiddenWord || oldConfig.forbiddenWord,
        },
      });
    } else {
      // No configuration yet: insert one owned by the current user.
      config.userId = userId;
      configCollection.insert(config);
    }
  },
  /**
   * Fetch the configuration document of the current user.
   *
   * @returns {Object|undefined} the stored configuration, if any
   */
  getConfig() {
    return configCollection.findOne({ userId: Meteor.userId() });
  },
});
...@@ -10,11 +10,12 @@ export default class crawlWebsite { ...@@ -10,11 +10,12 @@ export default class crawlWebsite {
constructor(url) { constructor(url) {
this.urls = []; this.urls = [];
this.crawler = new Crawler(url); this.crawler = new Crawler(url);
let i = 0;
this.crawler.on('fetchcomplete', (queueItem) => { this.crawler.on('fetchcomplete', (queueItem) => {
const url = queueItem.url; const url = queueItem.url;
console.log(i++);
if (checkData.checkURL(url)) { if (checkData.checkURL(url)) {
console.log(url);
this.urls.push(url); this.urls.push(url);
} }
}); });
......
...@@ -29,6 +29,7 @@ exports.analyser = { ...@@ -29,6 +29,7 @@ exports.analyser = {
expand: true, expand: true,
synonyms: [ synonyms: [
'gosse, enfant', 'gosse, enfant',
'pmi, protection maternelle et infantile',
], ],
}, },
french_stemmer: { french_stemmer: {
...@@ -55,6 +56,9 @@ exports.analyser = { ...@@ -55,6 +56,9 @@ exports.analyser = {
'lowercase', 'lowercase',
], ],
}, },
url_analyzer: {
tokenizer: 'uax_url_email',
},
}, },
}, },
}, },
...@@ -107,7 +111,17 @@ exports.mapping = { ...@@ -107,7 +111,17 @@ exports.mapping = {
}, },
url: { url: {
type: 'text', type: 'text',
analyzer: 'standard', analyzer: 'url_analyzer',
},
urlText: {
type: 'text',
analyzer: 'french_light',
fields: {
stemmed: {
type: 'text',
analyzer: 'french_heavy',
},
},
}, },
h1: { h1: {
type: 'text', type: 'text',
......
...@@ -64,6 +64,8 @@ export default class IndexGeneric { ...@@ -64,6 +64,8 @@ export default class IndexGeneric {
'description.stemmed', 'description.stemmed',
'body', 'body',
'body.stemmed', 'body.stemmed',
'urlText',
'urlText.stemmed',
'url', 'url',
], ],
}, },
......
import url from 'url';
import { Meteor } from 'meteor/meteor'; import { Meteor } from 'meteor/meteor';
import Sitemapper from 'sitemapper'; import Sitemapper from 'sitemapper';
import Crawler from 'crawler'; import Crawler from 'crawler';
...@@ -12,23 +13,28 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -12,23 +13,28 @@ export default class IndexWebsite extends IndexGeneric {
/** /**
* realize indexation * realize indexation
* @param url * @param urlWebsite
* @returns Promise * @returns Promise
*/ */
async initIndexation(url) { async initIndexation(urlWebsite) {
const crawl = new CrawlWebsite(url); this.config = {
domain: url.parse(urlWebsite).hostname,
forbiddenWord: [],
};
this.config = await Meteor.call('getConfig', {});
/* let urls = [url]; console.log(this.config);
let urls = [urlWebsite];
// si c'est un sitemap on récupère ces url // si c'est un sitemap on récupère ces url
if (checkData.isSitemap(url)) { if (checkData.isSitemap(urlWebsite)) {
const { sites } = await sitemap.fetch(url); const { sites } = await sitemap.fetch(urlWebsite);
urls = sites; urls = sites;
} }
return this.crawlUrl(_.uniq(urls)) return this.crawlUrl(_.uniq(urls))
.then(dataToIndex => this.indexByBulk(dataToIndex)) .then(dataToIndex => this.indexByBulk(dataToIndex))
.catch((error) => { .catch((error) => {
throw error; throw error;
});*/ });
} }
/** /**
...@@ -44,18 +50,39 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -44,18 +50,39 @@ export default class IndexWebsite extends IndexGeneric {
this.listDataError = []; this.listDataError = [];
this.listDataForIndex = []; this.listDataForIndex = [];
this.listUrlAlreadyVisited = urls;
this.listPdf = [];
console.log(`${urls.length} à parser!`); console.log(`${urls.length} à parser!`);
const crawl = new Crawler({ const crawl = new Crawler({
skipDuplicates: true,
callback: (error, res, done) => { callback: (error, res, done) => {
if (error || res.statusCode !== 200) { if (error || res.statusCode !== 200) {
this.listDataError.push({ this.listDataError.push({
url: res.options.uri, url: res.options.uri,
error: error || res.statusCode, error: error || res.statusCode,
}); });
} else { } else if (res && res.$) {
res.$('a').each((index, a) => {
const urlHref = res.$(a).attr('href');
if (urlHref) {
const toQueueUrl = url.resolve(res.request.uri.href, res.$(a).attr('href'));
if (toQueueUrl.includes(this.config.domain) && !this.listUrlAlreadyVisited.includes(toQueueUrl)) {
if (!new RegExp(this.config.forbiddenWord.join('|')).test(toQueueUrl)) {
this.listUrlAlreadyVisited.push(toQueueUrl);
console.log(toQueueUrl);
crawl.queue(toQueueUrl);
}
}
}
});
this.parseData(res.$, res.options.uri); this.parseData(res.$, res.options.uri);
} else {
if (!this.listUrlAlreadyVisited.includes(res.options.uri)) {
this.listUrlAlreadyVisited.push(res.options.uri);
this.listPdf.push(res.options.uri);
}
} }
done(); done();
...@@ -65,6 +92,7 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -65,6 +92,7 @@ export default class IndexWebsite extends IndexGeneric {
crawl.queue(urls); crawl.queue(urls);
crawl.on('drain', () => { crawl.on('drain', () => {
console.error(this.listDataError); console.error(this.listDataError);
console.log(this.listPdf);
resolve(this.listDataForIndex); resolve(this.listDataForIndex);
}); });
}); });
...@@ -75,7 +103,7 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -75,7 +103,7 @@ export default class IndexWebsite extends IndexGeneric {
* @param $ * @param $
* @param url * @param url
*/ */
parseData($, url) { parseData($, currentUrl) {
const body = $('body'); const body = $('body');
body.html(checkData.cleanHtml(body.html())); body.html(checkData.cleanHtml(body.html()));
const title = checkData.cleanText($('title').text()); const title = checkData.cleanText($('title').text());
...@@ -88,7 +116,8 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -88,7 +116,8 @@ export default class IndexWebsite extends IndexGeneric {
description: $('meta[name=description]').attr('content'), description: $('meta[name=description]').attr('content'),
body: checkData.slugText(checkData.cleanText(body.text())), body: checkData.slugText(checkData.cleanText(body.text())),
html: checkData.cleanText(body.html()), html: checkData.cleanText(body.html()),
url: decodeURI(url), urlText: checkData.slugText(checkData.cleanText(decodeURI(currentUrl))),
url: decodeURI(currentUrl),
createdAt: new Date(), createdAt: new Date(),
}; };
...@@ -108,7 +137,7 @@ export default class IndexWebsite extends IndexGeneric { ...@@ -108,7 +137,7 @@ export default class IndexWebsite extends IndexGeneric {
index: { index: {
_index: Meteor.settings.private.elasticsearch.esIndex, _index: Meteor.settings.private.elasticsearch.esIndex,
_type: Meteor.settings.private.elasticsearch.esType, _type: Meteor.settings.private.elasticsearch.esType,
_id: url, _id: currentUrl,
}, },
}); });
......
import { check } from 'meteor/check'; import SimpleSchema from 'simpl-schema';
import { Meteor } from 'meteor/meteor'; import { Meteor } from 'meteor/meteor';
import _ from 'lodash'; import _ from 'lodash';
import testSearchCollection from '../../collections/testSearchCollection'; import testSearchCollection from '../../collections/testSearchCollection';
Meteor.methods({ Meteor.methods({
addTest(test) { addTest(test) {
check(test, Object); // test data
new SimpleSchema({
urlExpected: {
type: String,
regEx: SimpleSchema.RegEx.Url,
},
term: String,
}).validate(test);
// replace https by http
test.urlExpected = _.replace(test.urlExpected, 'https', 'http'); test.urlExpected = _.replace(test.urlExpected, 'https', 'http');
// call search
Meteor.call('searchByTerm', test.term, Meteor.bindEnvironment((err, results) => { Meteor.call('searchByTerm', test.term, Meteor.bindEnvironment((err, results) => {
if (err) { if (err) {
console.log(err); console.log(err);
...@@ -18,8 +29,17 @@ Meteor.methods({ ...@@ -18,8 +29,17 @@ Meteor.methods({
test.urlPosition = index + 1; test.urlPosition = index + 1;
} }
}); });
const oldTest = testSearchCollection.find({ term: test.term }).fetch();
if (oldTest && oldTest.length) {
testSearchCollection.update(oldTest[0]._id, {
$set: {
urlPosition: test.urlPosition,
},
});
} else {
testSearchCollection.insert(test); testSearchCollection.insert(test);
} }
}
})); }));
}, },
deleteTest(id) { deleteTest(id) {
......
import SimpleSchema from 'simpl-schema';
import { Mongo } from 'meteor/mongo';
/**
 * Collection holding the per-user configuration (crawl, api, index).
 * @type {Mongo.Collection}
 */
const configCollection = new Mongo.Collection('configs');

// Schema of a configuration document: its owner, the site domain and the
// list of words that exclude a URL from indexation.
const configSchema = new SimpleSchema({
  userId: String,
  domain: {
    type: String,
    regEx: SimpleSchema.RegEx.Url,
  },
  forbiddenWord: {
    type: Array,
  },
  'forbiddenWord.$': String,
});

// The schema is also exposed as a static property on SimpleSchema.
SimpleSchema.configCollection = configSchema;
configCollection.attachSchema(configSchema);

export default configCollection;
...@@ -3,7 +3,7 @@ import SimpleSchema from 'simpl-schema'; ...@@ -3,7 +3,7 @@ import SimpleSchema from 'simpl-schema';
import { Mongo } from 'meteor/mongo'; import { Mongo } from 'meteor/mongo';
/** /**
* this local collection keep tracks of all tabs of this application * this collection is temporaire for testsearch
* @type {Mongo.Collection} * @type {Mongo.Collection}
*/ */
const testSearchCollection = new Mongo.Collection('testSearch'); const testSearchCollection = new Mongo.Collection('testSearch');
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import '../../api/indexation/methods'; import '../../api/indexation/methods';
import '../../api/account/methods'; import '../../api/account/methods';
import '../../api/config/methods';
import '../../api/testSearch/methods'; import '../../api/testSearch/methods';
import '../../tabular/tabularUser'; import '../../tabular/tabularUser';
import '../../tabular/tabularTestSearch'; import '../../tabular/tabularTestSearch';
...@@ -17,6 +17,9 @@ ...@@ -17,6 +17,9 @@
<div class="panel-body"> <div class="panel-body">
{{> tabular table=TabularTables.testSearch class="table table-striped table-bordered table-condensed text-center"}} {{> tabular table=TabularTables.testSearch class="table table-striped table-bordered table-condensed text-center"}}
</div> </div>
<div class="panel-footer">
<button class="btn btn-primary launch-test">Relancer les tests</button>
</div>
</div> </div>
......
...@@ -20,6 +20,11 @@ Template.testSearchTpl.events({ ...@@ -20,6 +20,11 @@ Template.testSearchTpl.events({
console.log(error); console.log(error);
}); });
}, },
'click .launch-test': (event) => {
event.preventDefault();
},
}); });
Template.testSearchActionTable.events({ Template.testSearchActionTable.events({
......
...@@ -6,6 +6,7 @@ import displayNotif from '../../../../components/notifs/notifs'; ...@@ -6,6 +6,7 @@ import displayNotif from '../../../../components/notifs/notifs';
import './add.html'; import './add.html';
Template.addAccountTpl.events({ Template.addAccountTpl.events({
// add account
'submit form': (event) => { 'submit form': (event) => {
event.preventDefault(); event.preventDefault();
......
<template name="apiIndexationTpl"> <template name="apiIndexationTpl">
<div class="panel panel-default wrapper"> <div class="panel panel-default wrapper">
<div class="panel-body"> <div class="panel-body">
<h3 class="text-center">Gestion de l'indexation des API</h3> <h4 class="text-center">Gestion de l'indexation des API</h4>
</div> </div>
</div> </div>
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
<div class="panel panel-default wrapper"> <div class="panel panel-default wrapper">
<div class="panel-body"> <div class="panel-body">
<h3 class="text-center">Gestion de l'indexation en général</h3> <h4 class="text-center">Gestion de l'indexation en général</h4>
<button class="btn btn-info" id="initElastic">Initialisation ElasticSearch</button> <button class="btn btn-info" id="initElastic">Initialisation ElasticSearch</button>
<button class="btn btn-primary" id="reindexElastic">Réindexation ElasticSearch</button> <button class="btn btn-primary" id="reindexElastic">Réindexation ElasticSearch</button>
</div> </div>
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
<div class="panel panel-default wrapper"> <div class="panel panel-default wrapper">
<div class="panel-body"> <div class="panel-body">
<h3 class="text-center">Gestion de l'indexation des sites</h3> <h4 class="text-center">Gestion de l'indexation des sites</h4>
<form class="form-horizontal" id="formUrlSite"> <form id="formUrlSite" class="form-horizontal" method="post">
<label class="control-label">Url du site ou du sitemap à indexer : </label> <label class="control-label">Url du site ou du sitemap à indexer : </label>
<div class="input-group"> <div class="input-group">
<input type="url" class="form-control" placeholder="Url" required="required" name="urlSite"> <input type="url" class="form-control" placeholder="Url" required="required" name="urlSite">
...@@ -19,6 +19,23 @@ ...@@ -19,6 +19,23 @@
Indexation en cours... Indexation en cours...
</h4> </h4>
</form> </form>
<hr>
<h4 class="text-center">Réglages</h4>
<div class="row">
<form id="formReglageSite" class="form-horizontal" method="post">
<div class="form-group">
<label>Nom de domaine du site</label>
<input type="text" class="form-control hostUrlWebsite" name="hostUrlWebsite" required="true">
</div>
<div class="form-group">
<label>Mot dans l'url à ne pas indexer (séparé par une virgule)</label>
<input type="text" class="form-control forbiddenWordWebsite" name="forbiddenWordWebsite" data-role="tagsinput" required="true">
</div>
<input type="submit" class="btn btn-success" value="Valider">
</form>
</div>
</div> </div>
</div> </div>
</template> </template>
...@@ -7,6 +7,20 @@ import displayNotif from '../../../../components/notifs/notifs.js'; ...@@ -7,6 +7,20 @@ import displayNotif from '../../../../components/notifs/notifs.js';
import './site.html'; import './site.html';
/**
 * When the template is rendered, initialise the tags input and pre-fill the
 * settings form with the configuration stored for the current user.
 */
Template.siteIndexationTpl.onRendered(() => {
  $('.forbiddenWordWebsite').tagsinput();
  // Style the generated widget right away so it looks the same whether or
  // not a configuration can be loaded.
  $('.bootstrap-tagsinput').addClass('form-control');

  Meteor.callPromise('getConfig', {})
    .then((config) => {
      // getConfig yields nothing when the user has not saved a config yet.
      if (!config) {
        return;
      }
      $('.hostUrlWebsite').val(config.domain);
      if (config.forbiddenWord && config.forbiddenWord.length) {
        $('.forbiddenWordWebsite').tagsinput('add', config.forbiddenWord.join(','));
      }
    })
    .catch((error) => {
      // Report the failure the same way the form handlers of this file do.
      displayNotif({
        type: 'error',
        title: 'Configuration : ',
        message: error.reason ? error.reason : error,
        save: true,
      });
    });
});
Template.siteIndexationTpl.events({ Template.siteIndexationTpl.events({
'submit #formUrlSite': (event) => { 'submit #formUrlSite': (event) => {
event.preventDefault(); event.preventDefault();
...@@ -42,4 +56,31 @@ Template.siteIndexationTpl.events({ ...@@ -42,4 +56,31 @@ Template.siteIndexationTpl.events({
}); });
event.target.reset(); event.target.reset();
}, },
'submit #formReglageSite': (event) => {
event.preventDefault();
const config = {
domain: event.target.hostUrlWebsite.value,
forbiddenWord: event.target.forbiddenWordWebsite.value,
};
Meteor.callPromise('defineConfig', config)
.then(() => {
displayNotif({
type: 'success',
title: 'Configuration : ',
message: 'Configuration ajouté avec succès!',
save: true,
});
}).catch((error) => {
displayNotif({
type: 'error',
title: 'Configuration : ',
message: error.reason ? error.reason : error,
save: true,
});
});
event.target.reset();
$('.forbiddenWordWebsite').tagsinput('removeAll');
},
}); });
import { Meteor } from 'meteor/meteor'; import { Meteor } from 'meteor/meteor';
import { Session } from 'meteor/session';
import { FlowRouter } from 'meteor/kadira:flow-router'; import { FlowRouter } from 'meteor/kadira:flow-router';
import { Template } from 'meteor/templating'; import { Template } from 'meteor/templating';
import displayNotif from '../../components/notifs/notifs'; import displayNotif from '../../components/notifs/notifs';
...@@ -29,6 +30,12 @@ Template.loginTpl.events({ ...@@ -29,6 +30,12 @@ Template.loginTpl.events({
title: 'Succès : ', title: 'Succès : ',
message: `Bienvenue ${user.login}!`, message: `Bienvenue ${user.login}!`,
}); });
// get global config for user
Meteor.callPromise('getConfig', {})
.then((config) => {
console.log(config);
Session.set('config', config);
});
} }
}); });
}, },
......