Commit b668f68b authored by Nacim Goura

optimization index job

parent 9d8e0551
...@@ -9,7 +9,7 @@ const hooksFormApiCrawl = { ...@@ -9,7 +9,7 @@ const hooksFormApiCrawl = {
userId: Meteor.userId(), userId: Meteor.userId(),
type: 'info', type: 'info',
title: 'indexApi', title: 'indexApi',
message: `Début de l'indexation ${doc.type} pour ${doc.idPage}!`, message: 'Ajouté à la liste de job!',
save: true, save: true,
}); });
return doc; return doc;
......
...@@ -8,6 +8,7 @@ export default class CrawlTwitter { ...@@ -8,6 +8,7 @@ export default class CrawlTwitter {
constructor(config, data) { constructor(config, data) {
console.log('crawl Twittter'); console.log('crawl Twittter');
this.name = data.name;
this.config = config; this.config = config;
this.data = data; this.data = data;
this.listDataForIndex = []; this.listDataForIndex = [];
...@@ -44,6 +45,7 @@ export default class CrawlTwitter { ...@@ -44,6 +45,7 @@ export default class CrawlTwitter {
_.forEach(tweets, (item, index) => { _.forEach(tweets, (item, index) => {
const dataForIndex = { const dataForIndex = {
tag: 'api', tag: 'api',
jobName: this.name,
apiName: 'twitter', apiName: 'twitter',
domain: this.data.idPage, domain: this.data.idPage,
url: `https://twitter.com/${this.data.idPage}/status/${item.id_str}`, url: `https://twitter.com/${this.data.idPage}/status/${item.id_str}`,
......
...@@ -9,6 +9,7 @@ export default class CrawlNetwork extends CrawlGeneric { ...@@ -9,6 +9,7 @@ export default class CrawlNetwork extends CrawlGeneric {
constructor(data) { constructor(data) {
super(); super();
this.name = data.name;
this.fileToIndex = []; this.fileToIndex = [];
if (data.listFilename) { if (data.listFilename) {
this.fileToIndex = data.listFilename; this.fileToIndex = data.listFilename;
...@@ -40,6 +41,7 @@ export default class CrawlNetwork extends CrawlGeneric { ...@@ -40,6 +41,7 @@ export default class CrawlNetwork extends CrawlGeneric {
this.listDataForIndex.push({ this.listDataForIndex.push({
tag: 'network', tag: 'network',
jobName: this.name,
data: this.base64Encode(file.path), data: this.base64Encode(file.path),
createdAt: new Date(), createdAt: new Date(),
}); });
......
...@@ -48,8 +48,13 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -48,8 +48,13 @@ export default class crawlWebsite extends CrawlGeneric {
console.log(`${urls.length} à parser!`); console.log(`${urls.length} à parser!`);
const crawl = new Crawler({ const crawl = new Crawler({
retries: 5,
skipDuplicates: true, skipDuplicates: true,
userAgent: 'Mozilla/5.0 (compatible; fr-crawler/1.1)', rotateUA: true,
userAgent: [
'Mozilla/5.0 (compatible; fr-crawler/1.1)',
'Googlebot/2.1 (+http://www.google.com/bot.html)',
],
callback: (error, res, done) => { callback: (error, res, done) => {
if (error || res.statusCode !== 200) { if (error || res.statusCode !== 200) {
this.listDataError.push({ this.listDataError.push({
...@@ -91,7 +96,8 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -91,7 +96,8 @@ export default class crawlWebsite extends CrawlGeneric {
}); });
// if url has not parameter => parse data // if url has not parameter => parse data
if (!/(#.*|\?.*)/g.test(res.options.uri)) { if (!/(#.*|\?.*)/g.test(res.options.uri)) {
this.parseData(res.$, res.options.uri, listPdf) const urlParse = res.options.uri.replace('https', 'http').replace(/\/$/, '');
this.parseData(res.$, urlParse, listPdf, res.headers.date)
.then(() => { .then(() => {
jobCollection.update({ name: this.name }, jobCollection.update({ name: this.name },
{ {
...@@ -123,8 +129,9 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -123,8 +129,9 @@ export default class crawlWebsite extends CrawlGeneric {
* @param $ * @param $
* @param currentUrl * @param currentUrl
* @param listPdf * @param listPdf
* @param headerDate
*/ */
async parseData($, currentUrl, listPdf) { async parseData($, currentUrl, listPdf, headerDate) {
const body = $('body'); const body = $('body');
if (this.config.excludeElement) { if (this.config.excludeElement) {
$(this.config.excludeElement).remove(); $(this.config.excludeElement).remove();
...@@ -133,6 +140,7 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -133,6 +140,7 @@ export default class crawlWebsite extends CrawlGeneric {
const title = checkData.cleanText($('title').text()); const title = checkData.cleanText($('title').text());
const dataForIndex = { const dataForIndex = {
tag: 'website', tag: 'website',
jobName: this.name,
domain: this.config.domain, domain: this.config.domain,
title, title,
title_suggest: { title_suggest: {
...@@ -143,7 +151,7 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -143,7 +151,7 @@ export default class crawlWebsite extends CrawlGeneric {
html: body.html(), html: body.html(),
urlText: checkData.cleanText(decodeURI(currentUrl)), urlText: checkData.cleanText(decodeURI(currentUrl)),
url: decodeURI(currentUrl), url: decodeURI(currentUrl),
createdAt: new Date(), createdAt: new Date(headerDate),
}; };
if ($(this.config.breadcrumb).text().length) { if ($(this.config.breadcrumb).text().length) {
......
import { Meteor } from 'meteor/meteor'; import { Meteor } from 'meteor/meteor';
import pify from 'pify';
import { indexWebsite, indexApi, indexNetwork } from '/imports/api/indexation/methods'; import { indexWebsite, indexApi, indexNetwork } from '/imports/api/indexation/methods';
import jobCollection from '/imports/api/job/jobCollection'; import jobCollection from '/imports/api/job/jobCollection';
import elasticsearch from '/imports/libs/elasticsearch/elasticsearch';
/**
 * Ask Elasticsearch to delete every document whose `jobName` equals the
 * name of the given job.
 *
 * The target index is the job's `userId` in lower case and the document
 * type is the job's `type`.
 *
 * @param job job record; `userId`, `type` and `name` are read from it
 * @returns the value returned by `elasticsearch.deleteByQuery`
 *          (presumably a promise, since callers `await` it — confirm)
 */
export function deleteIndexJob(job) {
  const { userId, type, name } = job;
  // Exact-match (term) filter on the jobName field.
  const query = { term: { jobName: name } };
  const request = {
    index: userId.toLowerCase(),
    type,
    body: { query },
  };
  return elasticsearch.deleteByQuery(request);
}
export function addJob(data) { export function addJob(data) {
console.log(data); console.log(data);
...@@ -26,7 +42,7 @@ export function addJob(data) { ...@@ -26,7 +42,7 @@ export function addJob(data) {
* run job if stopped * run job if stopped
* @param idJob * @param idJob
*/ */
export function startJob(idJob) { export async function startJob(idJob) {
const job = jobCollection.findOne({ _id: idJob }); const job = jobCollection.findOne({ _id: idJob });
if (job && job.status === 'running') { if (job && job.status === 'running') {
return { return {
...@@ -34,13 +50,14 @@ export function startJob(idJob) { ...@@ -34,13 +50,14 @@ export function startJob(idJob) {
}; };
} }
try { try {
jobCollection.update({ _id: idJob }, { jobCollection.rawCollection().update({ _id: idJob }, {
$set: { $set: {
status: 'running', status: 'running',
numberIndexed: 0, numberIndexed: 0,
}, },
}); });
let jobIndex = null; let jobIndex = null;
deleteIndexJob(job);
switch (job.type) { switch (job.type) {
case 'website': case 'website':
jobIndex = indexWebsite(job.task); jobIndex = indexWebsite(job.task);
...@@ -54,17 +71,16 @@ export function startJob(idJob) { ...@@ -54,17 +71,16 @@ export function startJob(idJob) {
default: default:
throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de l\'éxécution du job!'); throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de l\'éxécution du job!');
} }
return jobIndex.then((result) => { const result = await jobIndex;
jobCollection.update({ _id: idJob }, { await jobCollection.rawCollection().update({ _id: idJob }, {
$set: { $set: {
status: 'stopped', status: 'stopped',
lastExecutionDate: new Date(), lastExecutionDate: new Date(),
}, },
});
return result;
}); });
return result;
} catch (err) { } catch (err) {
jobCollection.update({ _id: idJob }, { jobCollection.rawCollection().update({ _id: idJob }, {
$set: { $set: {
status: 'stopped', status: 'stopped',
lastExecutionDate: new Date(), lastExecutionDate: new Date(),
...@@ -74,8 +90,19 @@ export function startJob(idJob) { ...@@ -74,8 +90,19 @@ export function startJob(idJob) {
} }
} }
export function deleteJob(id) { export async function deleteJob(id) {
jobCollection.remove({ _id: id }); const job = jobCollection.findOne({ _id: id });
if (job) {
try {
await deleteIndexJob(job);
return jobCollection.rawCollection().remove({ _id: id });
} catch (error) {
console.log(error);
throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de la suppression du job!');
}
} else {
throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de la suppression du job!');
}
} }
Meteor.methods({ Meteor.methods({
......
...@@ -22,7 +22,7 @@ const hooksFormLogin = { ...@@ -22,7 +22,7 @@ const hooksFormLogin = {
title: 'Succès : ', title: 'Succès : ',
message: `Bienvenue ${result.login}!`, message: `Bienvenue ${result.login}!`,
}); });
FlowRouter.go('/admin'); FlowRouter.go('/');
const config = await Meteor.callPromise('getConfig'); const config = await Meteor.callPromise('getConfig');
Session.set('config', config); Session.set('config', config);
}, },
......
...@@ -31,34 +31,43 @@ export default class Search { ...@@ -31,34 +31,43 @@ export default class Search {
query: term, query: term,
fuzziness: 'AUTO', fuzziness: 'AUTO',
fields: [ fields: [
'description.stemmed',
'body.stemmed', 'body.stemmed',
'urlText.stemmed',
'title.stemmed', 'title.stemmed',
], ],
}, },
}, }],
],
should: [{ should: [{
multi_match: { multi_match: {
query: term, query: term,
fuzziness: 'AUTO', fuzziness: 'AUTO',
fields: [ fields: [
'description', 'description',
'description.stemmed',
'urlText', 'urlText',
'body', 'urlText.stemmed',
'title', 'title',
'url', 'url',
'h1',
'urlText',
'urlText.stemmed',
],
boost: 3.0,
},
}, {
multi_match: {
query: term,
fuzziness: 'AUTO',
fields: [
'body',
'breadcrumb', 'breadcrumb',
'h1', 'h1',
'h2', 'h2',
'html', 'html',
'url',
], ],
boost: 1.0,
}, },
}], }],
minimum_should_match: 1, minimum_should_match: 1,
boost: 2.0,
}, },
}; };
} }
......
...@@ -27,8 +27,8 @@ SimpleSchema.testSearchCollection = new SimpleSchema({ ...@@ -27,8 +27,8 @@ SimpleSchema.testSearchCollection = new SimpleSchema({
type: String, type: String,
label: 'Url attendu : ', label: 'Url attendu : ',
autoValue() { autoValue() {
if (this.isInsert) { if (this.value) {
return this.value.replace('https', 'http'); return this.value.replace('https', 'http').replace(/\/$/, '');
} }
}, },
}, },
......
...@@ -72,6 +72,7 @@ exports.analyser = { ...@@ -72,6 +72,7 @@ exports.analyser = {
filter: [ filter: [
'french_elision', 'french_elision',
'lowercase', 'lowercase',
'icu_folding',
], ],
}, },
}, },
...@@ -91,6 +92,9 @@ exports.mapping = { ...@@ -91,6 +92,9 @@ exports.mapping = {
tag: { tag: {
type: 'keyword', type: 'keyword',
}, },
jobName: {
type: 'keyword',
},
domain: { domain: {
type: 'keyword', type: 'keyword',
}, },
......
...@@ -142,6 +142,10 @@ export default { ...@@ -142,6 +142,10 @@ export default {
}); });
}, },
deleteByQuery(query) {
return client.deleteByQuery(query);
},
/** /**
* Create mapping * Create mapping
* @param esIndex * @param esIndex
......
import { Meteor } from 'meteor/meteor';
import { SyncedCron } from 'meteor/percolate:synced-cron'; import { SyncedCron } from 'meteor/percolate:synced-cron';
import notifsCollection from '/imports/api/notif/notifsCollection'; import notifsCollection from '/imports/api/notif/notifsCollection';
import moment from 'moment'; import moment from 'moment';
...@@ -20,6 +19,4 @@ SyncedCron.add({ ...@@ -20,6 +19,4 @@ SyncedCron.add({
}, },
}); });
if (Meteor.isProduction) { SyncedCron.start();
SyncedCron.start();
}
...@@ -3,6 +3,7 @@ import { Meteor } from 'meteor/meteor'; ...@@ -3,6 +3,7 @@ import { Meteor } from 'meteor/meteor';
import { Template } from 'meteor/templating'; import { Template } from 'meteor/templating';
import displayNotif from '/imports/api/notif/notifClient'; import displayNotif from '/imports/api/notif/notifClient';
import Files from '/imports/api/crawl/network/networkCollection'; import Files from '/imports/api/crawl/network/networkCollection';
import swal from 'sweetalert2';
import './buttonTabular.html'; import './buttonTabular.html';
...@@ -51,6 +52,30 @@ Template.jobActionTable.events({ ...@@ -51,6 +52,30 @@ Template.jobActionTable.events({
}); });
}, },
'click .delete-job': function () { 'click .delete-job': function () {
Meteor.call('deleteJob', this._id); swal({
title: 'Attention!',
text: 'Supprimer le job supprimera également les données liées à cet index!',
type: 'warning',
showCancelButton: true,
confirmButtonText: 'Oui!',
cancelButtonText: 'Non!',
}).then(() => {
Meteor.callPromise('deleteJob', this._id)
.then((result) => {
displayNotif({
type: 'success',
title: 'Job : ',
message: 'Job supprimé avec succès!',
save: true,
});
}).catch((error) => {
displayNotif({
type: 'error',
title: 'Job : ',
message: JSON.stringify(error.message),
save: true,
});
});
});
}, },
}); });
...@@ -6,9 +6,9 @@ ...@@ -6,9 +6,9 @@
<div class="panel-body"> <div class="panel-body">
<h4 class="text-center">Gestion de l'indexation en général</h4> <h4 class="text-center">Gestion de l'indexation en général</h4>
<button class="btn btn-danger" id="initElastic">Initialisation ElasticSearch</button> <button class="btn btn-danger" id="initElastic">Initialisation ElasticSearch</button>
<button class="btn btn-primary btn-reindex" id="websiteReindexation">Réindexer sites web</button> <!--<button class="btn btn-primary btn-reindex" id="websiteReindexation">Réindexer sites web</button>
<button class="btn btn-primary btn-reindex" id="apiReindexation">Réindexer API</button> <button class="btn btn-primary btn-reindex" id="apiReindexation">Réindexer API</button>
<button class="btn btn-primary btn-reindex" id="documentReindexation">Réindexer documents</button> <button class="btn btn-primary btn-reindex" id="documentReindexation">Réindexer documents</button>-->
</div> </div>
</div> </div>
</template> </template>
...@@ -1741,7 +1741,7 @@ isstream@~0.1.2: ...@@ -1741,7 +1741,7 @@ isstream@~0.1.2:
version "0.1.2" version "0.1.2"
resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a" resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
izitoast@^1.1.1: izitoast@^1.1.4:
version "1.1.4" version "1.1.4"
resolved "https://registry.yarnpkg.com/izitoast/-/izitoast-1.1.4.tgz#a9f0ab7d9532dde9041a9657df15de6344b9e9d5" resolved "https://registry.yarnpkg.com/izitoast/-/izitoast-1.1.4.tgz#a9f0ab7d9532dde9041a9657df15de6344b9e9d5"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment