Commit b668f68b authored by Nacim Goura

optimization index job

parent 9d8e0551
......@@ -9,7 +9,7 @@ const hooksFormApiCrawl = {
userId: Meteor.userId(),
type: 'info',
title: 'indexApi',
message: `Début de l'indexation ${doc.type} pour ${doc.idPage}!`,
message: 'Ajouté à la liste de job!',
save: true,
});
return doc;
......
......@@ -8,6 +8,7 @@ export default class CrawlTwitter {
constructor(config, data) {
console.log('crawl Twittter');
this.name = data.name;
this.config = config;
this.data = data;
this.listDataForIndex = [];
......@@ -44,6 +45,7 @@ export default class CrawlTwitter {
_.forEach(tweets, (item, index) => {
const dataForIndex = {
tag: 'api',
jobName: this.name,
apiName: 'twitter',
domain: this.data.idPage,
url: `https://twitter.com/${this.data.idPage}/status/${item.id_str}`,
......
......@@ -9,6 +9,7 @@ export default class CrawlNetwork extends CrawlGeneric {
constructor(data) {
super();
this.name = data.name;
this.fileToIndex = [];
if (data.listFilename) {
this.fileToIndex = data.listFilename;
......@@ -40,6 +41,7 @@ export default class CrawlNetwork extends CrawlGeneric {
this.listDataForIndex.push({
tag: 'network',
jobName: this.name,
data: this.base64Encode(file.path),
createdAt: new Date(),
});
......
......@@ -48,8 +48,13 @@ export default class crawlWebsite extends CrawlGeneric {
console.log(`${urls.length} à parser!`);
const crawl = new Crawler({
retries: 5,
skipDuplicates: true,
userAgent: 'Mozilla/5.0 (compatible; fr-crawler/1.1)',
rotateUA: true,
userAgent: [
'Mozilla/5.0 (compatible; fr-crawler/1.1)',
'Googlebot/2.1 (+http://www.google.com/bot.html)',
],
callback: (error, res, done) => {
if (error || res.statusCode !== 200) {
this.listDataError.push({
......@@ -91,7 +96,8 @@ export default class crawlWebsite extends CrawlGeneric {
});
// if url has not parameter => parse data
if (!/(#.*|\?.*)/g.test(res.options.uri)) {
this.parseData(res.$, res.options.uri, listPdf)
const urlParse = res.options.uri.replace('https', 'http').replace(/\/$/, '');
this.parseData(res.$, urlParse, listPdf, res.headers.date)
.then(() => {
jobCollection.update({ name: this.name },
{
......@@ -123,8 +129,9 @@ export default class crawlWebsite extends CrawlGeneric {
* @param $
* @param currentUrl
* @param listPdf
* @param headerDate
*/
async parseData($, currentUrl, listPdf) {
async parseData($, currentUrl, listPdf, headerDate) {
const body = $('body');
if (this.config.excludeElement) {
$(this.config.excludeElement).remove();
......@@ -133,6 +140,7 @@ export default class crawlWebsite extends CrawlGeneric {
const title = checkData.cleanText($('title').text());
const dataForIndex = {
tag: 'website',
jobName: this.name,
domain: this.config.domain,
title,
title_suggest: {
......@@ -143,7 +151,7 @@ export default class crawlWebsite extends CrawlGeneric {
html: body.html(),
urlText: checkData.cleanText(decodeURI(currentUrl)),
url: decodeURI(currentUrl),
createdAt: new Date(),
createdAt: new Date(headerDate),
};
if ($(this.config.breadcrumb).text().length) {
......
import { Meteor } from 'meteor/meteor';
import pify from 'pify';
import { indexWebsite, indexApi, indexNetwork } from '/imports/api/indexation/methods';
import jobCollection from '/imports/api/job/jobCollection';
import elasticsearch from '/imports/libs/elasticsearch/elasticsearch';
/**
 * Remove from Elasticsearch the documents that were indexed by the given job.
 *
 * The target index is the lower-cased `job.userId`, the document type is
 * `job.type`, and documents are selected with a `term` query on `jobName`.
 *
 * @param job job document; `userId`, `type` and `name` are read from it
 * @returns whatever `elasticsearch.deleteByQuery` returns for that request
 */
export function deleteIndexJob(job) {
  const { userId, type, name: jobName } = job;
  const term = { jobName };
  const request = {
    index: userId.toLowerCase(),
    type,
    body: { query: { term } },
  };
  return elasticsearch.deleteByQuery(request);
}
export function addJob(data) {
console.log(data);
......@@ -26,7 +42,7 @@ export function addJob(data) {
* run job if stopped
* @param idJob
*/
export function startJob(idJob) {
export async function startJob(idJob) {
const job = jobCollection.findOne({ _id: idJob });
if (job && job.status === 'running') {
return {
......@@ -34,13 +50,14 @@ export function startJob(idJob) {
};
}
try {
jobCollection.update({ _id: idJob }, {
jobCollection.rawCollection().update({ _id: idJob }, {
$set: {
status: 'running',
numberIndexed: 0,
},
});
let jobIndex = null;
deleteIndexJob(job);
switch (job.type) {
case 'website':
jobIndex = indexWebsite(job.task);
......@@ -54,17 +71,16 @@ export function startJob(idJob) {
default:
throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de l\'éxécution du job!');
}
return jobIndex.then((result) => {
jobCollection.update({ _id: idJob }, {
const result = await jobIndex;
await jobCollection.rawCollection().update({ _id: idJob }, {
$set: {
status: 'stopped',
lastExecutionDate: new Date(),
},
});
return result;
});
} catch (err) {
jobCollection.update({ _id: idJob }, {
jobCollection.rawCollection().update({ _id: idJob }, {
$set: {
status: 'stopped',
lastExecutionDate: new Date(),
......@@ -74,8 +90,19 @@ export function startJob(idJob) {
}
}
export function deleteJob(id) {
jobCollection.remove({ _id: id });
/**
 * Delete a job together with the documents it indexed.
 *
 * The indexed documents are removed first (via `deleteIndexJob`); the job
 * record is only removed once that step succeeded.
 *
 * @param id _id of the job to delete
 * @returns result of the raw collection `remove` call
 * @throws Meteor.Error when the job does not exist, or when removing either
 * the indexed documents or the job record fails
 */
export async function deleteJob(id) {
  const job = jobCollection.findOne({ _id: id });
  if (!job) {
    throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de la suppression du job!');
  }
  try {
    await deleteIndexJob(job);
    // `await` is required here: returning the bare promise would let a
    // rejected removal escape this try/catch without being logged or wrapped.
    return await jobCollection.rawCollection().remove({ _id: id });
  } catch (error) {
    console.log(error);
    throw new Meteor.Error('Erreur', 'Une erreur s\'est produite lors de la suppression du job!');
  }
}
Meteor.methods({
......
......@@ -22,7 +22,7 @@ const hooksFormLogin = {
title: 'Succès : ',
message: `Bienvenue ${result.login}!`,
});
FlowRouter.go('/admin');
FlowRouter.go('/');
const config = await Meteor.callPromise('getConfig');
Session.set('config', config);
},
......
......@@ -31,34 +31,43 @@ export default class Search {
query: term,
fuzziness: 'AUTO',
fields: [
'description.stemmed',
'body.stemmed',
'urlText.stemmed',
'title.stemmed',
],
},
},
],
}],
should: [{
multi_match: {
query: term,
fuzziness: 'AUTO',
fields: [
'description',
'description.stemmed',
'urlText',
'body',
'urlText.stemmed',
'title',
'url',
'h1',
'urlText',
'urlText.stemmed',
],
boost: 3.0,
},
}, {
multi_match: {
query: term,
fuzziness: 'AUTO',
fields: [
'body',
'breadcrumb',
'h1',
'h2',
'html',
'url',
],
boost: 1.0,
},
}],
minimum_should_match: 1,
boost: 2.0,
},
};
}
......
......@@ -27,8 +27,8 @@ SimpleSchema.testSearchCollection = new SimpleSchema({
type: String,
label: 'Url attendu : ',
autoValue() {
if (this.isInsert) {
return this.value.replace('https', 'http');
if (this.value) {
return this.value.replace('https', 'http').replace(/\/$/, '');
}
},
},
......
......@@ -72,6 +72,7 @@ exports.analyser = {
filter: [
'french_elision',
'lowercase',
'icu_folding',
],
},
},
......@@ -91,6 +92,9 @@ exports.mapping = {
tag: {
type: 'keyword',
},
jobName: {
type: 'keyword',
},
domain: {
type: 'keyword',
},
......
......@@ -142,6 +142,10 @@ export default {
});
},
/**
 * Delete by query
 * Forwards `query` unchanged to `client.deleteByQuery` and returns its result.
 * NOTE(review): `client` is defined outside this excerpt — presumably the
 * Elasticsearch client instance; confirm.
 * @param query
 */
deleteByQuery(query) {
return client.deleteByQuery(query);
},
/**
* Create mapping
* @param esIndex
......
import { Meteor } from 'meteor/meteor';
import { SyncedCron } from 'meteor/percolate:synced-cron';
import notifsCollection from '/imports/api/notif/notifsCollection';
import moment from 'moment';
......@@ -20,6 +19,4 @@ SyncedCron.add({
},
});
if (Meteor.isProduction) {
SyncedCron.start();
}
SyncedCron.start();
......@@ -3,6 +3,7 @@ import { Meteor } from 'meteor/meteor';
import { Template } from 'meteor/templating';
import displayNotif from '/imports/api/notif/notifClient';
import Files from '/imports/api/crawl/network/networkCollection';
import swal from 'sweetalert2';
import './buttonTabular.html';
......@@ -51,6 +52,30 @@ Template.jobActionTable.events({
});
},
'click .delete-job': function () {
Meteor.call('deleteJob', this._id);
swal({
title: 'Attention!',
text: 'Supprimer le job supprimera également les données liées à cet index!',
type: 'warning',
showCancelButton: true,
confirmButtonText: 'Oui!',
cancelButtonText: 'Non!',
}).then(() => {
Meteor.callPromise('deleteJob', this._id)
.then((result) => {
displayNotif({
type: 'success',
title: 'Job : ',
message: 'Job supprimé avec succès!',
save: true,
});
}).catch((error) => {
displayNotif({
type: 'error',
title: 'Job : ',
message: JSON.stringify(error.message),
save: true,
});
});
});
},
});
......@@ -6,9 +6,9 @@
<div class="panel-body">
<h4 class="text-center">Gestion de l'indexation en général</h4>
<button class="btn btn-danger" id="initElastic">Initialisation ElasticSearch</button>
<button class="btn btn-primary btn-reindex" id="websiteReindexation">Réindexer sites web</button>
<!--<button class="btn btn-primary btn-reindex" id="websiteReindexation">Réindexer sites web</button>
<button class="btn btn-primary btn-reindex" id="apiReindexation">Réindexer API</button>
<button class="btn btn-primary btn-reindex" id="documentReindexation">Réindexer documents</button>
<button class="btn btn-primary btn-reindex" id="documentReindexation">Réindexer documents</button>-->
</div>
</div>
</template>
......@@ -1741,7 +1741,7 @@ isstream@~0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
izitoast@^1.1.1:
izitoast@^1.1.4:
version "1.1.4"
resolved "https://registry.yarnpkg.com/izitoast/-/izitoast-1.1.4.tgz#a9f0ab7d9532dde9041a9657df15de6344b9e9d5"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment