Commit 29374670 authored by Nacim Goura

add list for result

parent 986910a2
......@@ -23,7 +23,6 @@ export default class crawlWebsite extends CrawlGeneric {
this.urlWebsite = data.urlWebsite;
this.userId = this.config.userId;
this.config = _.find(this.config.listConfig, n => n.domain === data.nameConfig);
this.numberIndexed = 0;
return this.start();
}
......@@ -65,9 +64,7 @@ export default class crawlWebsite extends CrawlGeneric {
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
],
callback: (error, res, done) => {
if (crawl.queueSize % 1000 === 0) {
console.log(crawl.queueSize, res.options.uri);
}
console.log(crawl.queueSize, res.options.uri);
if (error || res.statusCode !== 200) {
this.listDataError.push({
url: res.options.uri,
......@@ -83,7 +80,7 @@ export default class crawlWebsite extends CrawlGeneric {
// clean url
toQueueUrl = checkData.cleanUrl(toQueueUrl);
// check if same domain name
if (toQueueUrl.includes(this.config.domain)) {
if (toQueueUrl.includes(this.config.domain) && !toQueueUrl.includes('recherche')) {
// check if already visited
if (!this.listUrlAlreadyVisited.includes(toQueueUrl)) {
// check if url has forbidden word
......@@ -106,15 +103,13 @@ export default class crawlWebsite extends CrawlGeneric {
const urlParse = res.options.uri.replace('https', 'http').replace(/\/$/, '');
this.parseData(res.$, urlParse, listPdf, res.headers.date)
.then(() => {
if (this.numberIndexed % 100 === 0) {
jobCollection.update({ name: this.name },
{
set: {
numberIndexed: this.numberIndexed,
},
jobCollection.update({ name: this.name },
{
$inc: {
numberIndexed: 1,
},
);
}
},
);
}).catch((err) => {
console.log(err);
});
......@@ -148,6 +143,11 @@ export default class crawlWebsite extends CrawlGeneric {
}
body.html(checkData.cleanHtml(body.html()));
const title = checkData.cleanText($('title').text());
const description = `${$('meta[name=description]').attr('content')}
${$('meta[property="og:title"]').attr('content')}
${$('meta[name=title]').attr('content')}
${$('meta[property="video:actor"]').attr('content')}
${$('meta[property="video:director"]').attr('content')} `;
const dataForIndex = {
tag: 'website',
jobName: this.name,
......@@ -157,10 +157,10 @@ export default class crawlWebsite extends CrawlGeneric {
// replace - and _ and multiple space for autocompletion
input: title.replace(/[-_]/g, ' ').replace(/[^\S]{2,}/, ' ').trim(),
},
description: $('meta[name=description]').attr('content'),
description: checkData.cleanText(description),
body: checkData.cleanText(body.text()),
html: body.html(),
urlText: checkData.cleanText(decodeURI(currentUrl)),
urlText: checkData.cleanText(decodeURI(currentUrl)).replace(/http|www|html/g, '').replace(/\.|-/g, ' '),
url: decodeURI(currentUrl),
createdAt: new Date(headerDate),
};
......@@ -187,8 +187,6 @@ export default class crawlWebsite extends CrawlGeneric {
this.listDataForIndex.push(dataForIndex);
this.numberIndexed++;
return this.contextIndex.indexOne(_index, _type, _id, dataForIndex);
}
}
<template name="listResultSearch">
  {{!--
    List layout for the search results.
    Reads three helpers: websiteResults, apiResults and documentResults; each is
    expected to expose a `list` array. The whole layout is shown only when
    websiteResults is truthy; otherwise the "no result" message is displayed.
    List items are wrapped in <ul> because <li> is only valid inside a list
    container. Links opened in a new tab carry rel="noopener noreferrer" so the
    target page cannot reach back to this window through window.opener.
  --}}
  <div class="panel panel-default wrapper">
    <div class="panel-body">
      {{#if websiteResults}}
        <div class="row">
          <div class="col-md-12">
            <div class="panel panel-default">
              <div class="panel-heading text-center">
                <h3 class="panel-title">Résultat de site</h3>
              </div>
              <div class="panel-body">
                <ul>
                  {{#each result in websiteResults.list}}
                    <li>
                      <a href="{{result.url}}" target="_blank" rel="noopener noreferrer"><b>{{result.title}}</b> <br>({{result.url}})</a>
                    </li>
                  {{/each}}
                </ul>
              </div>
            </div>
          </div>
        </div>
        <div class="row">
          <div class="col-md-6">
            <div class="panel panel-default">
              <div class="panel-heading text-center">
                <h3 class="panel-title">Résultat de réseaux sociaux</h3>
              </div>
              <div class="panel-body">
                <ul>
                  {{#each result in apiResults.list}}
                    <li>
                      <a href="{{result.url}}" target="_blank" rel="noopener noreferrer">{{result.title}}</a>
                    </li>
                  {{/each}}
                </ul>
              </div>
            </div>
          </div>
          <div class="col-md-6">
            <div class="panel panel-default">
              <div class="panel-heading text-center">
                <h3 class="panel-title">Résultat de fichiers</h3>
              </div>
              <div class="panel-body">
                <ul>
                  {{#each result in documentResults.list}}
                    <li>
                      {{result.attachment.title}}
                    </li>
                  {{/each}}
                </ul>
              </div>
            </div>
          </div>
        </div>
      {{else}}
        <h3 class="text-center">Aucun résultat!</h3>
      {{/if}}
    </div>
  </div>
</template>
import './list.html';
/**
 * Helpers for the list layout of the search results.
 * Each helper returns the value stored in the Session under the key of the same name.
 */
Template.listResultSearch.helpers({
  websiteResults() {
    return Session.get('websiteResults');
  },
  apiResults() {
    return Session.get('apiResults');
  },
  documentResults() {
    return Session.get('documentResults');
  },
});
......@@ -4,54 +4,7 @@
<br>
{{#if websiteResults}}
<div class="row">
<div class="col-md-12">
<div class="panel panel-default">
<div class="panel-heading text-center">
<h3 class="panel-title">Résultat de site</h3>
</div>
<div class="panel-body">
{{#each result in websiteResults.list}}
<li>
<a href="{{result.url}}" target="_blank"><b>{{result.title}}</b> <br>({{result.url}})</a>
</li>
{{/each}}
</div>
</div>
</div>
</div>
<div class="row">
<div class="col-md-6">
<div class="panel panel-default">
<div class="panel-heading text-center">
<h3 class="panel-title">Résultat de réseaux sociaux</h3>
</div>
<div class="panel-body">
{{#each result in apiResults.list}}
<li>
<a href="{{result.url}}" target="_blank">{{result.title}}</a>
</li>
{{/each}}
</div>
</div>
</div>
<div class="col-md-6">
<div class="panel panel-default">
<div class="panel-heading text-center">
<h3 class="panel-title">Résultat de fichiers</h3>
</div>
<div class="panel-body">
{{#each result in documentResults.list}}
<li>
{{result.attachment.title}}
</li>
{{/each}}
</div>
</div>
</div>
</div>
{{/if}}
{{> tabsTpl module="resultSearch"}}
{{> Template.dynamic template=template}}
</template>
import { Template } from 'meteor/templating';
import { Session } from 'meteor/session';
import TabsCollection from '/imports/api/tabs/tabsCollection';
import '/imports/ui/components/tabs/tabs';
import '/imports/ui/components/resultSearch/list/list';
import './resultSearch.html';
/**
 * When the result page is created: clear the active tab stored in the Session,
 * then replace the content of TabsCollection with the tabs declared under
 * `public.tabs.resultSearch` in the Meteor settings.
 */
Template.resultSearchTpl.onCreated(() => {
  Session.set('resultSearch', null);
  const { resultSearch: tabDefinitions } = Meteor.settings.public.tabs;
  // Empty the collection first so only the tabs of this module remain in it.
  TabsCollection.remove({});
  tabDefinitions.forEach((tabDefinition) => {
    TabsCollection.insert(tabDefinition);
  });
});
/**
 * Helpers for the search result page.
 * The three *Results helpers return the Session value stored under the same key.
 */
Template.resultSearchTpl.helpers({
  websiteResults: () => Session.get('websiteResults'),
  apiResults: () => Session.get('apiResults'),
  documentResults: () => Session.get('documentResults'),
  /**
   * Name of the template to render for the active tab.
   * @returns {string|null|undefined} the `layout` of the tab stored in the
   *   `resultSearch` Session key, or null while no tab is stored.
   */
  template() {
    const currentTab = Session.get('resultSearch');
    // onCreated resets this Session key to null, so the helper can run before
    // any tab object has been stored; reading `.layout` on null would throw.
    // NOTE(review): presumably the tabs component stores the active tab here — confirm.
    return currentTab ? currentTab.layout : null;
  },
});
......@@ -77,7 +77,7 @@ const checkData = {
cleanUrl(url) {
return url.trim() // remove space before and after
.replace('https', 'http') // replace https by http
.replace(/#.*/, '') // remove # and text after
.replace(/[#?].*/, '') // remove # and text after
.replace(/\/$/, '') // remove last slash
.toLowerCase(); // lowercase
},
......
......@@ -72,6 +72,17 @@
"closable": false,
"sort": 2
}
],
"resultSearch": [
{
"module": "resultSearch",
"layout": "listResultSearch",
"label": "Liste",
"state": {},
"activ": true,
"closable": false,
"sort": 1
}
]
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment