Commit 29374670 authored by Nacim Goura

add list for result

parent 986910a2
...@@ -23,7 +23,6 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -23,7 +23,6 @@ export default class crawlWebsite extends CrawlGeneric {
this.urlWebsite = data.urlWebsite; this.urlWebsite = data.urlWebsite;
this.userId = this.config.userId; this.userId = this.config.userId;
this.config = _.find(this.config.listConfig, n => n.domain === data.nameConfig); this.config = _.find(this.config.listConfig, n => n.domain === data.nameConfig);
this.numberIndexed = 0;
return this.start(); return this.start();
} }
...@@ -65,9 +64,7 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -65,9 +64,7 @@ export default class crawlWebsite extends CrawlGeneric {
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
], ],
callback: (error, res, done) => { callback: (error, res, done) => {
if (crawl.queueSize % 1000 === 0) { console.log(crawl.queueSize, res.options.uri);
console.log(crawl.queueSize, res.options.uri);
}
if (error || res.statusCode !== 200) { if (error || res.statusCode !== 200) {
this.listDataError.push({ this.listDataError.push({
url: res.options.uri, url: res.options.uri,
...@@ -83,7 +80,7 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -83,7 +80,7 @@ export default class crawlWebsite extends CrawlGeneric {
// clean url // clean url
toQueueUrl = checkData.cleanUrl(toQueueUrl); toQueueUrl = checkData.cleanUrl(toQueueUrl);
// check if same domain name // check if same domain name
if (toQueueUrl.includes(this.config.domain)) { if (toQueueUrl.includes(this.config.domain) && !toQueueUrl.includes('recherche')) {
// check if already visited // check if already visited
if (!this.listUrlAlreadyVisited.includes(toQueueUrl)) { if (!this.listUrlAlreadyVisited.includes(toQueueUrl)) {
// check if url has forbidden word // check if url has forbidden word
...@@ -106,15 +103,13 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -106,15 +103,13 @@ export default class crawlWebsite extends CrawlGeneric {
const urlParse = res.options.uri.replace('https', 'http').replace(/\/$/, ''); const urlParse = res.options.uri.replace('https', 'http').replace(/\/$/, '');
this.parseData(res.$, urlParse, listPdf, res.headers.date) this.parseData(res.$, urlParse, listPdf, res.headers.date)
.then(() => { .then(() => {
if (this.numberIndexed % 100 === 0) { jobCollection.update({ name: this.name },
jobCollection.update({ name: this.name }, {
{ $inc: {
set: { numberIndexed: 1,
numberIndexed: this.numberIndexed,
},
}, },
); },
} );
}).catch((err) => { }).catch((err) => {
console.log(err); console.log(err);
}); });
...@@ -148,6 +143,11 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -148,6 +143,11 @@ export default class crawlWebsite extends CrawlGeneric {
} }
body.html(checkData.cleanHtml(body.html())); body.html(checkData.cleanHtml(body.html()));
const title = checkData.cleanText($('title').text()); const title = checkData.cleanText($('title').text());
const description = `${$('meta[name=description]').attr('content')}
${$('meta[property="og:title"]').attr('content')}
${$('meta[name=title]').attr('content')}
${$('meta[property="video:actor"]').attr('content')}
${$('meta[property="video:director"]').attr('content')} `;
const dataForIndex = { const dataForIndex = {
tag: 'website', tag: 'website',
jobName: this.name, jobName: this.name,
...@@ -157,10 +157,10 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -157,10 +157,10 @@ export default class crawlWebsite extends CrawlGeneric {
// replace - and _ and multiple space for autocompletion // replace - and _ and multiple space for autocompletion
input: title.replace(/[-_]/g, ' ').replace(/[^\S]{2,}/, ' ').trim(), input: title.replace(/[-_]/g, ' ').replace(/[^\S]{2,}/, ' ').trim(),
}, },
description: $('meta[name=description]').attr('content'), description: checkData.cleanText(description),
body: checkData.cleanText(body.text()), body: checkData.cleanText(body.text()),
html: body.html(), html: body.html(),
urlText: checkData.cleanText(decodeURI(currentUrl)), urlText: checkData.cleanText(decodeURI(currentUrl)).replace(/http|www|html/g, '').replace(/\.|-/g, ' '),
url: decodeURI(currentUrl), url: decodeURI(currentUrl),
createdAt: new Date(headerDate), createdAt: new Date(headerDate),
}; };
...@@ -187,8 +187,6 @@ export default class crawlWebsite extends CrawlGeneric { ...@@ -187,8 +187,6 @@ export default class crawlWebsite extends CrawlGeneric {
this.listDataForIndex.push(dataForIndex); this.listDataForIndex.push(dataForIndex);
this.numberIndexed++;
return this.contextIndex.indexOne(_index, _type, _id, dataForIndex); return this.contextIndex.indexOne(_index, _type, _id, dataForIndex);
} }
} }
<template name="listResultSearch">
  {{!-- List layout for the search results. Reads three helpers: websiteResults,
        apiResults and documentResults, each expected to expose a `list` array.
        The whole layout is gated on websiteResults: when it is falsy only the
        "Aucun résultat!" message is rendered. --}}
  <div class="panel panel-default wrapper">
    <div class="panel-body">
      {{#if websiteResults}}
        {{!-- Website results: full-width panel. --}}
        <div class="row">
          <div class="col-md-12">
            <div class="panel panel-default">
              <div class="panel-heading text-center">
                <h3 class="panel-title">Résultat de site</h3>
              </div>
              <div class="panel-body">
                {{!-- <li> must live inside a list container to be valid HTML. --}}
                <ul>
                  {{#each result in websiteResults.list}}
                    <li>
                      {{!-- rel="noopener noreferrer": the target page is a crawled,
                            untrusted URL and must not get access to window.opener. --}}
                      <a href="{{result.url}}" target="_blank" rel="noopener noreferrer"><b>{{result.title}}</b> <br>({{result.url}})</a>
                    </li>
                  {{/each}}
                </ul>
              </div>
            </div>
          </div>
        </div>
        {{!-- Social network and file results: two half-width panels. --}}
        <div class="row">
          <div class="col-md-6">
            <div class="panel panel-default">
              <div class="panel-heading text-center">
                <h3 class="panel-title">Résultat de réseaux sociaux</h3>
              </div>
              <div class="panel-body">
                <ul>
                  {{#each result in apiResults.list}}
                    <li>
                      <a href="{{result.url}}" target="_blank" rel="noopener noreferrer">{{result.title}}</a>
                    </li>
                  {{/each}}
                </ul>
              </div>
            </div>
          </div>
          <div class="col-md-6">
            <div class="panel panel-default">
              <div class="panel-heading text-center">
                <h3 class="panel-title">Résultat de fichiers</h3>
              </div>
              <div class="panel-body">
                <ul>
                  {{#each result in documentResults.list}}
                    <li>
                      {{result.attachment.title}}
                    </li>
                  {{/each}}
                </ul>
              </div>
            </div>
          </div>
        </div>
      {{else}}
        <h3 class="text-center">Aucun résultat!</h3>
      {{/if}}
    </div>
  </div>
</template>
import './list.html';
/**
 * Helpers of the listResultSearch template.
 * Each helper returns whatever is stored under the Session key of the same name.
 */
Template.listResultSearch.helpers({
  websiteResults() {
    return Session.get('websiteResults');
  },
  apiResults() {
    return Session.get('apiResults');
  },
  documentResults() {
    return Session.get('documentResults');
  },
});
...@@ -4,54 +4,7 @@ ...@@ -4,54 +4,7 @@
<br> <br>
{{#if websiteResults}} {{> tabsTpl module="resultSearch"}}
<div class="row"> {{> Template.dynamic template=template}}
<div class="col-md-12">
<div class="panel panel-default">
<div class="panel-heading text-center">
<h3 class="panel-title">Résultat de site</h3>
</div>
<div class="panel-body">
{{#each result in websiteResults.list}}
<li>
<a href="{{result.url}}" target="_blank"><b>{{result.title}}</b> <br>({{result.url}})</a>
</li>
{{/each}}
</div>
</div>
</div>
</div>
<div class="row">
<div class="col-md-6">
<div class="panel panel-default">
<div class="panel-heading text-center">
<h3 class="panel-title">Résultat de réseaux sociaux</h3>
</div>
<div class="panel-body">
{{#each result in apiResults.list}}
<li>
<a href="{{result.url}}" target="_blank">{{result.title}}</a>
</li>
{{/each}}
</div>
</div>
</div>
<div class="col-md-6">
<div class="panel panel-default">
<div class="panel-heading text-center">
<h3 class="panel-title">Résultat de fichiers</h3>
</div>
<div class="panel-body">
{{#each result in documentResults.list}}
<li>
{{result.attachment.title}}
</li>
{{/each}}
</div>
</div>
</div>
</div>
{{/if}}
</template> </template>
import { Template } from 'meteor/templating'; import { Template } from 'meteor/templating';
import { Session } from 'meteor/session'; import { Session } from 'meteor/session';
import TabsCollection from '/imports/api/tabs/tabsCollection';
import '/imports/ui/components/tabs/tabs';
import '/imports/ui/components/resultSearch/list/list';
import './resultSearch.html'; import './resultSearch.html';
/**
 * On creation of resultSearchTpl, reset the tab state:
 * clear the `resultSearch` session key, empty TabsCollection and refill it
 * with the tabs declared under Meteor.settings.public.tabs.resultSearch.
 */
Template.resultSearchTpl.onCreated(() => {
  Session.set('resultSearch', null);
  // Read the settings before touching the collection, so a missing
  // configuration fails before any tab is removed.
  const { resultSearch: configuredTabs } = Meteor.settings.public.tabs;
  TabsCollection.remove({});
  configuredTabs.forEach((tabDefinition) => {
    TabsCollection.insert(tabDefinition);
  });
});
Template.resultSearchTpl.helpers({ Template.resultSearchTpl.helpers({
websiteResults: () => Session.get('websiteResults'), template() {
apiResults: () => Session.get('apiResults'), const currentTab = Session.get('resultSearch');
documentResults: () => Session.get('documentResults'), return currentTab.layout;
},
}); });
...@@ -77,7 +77,7 @@ const checkData = { ...@@ -77,7 +77,7 @@ const checkData = {
cleanUrl(url) { cleanUrl(url) {
return url.trim() // remove space before and after return url.trim() // remove space before and after
.replace('https', 'http') // replace https by http .replace('https', 'http') // replace https by http
.replace(/#.*/, '') // remove # and text after .replace(/[#?].*/, '') // remove # and text after
.replace(/\/$/, '') // remove last slash .replace(/\/$/, '') // remove last slash
.toLowerCase(); // lowercase .toLowerCase(); // lowercase
}, },
......
...@@ -72,6 +72,17 @@ ...@@ -72,6 +72,17 @@
"closable": false, "closable": false,
"sort": 2 "sort": 2
} }
],
"resultSearch": [
{
"module": "resultSearch",
"layout": "listResultSearch",
"label": "Liste",
"state": {},
"activ": true,
"closable": false,
"sort": 1
}
] ]
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment