Commit 33a87090 authored by Nacim Goura

add search engine

parent 898bc444
......@@ -2,9 +2,7 @@
const Sitemapper = require('sitemapper');
const Crawler = require("crawler");
const utf8 = require('utf8');
const chalk = require('chalk');
const sanitizeHtml = require('sanitize-html');
const Q = require('q');
const searchSite = require('../model/search.server.models');
......@@ -16,8 +14,8 @@ module.exports = {
* @returns array sites
*/
getSiteMap(urlSiteMap) {
let sitemap = new Sitemapper();
let deferred = Q.defer();
let sitemap = new Sitemapper();
sitemap.fetch(urlSiteMap).then((data) => {
deferred.resolve(data.sites);
}, function(err) {
......@@ -29,16 +27,29 @@ module.exports = {
/**
* crawl array sites
* @param listSite
* @param term
*/
indexUrl(listSite) {
let c = new Crawler({
let listError = [];
let deferred = Q.defer();
let crawler = new Crawler({
maxConnections : 1000,
jQuery: {
name: 'cheerio',
options: {
normalizeWhitespace: true,
xmlMode: false,
decodeEntities: true
}
},
// This will be called for each crawled page
callback : function (error, res, done) {
if(error){
console.log(chalk.red(error));
}else if(res.statusCode === 200) {
if(error || res.statusCode !== 200){
listError.push({
url: res.options.uri,
error: error ? error : null,
status: res.statusCode
});
}else {
let $ = res.$;
let urlVisited = res.options.uri;
......@@ -46,7 +57,7 @@ module.exports = {
let obj = {
body : cleanHtml($),
title: $("title").text().toLowerCase(),
urlVisited: utf8.decode(urlVisited)
url: decodeURIComponent(urlVisited)
};
searchSite.index(obj)
.then(function(resp) {
......@@ -62,16 +73,36 @@ module.exports = {
searchSite.initElasticsearch()
.then((success) => {
console.log(chalk.green(success));
c.queue(listSite);
crawler.queue(listSite);
// Runs when the crawler emits 'drain'; reports the collected failures and
// settles the indexing promise.
crawler.on('drain', function(){
    // Test the length: `listError !== []` compares against a brand-new array
    // and is therefore always true, which printed the banner on clean runs too.
    if(listError.length > 0) {
        console.log(chalk.red("================="));
        console.dir(listError, {depth: null});
        console.log(chalk.red("================="));
    }
    deferred.resolve("indexation done");
});
}, function(err) {
console.log(chalk.red(err));
deferred.reject(err);
});
return deferred.promise;
},
search(term) {
let deferred = Q.defer();
searchSite.search(term)
.then((data) => {
deferred.resolve(data.hits.hits);
}, (err) => {
deferred.reject(err);
});
return deferred.promise;
}
};
/**
* remove useless tags
* clean html
*/
function cleanHtml($) {
let bodyText = $('html > body').text().toLowerCase();
......
......@@ -245,16 +245,18 @@ exports.bulk = function (body, callback) {
/**
 * Search object
 *
 * Forwards `esIndex`, `esType` and `params` to `client.search` as `index`,
 * `type` and `body`.
 *
 * NOTE(review): this span appears to carry two variants of the function at
 * once: a callback-style signature with its handler, and a promise-style one
 * built on `Q.defer()`. Presumably only the promise variant is meant to
 * remain; confirm against the real file before relying on either.
 *
 * @param esIndex value passed to `client.search` as `index`
 * @param esType value passed to `client.search` as `type`
 * @param params value passed to `client.search` as `body`
 * @returns (promise variant) `deferred.promise`, resolved with the value
 *          `client.search` fulfils with and rejected with its error
 */
exports.search = function (esIndex, esType, params, callback) {
exports.search = function (esIndex, esType, params) {
let deferred = Q.defer();
client.search({
index: esIndex,
type: esType,
body: params
}, function (err, exists) {
if (callback) {
callback(err, exists);
}
}).then(function (exists) {
deferred.resolve(exists);
}, function(err) {
deferred.reject(err);
});
return deferred.promise;
};
/**
......@@ -262,7 +264,6 @@ exports.search = function (esIndex, esType, params, callback) {
*/
exports.initAnalyser = function (esIndex, obj) {
let deferred = Q.defer();
this.indexExists(esIndex).then((exists) => {
if (exists) {
this.indexDelete(esIndex)
......
......@@ -81,7 +81,7 @@ exports.initElasticsearch = function () {
type: "text",
analyzer: "analyzer_html"
},
urlVisited: {
url: {
type: "text",
analyzer: "standard"
}
......@@ -103,4 +103,35 @@ exports.initElasticsearch = function () {
});
return deferred.promise;
};
exports.search = function(term) {
console.log(term);
let deferred = Q.defer();
let params = {};
if(term) {
params.query = {
match: {
"title": {
query: term,
slop: 50
}
}
};
}
console.log(params);
elasticsearch.search(esIndex, esType, params)
.then((data) => {
console.log(data);
deferred.resolve(data);
}, (err) => {
deferred.reject(err);
});
return deferred.promise;
};
\ No newline at end of file
......@@ -18,9 +18,7 @@
"morgan": "~1.7.0",
"node-sass-middleware": "^0.11.0",
"q": "^1.4.1",
"sanitize-html": "^1.14.1",
"sitemapper": "^2.1.7",
"utf8": "^2.1.2"
"sitemapper": "^2.1.7"
},
"devDependencies": {
"eslint": "^3.16.1"
......
"use strict";
const crawler = require('../controller/crawlerController');
const searchController = require('../controller/searchController');
module.exports = function(app) {
app.route('/').get(function(req, res) {
res.render('index', { title: 'Express' });
res.render('index');
});
app.route('/index').get(function(req, res) {
crawler.getSiteMap('http://www.hautesavoie.fr/sitemap.xml')
app.route('/').post(function(req, res) {
let term = req.body.search;
searchController.search(term)
.then(function(sites) {
crawler.indexUrl(sites);
}, function(err) {
res.render('index', {
sites: sites
});
}, (err) => {
console.log(err);
res.render('index', {
alert: err
});
});
res.render('index', { title: 'Express' });
});
app.route('/api').post(function(req, res) {
crawler.getSiteMap('http://www.hautesavoie.fr/sitemap.xml')
.then(function(data) {
console.log(data);
app.route('/index').get(function(req, res) {
searchController.getSiteMap('http://www.hautesavoie.fr/sitemap.xml')
.then(function(sites) {
searchController.indexUrl(sites)
.then((success) => {
console.log(success);
}, (err) => {
console.log(err);
});
}, function(err) {
console.log(err);
});
res.render('index', { alert: "indexation en cours..."});
});
app.route('/api').post(function() {
});
};
<div class="row">
<form action="/api" method="post">
<input type="search" class="form-control" placeholder="Rechercher...">
<form action="/" method="post">
<input type="search" class="form-control" name="search" placeholder="Rechercher...">
<input type="submit" class="btn btn-success" value="Rechercher">
</form>
</div>
<div class="row">
{{#if sites}}
<ul>
{{#each sites}}
<li>
<a href="{{this._source.url}}">{{this._source.title}}</a>
</li>
{{/each}}
</ul>
{{/if}}
</div>
\ No newline at end of file
<!DOCTYPE html>
<html>
<head>
<title>{{ title }}</title>
<title>IdSearch</title>
<link rel='stylesheet' href='/stylesheets/style.css' />
</head>
<body>
......@@ -9,6 +9,13 @@
{{> header}}
<div class="container">
{{#if alert}}
<div class="alert alert-info">
<strong>Info!</strong> {{alert}}
</div>
{{/if}}
{{{body}}}
</div>
......
......@@ -14,7 +14,7 @@
<ul class="nav navbar-nav">
<li>
<a class="" href="/admin" ><i class="fa fa-lock"></i> Admin</a>
<a class="" href="/index" ><i class="fa fa-lock"></i> Index</a>
</li>
</ul>
</div>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment