Newer
Older
indexation / index.js
@kieffer kieffer on 2 May 2017 5 KB Upgrade UI
#!/usr/bin/env node

/* global module */
/* jslint node: true */
/* jslint indent: 2 */
'use strict';

/*
Launch command :
node index.js --inputDir=./data/ --outputDir=./public/data/ --publicDir=./public/ --httpPort=3000
*/
const INotifyWait = require('inotifywait'),
  pug = require('pug'),
  async = require('async'),
  express = require('express'),
  serveDirectory = require('serve-index'),
  utils = require('tdm-utils'),
  argv = require('minimist')(process.argv.slice(2)),
  ezs = require('ezs'),
  fs = require('fs'),
  path = require('path');

const config = require('./config.json');

// Express application and the in-memory index of known corpora, keyed by corpus name.
let app = express();
let corpora = {};

// Runtime settings; each one can be overridden from the command line.
const domain = argv.domain || 'localhost';
const httpPort = argv.httpPort || 3000;
const url = `http://${domain}:${httpPort}`;
const publicDir = argv.publicDir ? path.normalize(argv.publicDir) : '/public';
const outputDir = argv.outputDir ? path.normalize(argv.outputDir) : '/public/data';
const inputDir = argv.inputDir ? path.normalize(argv.inputDir) : '/data';

// URL prefix under which the public directory is exposed.
const publicRoute = path.join('/', publicDir);

// Names of the pug views that get a "/<view>/:corpus" route, and the route list printed at startup.
const views = ['d3', 'sigma'];
const routes = ['/corpora', publicRoute, 'd3/:corpus', 'sigma/:corpus'];

// Register the ezs plugins that provide the statements used by the corpus pipeline.
ezs.use(require('ezs-basics'));
ezs.use(require('ezs-istex'));

// Watch the input directory itself (sub-directories are not followed).
let watcher = new INotifyWait(inputDir, { recursive: false });

// Public route: static files first, then the serve-index middleware for the same directory
app.use(publicRoute, express.static(publicDir));
app.use(publicRoute, serveDirectory(publicDir));

// Corpus API: descriptor of a single corpus, looked up by name
app.use(path.join(routes[0], ':corpus'), function(req, res) {
  // Own-property lookup: names such as "constructor" or "__proto__" would otherwise
  // resolve to members inherited from Object.prototype and be returned as a corpus.
  let name = req.params.corpus,
    known = Object.prototype.hasOwnProperty.call(corpora, name) ? corpora[name] : null,
    result = known || "Corpus Not Found";
  res.json(result);
});

// Corpus API: the whole index
app.use(routes[0], function(req, res) {
  res.json(corpora);
});

// Start the HTTP server. Once it is listening: print the configuration, register the
// view routes and the default route, hook the inotify watcher events, and index the
// corpus files already present in inputDir.
app.listen(httpPort, function() {
  console.log('listening on ' + url);
  console.log('publicDir : ' + publicDir);
  console.log('outputDir : ' + outputDir);
  console.log('inputDir : ' + inputDir);
  console.log('routes : ' + routes.join(' - '));
  console.log('views : ' + views.join(' - '));

  // Create one rendering route per view ("/<view>/:corpus")
  views.forEach(function(view) {
    app.use(path.join('/', view, ':corpus'), function(req, res) {
      // Own-property lookup: a name such as "constructor" would otherwise resolve to a
      // member inherited from Object.prototype, and path.join below would throw on
      // its undefined "out" property.
      let name = req.params.corpus,
        corpus = Object.prototype.hasOwnProperty.call(corpora, name) ? corpora[name] : null,
        html = "Corpus Not Found";
      if (corpus) {
        html = pug.renderFile(path.join('views/', view + '.pug'), {
          'publicDir': publicRoute,
          'graph': path.join('/', corpus.out, 'graph.json'),
          'corpus': corpus,
          'data': JSON.stringify(corpus)
        });
      }
      res.send(html);
    });
  });

  // Default route: index page listing the known corpora
  app.use('/', function(req, res) {
    var html = pug.renderFile(path.join('views/index.pug'), {
      'publicDir': publicRoute,
      'corpora': corpora
    });
    res.send(html);
  });

  // A new entry appeared in inputDir: index it unless it is a directory
  watcher.on('add', function(filename, stats) {
    console.log('add', filename, stats);
    if (!stats.isDir) {
      console.log('new file detected');
      initCorpus(outputDir, filename);
    }
  });

  watcher.on('change', function(filename, stats) {
    console.log('change');
  });

  watcher.on('unlink', function(filename, stats) {
    console.log('unlink');
  });

  watcher.on('unknown', function(filename, raw_object, stats) {
    console.log('unknown', raw_object);
  });

  watcher.on('ready', function(unix_process_object) {
    // Nothing to do once the watcher is ready
  });

  watcher.on('close', function() {
    // Nothing to do when the watcher closes
  });

  watcher.on('error', function(err) {
    // Keep a listener so the event is handled, but do not lose the error
    console.error('watcher error', err);
  });

  // Scan inputDir for corpus files that already exist, to rebuild the index
  fs.readdir(inputDir, function(err, files) {
    if (err) {
      // Without a listing there is nothing to iterate over
      return console.error(err);
    }
    return async.each(files, function(file, callback) {
      let fullPath = path.join(inputDir, file); // Full path of the file, starting from inputDir
      return fs.stat(fullPath, function(err, stats) {
        // Stop here on failure: "stats" is undefined and the callback must fire only once
        if (err) return callback(err);
        // The dot is escaped so that only names containing a literal ".corpus" match
        if (stats.isFile() && /[\w\W]+\.corpus/.test(path.basename(fullPath))) {
          initCorpus(outputDir, fullPath);
        }
        return callback();
      });
    }, function(err) {
      if (err) console.log(err);
    });
  });
});

/**
 * Registers a new corpus file and streams it through the ISTEX processing pipeline.
 *
 * The corpus is first initialised with `utils.corpusManager.init`. On success the
 * descriptor it yields is stored in `corpora` under `res.name`, then `res.file` is
 * piped through the ezs statements below; the final output is written to stdout.
 * On failure the error is logged and nothing is registered.
 *
 * @param {string} outputDir - Directory passed to the corpus manager.
 * @param {string} filename - Path of the corpus file to process.
 */
function addFile(outputDir, filename) {
  console.log('addFile', filename);
  utils.corpusManager.init(filename, outputDir, function(err, res) {
    if (err) {
      // Log the failure so it is not lost; the return value is kept as before
      console.error('addFile', filename, err);
      return err;
    }
    corpora[res.name] = res;
    let keywordsFile = path.join(res.out, 'keywords.json'),
      graphFile = path.join(res.out, 'graph.json');
    fs.createReadStream(res.file)
      // An 'error' event without a listener is thrown and would take the server down
      .on('error', function(streamErr) {
        console.error('addFile', res.file, streamErr);
      })
      .pipe(ezs('stringify'))
      .pipe(ezs('ISTEXCorpus'))
      .pipe(ezs('ISTEXQuery', {
        params: {
          output: config.output
        }
      }))
      .pipe(ezs('ISTEXHarvest'))
      .pipe(ezs('ISTEXRequest'))
      .pipe(ezs('ISTEXDownload', {
        criteria: config.criteria,
        key: config.key,
        out: res.in
      }))
      .pipe(ezs('ISTEXKeywords', {
        out: res.out
      }))
      .pipe(ezs('ISTEXRegroup', {
        out: keywordsFile
      }))
      .pipe(ezs('ISTEXGraphs', {
        options: {
          out: graphFile
        }
      }))
      .pipe(ezs('jsonify'))
      .pipe(process.stdout);
  });
}

/**
 * Indexes the corpus file `filename`.
 *
 * The base name of `filename` is looked up directly under `outputDir` with `fs.stat`.
 * When that call fails the file is handed to `addFile`; otherwise an entry describing
 * the corpus is stored in `corpora` under its base name.
 *
 * @param {string} outputDir - Directory where the corpus is looked up.
 * @param {string} filename - Path of the corpus file.
 */
function initCorpus(outputDir, filename) {
  console.log('initCorpus', filename);
  const corpusName = path.basename(filename);
  const outPath = path.join(outputDir, corpusName);
  return fs.stat(outPath, (statErr) => {
    if (!statErr) {
      const entry = {
        name: corpusName,
        file: outPath
      };
      entry.in = path.join(outputDir, 'in', corpusName);
      entry.out = path.join(outputDir, 'out', corpusName);
      corpora[corpusName] = entry;
      return;
    }
    return addFile(outputDir, filename);
  });
}