この質問 の助けを借りてこのアプリを構築しました。
app.js:
_var mongolib = require('./middlewares/db.js');
var downloaderCoverageWho = require('./routers/downloaderCoverageWho.js');
var downloaderCoverageIta = require('./routers/downloaderCoverageIta.js');
const start = async function() {
const conn = await mongolib.connectToMongoDb();
const coverages = await mongolib.createACollection('coverages');
const isPageHasUpdates = true;
if(isPageHasUpdates) {
await downloadCoverageIta();
await downloadCoverageWho();
}
await mongolib.closeConnection();
await console.log('d3.js creation...');
return 'FINISH';
}
start()
.then(res => console.log(res))
.catch(err => console.log(err));
async function downloadCoverageWho() {
await downloaderCoverageWho.download();
console.log('Finish');
}
async function downloadCoverageIta() {
await downloaderCoverageIta.download();
console.log('Finish');
}
_
db.js:
_var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
const url = 'mongodb://localhost:27017/';
const dbName = 'db';
var collCovName = 'coverages';
var collCov;
let myClient;
let myConn;
var methods = {};
methods.getConnection = function() {
return myConn;
}
methods.connectToMongoDb = async function() {
return MongoClient.connect(url + dbName)
.then(function(conn) {
console.log('Connected to MongoDB');
myClient = conn;
myConn = myClient.db(dbName);
return myConn;
})
.catch(function(err) {
console.log('Error during connection');
throw err;
});
}
methods.closeConnection = async function() {
myClient.close()
.then(function() {
console.log('Connection closed');
})
.catch(function(err) {
console.log('Error closing connection');
throw err;
});
}
methods.createACollection = async function(collectionName) {
return myConn.createCollection(collectionName)
.then(function() {
console.log('Collection', collectionName, 'created');
})
.catch(function(err) {
console.log('Error during creation of collection', collectionName);
throw err;
});
}
methods.insert = async function(collectionName, obj) {
return myConn.collection(collectionName).updateOne(obj, {$set: obj}, {upsert: true})
.then(function(res) {
console.log('Inserted 1 element in', collectionName);
})
.catch(function(err) {
console.log('Error during insertion in', collectionName);
throw err;
});
}
module.exports = methods;
_
downloadCoverageIta.js:
_var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');
var util = require('../helpers/util.js');
var mongolib = require('../middlewares/db.js');
var methods = {};
var outDir = './output/';
var finalFilename = outDir + 'coverage-ita.json'
var urls = [
{year: '2013', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_0_fileAllegati_itemFile_1_file.pdf'},
{year: '2012', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_5_fileAllegati_itemFile_0_file.pdf'},
{year: '2011', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_6_fileAllegati_itemFile_0_file.pdf'},
{year: '2010', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_7_fileAllegati_itemFile_0_file.pdf'},
{year: '2009', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_8_fileAllegati_itemFile_0_file.pdf'},
{year: '2008', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_15_fileAllegati_itemFile_0_file.pdf'},
{year: '2007', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_14_fileAllegati_itemFile_0_file.pdf'},
{year: '2006', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_13_fileAllegati_itemFile_0_file.pdf'},
{year: '2005', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_12_fileAllegati_itemFile_0_file.pdf'},
{year: '2004', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_11_fileAllegati_itemFile_0_file.pdf'},
{year: '2003', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_10_fileAllegati_itemFile_0_file.pdf'},
{year: '2002', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_9_fileAllegati_itemFile_0_file.pdf'},
{year: '2001', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_1_fileAllegati_itemFile_0_file.pdf'},
{year: '2000', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_0_fileAllegati_itemFile_0_file.pdf'}
];
var jsons = [];
methods.download = async function(req, res) {
jsons = await extractText()
.then(function() {
console.log('Extract text success');
})
.catch(function() {
console.log('Extract text error');
});
};
async function extractText() {
var config = {
preserveLineBreaks: true
};
//await extractTextTest();
await urls.forEach(async function(url) {
await textract.fromUrl(url.link, config, async function(error, text) {
if(error) {
throw error;
}
switch(url.year) {
case '2000':
case '2001':
case '2002':
case '2003':
case '2004':
case '2005':
case '2006':
case '2007':
case '2008':
case '2009':
case '2010':
case '2011':
case '2012':
await extractTextType1(url, text)
.then(function() {
console.log('extractTextType1 success');
})
.catch(function() {
console.log('extractTextType1 error');
});
break;
case '2013':
extractTextType2(url, text)
.then(function() {
console.log('extractTextType2 success');
})
.catch(function() {
console.log('extractTextType2 error');
});
break;
default:
console.log('Error: no case');
}
});
});
}
async function extractTextTest() { // THIS WORKS
var obj = {A: 'aa', B: 'bb', C: 'cc'};
await mongolib.insert('coverages', obj);
}
async function extractTextType1(url, text) {
var matrix = [];
var map = [];
var vaccines = [];
var regionsTemp = [];
var regions = [];
var regionLength = [1, 2, 1, 2, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
// text to matrix
var textArray = text.split('\n');
for(var i = 0; i < 23; i++) {
matrix[i] = textArray[i].split(' ');
}
matrix[0].shift();
vaccines = matrix[0];
map[0] = vaccines;
for(var i = 0; i < regionLength.length; i++) {
var j = i + 1;
var indexToRemove = 0;
var numberToRemove = regionLength[i];
var region = matrix[j].splice(indexToRemove, numberToRemove);
regionsTemp.Push(region);
map[j+1] = matrix[j];
}
for(var i = 0; i < regionsTemp.length; i++) {
var region = '';
if(regionLength[i] > 1) {
region = regionsTemp[i].join(' ');
}
else {
region = regionsTemp[i].join('');
}
regions.Push(region);
}
map[1] = regions;
vaccines = map.shift();
regions = map.shift();
var thisJson = await map.reduce(function(result, v, i) {
v.forEach(function(o, k) {
var obj = util.createJsonObjectCoverage(url.year, 'Italy', vaccines[k], regions[i], o);
// save on db
mongolib.insert('coverages', obj); // HERE
result.Push(obj);
});
return result;
}, jsons);
util.printOnFile(jsons, finalFilename);
}
function extractTextType2(url, text) {
var matrix = [];
var map = [];
var vaccines = [];
var regions = [];
var textArray = text.split('\n');
for(var i = 0; i < 36; i++) {
matrix[i] = textArray[i].split(' ');
}
vaccines.Push(matrix[0][1].replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, ''));
for(var i = 1; i < 10; i++) {
vaccines.Push(matrix[i][0].replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, ''));
}
var meningo = ''.concat(matrix[10][0], matrix[11]).replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, '');
vaccines.Push(meningo);
var pneumo = ''.concat(matrix[12][0], ' ', matrix[13]).replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, '');
vaccines.Push(pneumo);
map[0] = vaccines;
for(var i = 14; i < matrix.length; i++) {
regions.Push(matrix[i][0]);
}
map[1] = regions;
for(var i = 14; i < matrix.length; i++) {
matrix[i].shift();
map.Push(matrix[i]);
}
vaccines = map.shift();
regions = map.shift();
var thisJson = map.reduce(function(result, v, i) {
v.forEach(function(o, k) {
var obj = util.createJsonObjectCoverage(url.year, 'Italy', vaccines[k], regions[i], o);
// save on db
mongolib.insert('coverages', obj); // HERE
result.Push(obj);
});
return result;
}, jsons);
util.printOnFile(jsons, finalFilename);
}
module.exports = methods;
_
downloaderCoverageWho.js:
_var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var util = require('../helpers/util.js');
var mongolib = require('../middlewares/db.js');
var methods = {};
var countries = {
'Albania': 'ALB',
'Austria': 'AUT'
};
var outDir = './output/';
var finalData = outDir + 'coverage-eu.json'
var jsons = [];
methods.download = async function(req, res) {
for(country in countries) {
var url = 'http://apps.who.int/immunization_monitoring/globalsummary/coverages?c=' + countries[country];
request(url, (function(country) {
var thisCountry = country;
return function(error, res, html) {
if(error) {
throw error;
}
$ = cheerio.load(html);
var years = [];
var vaccines = [];
var coverages = [];
$('.ts .year').each(function() {
years.Push($(this).text().trim());
});
$('.ts .odd td a, .ts .even td a').each(function() {
vaccines.Push($(this).text().trim());
});
$('.ts .odd .statistics_small, .ts .even .statistics_small').each(function() {
coverages.Push($(this).text().trim());
});
const numYears = years.length;
const numVaccines = vaccines.length;
for(var vaccineIdx = 0; vaccineIdx < numVaccines; vaccineIdx++) {
for(var yearIdx = 0; yearIdx < numYears; yearIdx++) {
let obj = {
year: years[yearIdx],
country: country,
region: "",
vaccine: vaccines[vaccineIdx],
coverage: coverages[vaccineIdx*numYears + yearIdx]
}
jsons.Push(obj);
// save on db
mongolib.insert('coverages', obj); // PROBLEM HERE
}
}
util.printOnFile(jsons, finalData);
}
})(country));
}
};
module.exports = methods;
_
コードを実行すると、次のエラーが表示されます。
(ノード:11952)UnhandledPromiseRejectionWarning:未処理のプロミス拒否(拒否ID:2950):MongoError:サーバーインスタンスプールが破棄されました
両方のファイル(downloaderCoverageIta
とdownloaderCoverageWho
)にも同じ問題があると思います。
私は here を読んで、おそらくinserts
が完了する前にdb.close()
を呼び出しているが、これは正しくない。修正方法がわかりません。
_async/await
_を使用するのは初めてです。どうすれば解決できますか?
問題が見つかりました...
JavaScriptが非同期であるという事実を忘れました:) mongo.close()が問題でした。私はそれをコメントし、それは働いた
すべての要求応答が処理され、データベースアクセスが終了する前にdb.closeConnection()
が呼び出されると思います。
この動作の原因は、リクエストを作成するのを待っているだけで、リクエストの結果(応答)を待っていないことです。
非同期関数内で次のようなステートメントを使用してリクエストを開始します。
_request(url, (function(country) {
...
})(...));
_
つまり、非同期関数はrequest(...)
関数呼び出しを待機しますが、応答を処理するコールバック関数は待機しません。
request(...)
は次の方法でも使用できます。
_const response = await request(url);
_
これで、非同期関数は応答を待機します。
「エラー」:「サーバーインスタンスプールが破壊されました」
mongoose.set( 'useCreateIndex'、true)
このコードのため、このエラーを取得するだけです...コメントとしてマークして、このエラーを再度取得しないでください...