/*
██████╗ ██╗ ██╗██████╗ ██████╗ ██╗ ██╗██████╗ ██╗ ██╗███╗ ███╗██████╗
██╔══██╗██║ ██║██╔══██╗██╔══██╗██║ ██║██╔══██╗██║ ██║████╗ ████║██╔══██╗
██████╔╝██║ ██║██████╔╝██████╔╝██║ ██║██║ ██║██║ ██║██╔████╔██║██████╔╝
██╔══██╗██║ ██║██╔══██╗██╔══██╗██║ ██║██║ ██║██║ ██║██║╚██╔╝██║██╔═══╝
██████╔╝╚██████╔╝██║ ██║██████╔╝╚██████╔╝██████╔╝╚██████╔╝██║ ╚═╝ ██║██║
╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝
by Clorhidrico https://www.burbuja.info/inmobiliaria/members/clorhidrico.132035
CHANGELOG:
v0.8
-Bug fix
v0.7
-Output dir (es obligatorio poner antes el usuario y el pass. Si no se quiere hacer login o no estás registrado
se puede poner como usuario un espacio " " y password la que sea y luego el directorio de salida).
Por defecto el directorio de salida es el mismo donde está este script.
-StatusCode check
v0.6
-Login error fix
v0.5
-Opción de descargar a partir de una página en adelante (ejemplo descarga página 3 hasta el final):
https://www.burbuja.info/inmobiliaria/threads/te-tocan-los-130-000-000-euros-del-euromillon-que-haces.1658681/page-3
-Opción de descargar una única página (ejemplo descargar página 4):
https://www.burbuja.info/inmobiliaria/threads/te-tocan-los-130-000-000-euros-del-euromillon-que-haces.1658681/page-4!
v0.4
-Bug arreglado
v0.3
-Soporte para enlaces de hilos en formato antiguo
v0.2
-Limpieza
*/
"use strict";
var VERSION = 0.8;
// Default timeout (ms) used by waitFor() when polling the page.
var WAITFOR_TIMEOUT = 10000;
// Per-resource network timeout (ms) assigned to page.settings.resourceTimeout.
var PAGE_TIMEOUT = 15000;
var fs = require('fs');
var system = require('system');

// Directory under which one sub-directory per thread is created.
// When no (or an empty) <output_dir> argument is given, fall back to the
// script's own directory; an empty prefix would turn every output path
// into "/<thread>", i.e. the filesystem root.
var OUTPUT_DIR = (system.args.length >= 5 && system.args[4] !== "") ? system.args[4] : phantom.libraryPath;
// Status of the most recently received resource (written by save_thread's page hook).
var statusCode = null;
// Thread URLs read from a list file, or null when a single URL was given.
var threads_list = null;
// Index into threads_list of the thread currently being saved.
var conta_threads = 0;

/**
 * Converts a user-supplied link into a current-format thread URL.
 *
 * @param {string} link - Candidate link (current or old forum format).
 * @returns {?string} The link itself when it already is a thread URL, the
 *   rewritten URL for an old-format ".../burbuja-inmobiliaria/<id>-<slug>.html"
 *   link, or null when the link is not recognised.
 */
function normalize_thread_url(link) {
    var thread_re = /^https:\/\/www\.burbuja\.info\/inmobiliaria\/threads\//;
    var old_re = /^https:\/\/www\.burbuja\.info\/inmobiliaria\/burbuja-inmobiliaria\/([0-9]+)-(.*?)\.html/;
    if (thread_re.test(link)) {
        return link;
    }
    // Old format: "<id>-<slug>.html" becomes "<slug>.<id>".
    var matches = link.match(old_re);
    if (matches) {
        return 'https://www.burbuja.info/inmobiliaria/threads/' + matches[2] + '.' + matches[1];
    }
    return null;
}

/**
 * Entry point: validates the arguments and starts saving either every
 * thread listed in a file or the single thread given on the command line.
 */
function main() {
    if (system.args.length < 2) {
        console.log('ERROR: faltan argumentos');
        console.log(system.args[0] + ' url_hilo [<usuario> <pass> [<output_dir>]]');
        phantom.exit(1);
        // phantom.exit() does not stop the running script, so leave explicitly.
        return;
    }

    var target = system.args[1];
    var thread;

    if (fs.exists(target)) {
        // The argument is a file containing one link per line.
        var file_h = fs.open(target, 'r');
        threads_list = [];
        try {
            while (!file_h.atEnd()) {
                // Trim so stray whitespace or a trailing "\r" does not hide a valid link.
                thread = normalize_thread_url(file_h.readLine().trim());
                if (thread) {
                    console.log(thread);
                    threads_list.push(thread);
                }
            }
        } finally {
            file_h.close();
        }
        if (threads_list.length > 0) {
            save_thread(threads_list[conta_threads]);
        } else {
            phantom.exit();
        }
        return;
    }

    thread = normalize_thread_url(target);
    if (thread) {
        console.log(thread);
        save_thread(thread);
    } else {
        phantom.exit();
    }
}

print_banner();
main();
/**
 * Prints the ASCII-art logo followed by the version/author line and a
 * blank line.
 */
function print_banner() {
    var art = [
        '██████╗ ██╗ ██╗██████╗ ██████╗ ██╗ ██╗██████╗ ██╗ ██╗███╗ ███╗██████╗ ',
        '██╔══██╗██║ ██║██╔══██╗██╔══██╗██║ ██║██╔══██╗██║ ██║████╗ ████║██╔══██╗',
        '██████╔╝██║ ██║██████╔╝██████╔╝██║ ██║██║ ██║██║ ██║██╔████╔██║██████╔╝',
        '██╔══██╗██║ ██║██╔══██╗██╔══██╗██║ ██║██║ ██║██║ ██║██║╚██╔╝██║██╔═══╝ ',
        '██████╔╝╚██████╔╝██║ ██║██████╔╝╚██████╔╝██████╔╝╚██████╔╝██║ ╚═╝ ██║██║ ',
        '╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ '
    ];
    var footer = ['v' + VERSION + ' by clorhidrico', ''];
    // One console.log call per line, logo first, then the footer lines.
    art.concat(footer).forEach(function(row) {
        console.log(row);
    });
}
/**
 * Sets up a fresh web page for one thread, creates the thread's output
 * directory and starts the download, logging in first when credentials
 * were supplied on the command line.
 *
 * The URL may end in "/page-N" (start at page N) or "/page-N!" (save only
 * page N); that suffix is parsed and then removed from the base URL.
 *
 * @param {string} url - Thread URL, optionally with a page suffix.
 */
function save_thread(url) {
    console.log('');

    // [1] = page number, [2] = "!" when only that page is wanted.
    var page_spec = /page-([0-9]+)(!)?[^/]*$/.exec(url);
    // Drop a trailing dot-less path segment (the "/page-N" part, or a bare "/").
    var base_url = url.replace(/\/[^/.]*$/, '');
    // Name used for the output directory: the URL with "/" turned into "_" and the site prefix removed.
    var hilo = base_url.replace(/\//g, '_').replace(/https:__www\.burbuja\.info_inmobiliaria_threads_/, '');

    var page = require('webpage').create();
    page.settings.resourceTimeout = PAGE_TIMEOUT;
    page.viewportSize = {
        width: 1080,
        height: 1920
    };
    page.customHeaders = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0'
    };
    // NOTE(review): this records the status of every received resource, so
    // the retry checks elsewhere see the last one received — confirm that is intended.
    page.onResourceReceived = function(response) {
        statusCode = response.status;
    };
    // Errors raised by the page's own scripts are deliberately ignored.
    page.onError = function(msg, trace) {
        return;
    };

    fs.makeDirectory(OUTPUT_DIR + "/" + hilo);

    page.includeJs('https://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js', function() {
        // A single-space user name means "do not log in".
        var has_credentials = system.args.length >= 4 &&
            system.args[2].length > 0 &&
            system.args[2] != " " &&
            system.args[3].length > 0;

        if (has_credentials) {
            if (page_spec) {
                login(base_url, page, page_spec[1], Boolean(page_spec[2]));
            } else {
                login(base_url, page);
            }
            return;
        }

        console.log("Guardando hilo...");
        if (page_spec) {
            save_page(base_url, page, page_spec[1], Boolean(page_spec[2]));
        } else {
            save_page(base_url, page, 1);
        }
    });
}
//Modified to accept a timeout callback
/**
 * Polls a condition every 250 ms and runs a callback once it holds or once
 * the timeout has elapsed.
 *
 * The condition is detected on one tick and the callback fires on the next
 * one. The timer is always stopped BEFORE a callback runs, so a callback
 * that throws (or that starts a new waitFor) can never be invoked a second
 * time by a timer that is still alive.
 *
 * @param {(function(): boolean|string)} testFx - Condition to poll. A string
 *   is evaluated with eval() (kept for backward compatibility; never pass
 *   untrusted text).
 * @param {(function()|string)} onReady - Called once the condition is true.
 * @param {number} [timeOutMillis] - Maximum wait in ms; a falsy value selects
 *   WAITFOR_TIMEOUT.
 * @param {(function()|string)} [onTimeout] - Called when the timeout expires
 *   with the condition still false. When omitted, a message is logged and
 *   PhantomJS exits with status 1.
 */
function waitFor(testFx, onReady, timeOutMillis, onTimeout) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : WAITFOR_TIMEOUT,
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ((new Date().getTime() - start < maxtimeOutMillis) && !condition) {
                // Not timed out and not fulfilled yet: evaluate the condition again.
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                return;
            }

            // Either fulfilled or timed out: this timer's job is done.
            clearInterval(interval);

            if (condition) {
                if (typeof(onReady) === "string") {
                    eval(onReady);
                } else {
                    onReady();
                }
            } else if (onTimeout) {
                if (typeof(onTimeout) === "string") {
                    eval(onTimeout);
                } else {
                    onTimeout();
                }
            } else {
                console.log("'waitFor()' timeout");
                phantom.exit(1);
            }
        }, 250); //< repeat check every 250ms
}
/**
 * Opens the forum login page, submits the credentials given on the command
 * line (system.args[2] / system.args[3]) and then starts saving the thread.
 *
 * Saving starts in every outcome: after a confirmed login, when the login
 * form is not shown at all, and also after a login or page-load timeout
 * (reported as "LOGIN ERROR"). An HTTP status >= 400 re-runs the whole
 * login after 5 seconds.
 *
 * @param {string} url - Base thread URL (without page suffix).
 * @param {Object} page - PhantomJS web page used for all requests.
 * @param {(string|number)} [p] - First page to save; page 1 when omitted.
 * @param {boolean} [one_page] - Save only page `p` when true.
 */
function login(url, page, p, one_page) {
    // Common tail of every outcome: report it, then hand over to save_page.
    var continue_with_thread = function(message) {
        console.log(message);
        console.log("Guardando hilo...");
        if (p) {
            save_page(url, page, p, one_page);
        } else {
            save_page(url, page, 1);
        }
    };

    var document_loaded = function() {
        return page.evaluate(function() {
            return ('complete' === document.readyState);
        });
    };

    var account_link_shown = function() {
        return page.evaluate(function() {
            return $('a[href="/inmobiliaria/account/"]').is(":visible");
        });
    };

    console.log("Haciendo login...");
    page.open("https://www.burbuja.info/inmobiliaria/login", function(status) {
        if (statusCode >= 400) {
            setTimeout(function() {
                console.log(statusCode + " Reintentando (5s)...");
                login(url, page, p, one_page);
            }, 5000);
            return;
        }

        waitFor(document_loaded, function() {
            var form_shown = page.evaluate(function() {
                return ($("input[name=login]").is(":visible") && $("input[name=password]").is(":visible"));
            });

            if (!form_shown) {
                // No login form on the page: reported as a successful login.
                continue_with_thread("Login OK!");
                return;
            }

            // Fill in and submit the form inside the page context.
            page.evaluate(function(user, pass) {
                $("input[name=login]").attr("value", user);
                $("input[name=password]").attr("value", pass);
                $("button[type=submit]").click();
            }, system.args[2], system.args[3]);

            // A visible account link is what confirms the login.
            waitFor(account_link_shown, function() {
                continue_with_thread("Login OK!");
            }, WAITFOR_TIMEOUT, function() {
                continue_with_thread("LOGIN ERROR");
            });
        }, WAITFOR_TIMEOUT, function() {
            continue_with_thread("LOGIN ERROR");
        });
    });
}
/**
 * Renders one page of a thread to "<OUTPUT_DIR>/<thread>/<thread>_<N>.pdf"
 * and then either continues with page N+1 or, once finished, moves on to the
 * next thread of the list (or exits).
 *
 * A thread is considered finished when the URL the browser ends up on is not
 * the requested "/page-N" URL (page 1 is always accepted). An HTTP status
 * >= 400 retries the same page after 5 seconds; a page that never reaches
 * readyState "complete" is retried immediately. Retries keep the `one_page`
 * flag, so a single-page request stays a single-page request.
 *
 * @param {string} url - Base thread URL (without page suffix).
 * @param {Object} page - PhantomJS web page used for all requests.
 * @param {(string|number)} conta_page - Page number to save (may be the
 *   string captured from the URL).
 * @param {boolean} [one_page] - Save only this page when true.
 */
function save_page(url, page, conta_page, one_page) {
    // Normalise once: the number can arrive as a string such as "3" or "03".
    var page_number = parseInt(conta_page, 10);
    var page_url = url + "/page-" + page_number;
    var hilo = url.replace(/\//g, '_').replace(/https:__www\.burbuja\.info_inmobiliaria_threads_/, '');
    var pdf_prefix = OUTPUT_DIR + "/" + hilo + "/" + hilo + "_";

    // Re-requests the same page with the same single-page setting.
    function retry() {
        save_page(url, page, page_number, one_page);
    }

    // Starts the next thread of the list, or exits when there is none left.
    function next_thread_or_exit() {
        if (threads_list && conta_threads < threads_list.length - 1) {
            save_thread(threads_list[++conta_threads]);
        } else {
            phantom.exit();
        }
    }

    page.open(page_url, function(status) {
        if (statusCode >= 400) {
            setTimeout(function() {
                console.log(statusCode + " Reintentando (5s)...");
                retry();
            }, 5000);
            return;
        }

        waitFor(function() {
            return page.evaluate(function() {
                return ('complete' === document.readyState);
            });
        }, function() {
            var login_required = page.evaluate(function() {
                return ($("input[name=login]").is(":visible") && $("input[name=password]").is(":visible"));
            });
            if (login_required) {
                console.log("ERROR (Tienes que hacer login para acceder a ese contenido)");
                phantom.exit();
                // phantom.exit() does not stop the running callback; without
                // this return the login page itself would be rendered.
                return;
            }

            // Replace every iframe with a link showing its source URL.
            page.evaluate(function() {
                $('iframe').replaceWith(function() {
                    return $('<a>', {
                        text: $(this).attr('src'),
                        href: $(this).attr('src')
                    });
                });
            });

            if (page_number === 1 || page.url === page_url) {
                console.log(hilo + " (PAG " + page_number + ")");
                page.render(pdf_prefix + page_number + '.pdf');
                if (!one_page) {
                    save_page(url, page, page_number + 1);
                } else {
                    console.log("OK " + hilo + " " + page_number + "!");
                    next_thread_or_exit();
                }
            } else {
                // The browser landed on a different URL: the thread has no page N.
                if (fs.exists(pdf_prefix + (page_number - 1) + '.pdf')) {
                    console.log("OK " + hilo + " " + (page_number - 1));
                }
                next_thread_or_exit();
            }
        }, WAITFOR_TIMEOUT, function() {
            console.log("Reintentando...");
            retry();
        });
    });
}