JavaScript Snippet: List All External host-name links

Posted at

A quick snippet that lists the external hosts a page loads or links to (adware, advertisements, banners, etc.), which helps me update my other private project, hosts.eladkarako.com.

/**
 * Console snippet: logs the host name of every element on the current page
 * that carries a `src` or `href` attribute and points at a host other than
 * the page's own (`location.hostname`).
 *
 * Only absolute `http://`, `https://` and protocol-relative (`//host/...`)
 * URLs are considered; anything else (`javascript:`, `mailto:`, `data:`,
 * empty values, ...) is skipped. Duplicates are kept, in document order.
 */
(function(){
  "use strict";

  // Captures the authority part ("user@host:port") that follows the scheme,
  // stopping at the first "/", "?" or "#".
  var AUTHORITY_PATTERN = /^(?:https?:)?\/\/([^\/?#]*)/i;

  /**
   * Picks the URL string exposed by an element.
   * @param {Element} item - element matched by the `[src]` / `[href]` selector.
   * @returns {string} `item.src` when it is a non-empty string, otherwise
   *   `item.href` when it is a string, otherwise "".
   */
  function pickUrl(item){
    if ("string" === typeof item.src && "" !== item.src) {
      return item.src;
    }
    // Guard with a string check: calling string methods on a non-string
    // `href` (e.g. an object-valued property) would throw.
    if ("string" === typeof item.href) {
      return item.href;
    }
    return "";
  }

  /**
   * Reduces an element to the lower-cased host name its URL points at.
   * @param {Element} item - element matched by the `[src]` / `[href]` selector.
   * @returns {string} the host name, or "" when the URL is not http(s) or
   *   protocol-relative.
   */
  function toHostname(item){
    var match = AUTHORITY_PATTERN.exec(pickUrl(item));

    if (null === match) {
      return "";
    }

    return match[1]
      .replace(/^[^@]*@/, '')  // drop "user:password@" credentials
      .replace(/:\d*$/, '')    // drop ":port"
      .toLowerCase();
  }

  var current_host = location.hostname.toLowerCase();

  var hosts = Array.prototype.map.call(
    document.querySelectorAll('*[src],*[href]'),
    // Wrap so that map's extra (index, list) arguments are not forwarded.
    function(item){
      return toHostname(item);
    }
  );

  // Exact comparison (not a substring search) so that a host which merely
  // contains the current host name is still reported as external.
  var array = hosts.filter(function(host){
    return "" !== host && host !== current_host;
  });

  console.log(array);

}());


On pages such as this one: http://www.icefilms.info/ip.php?v=219279& , it lists the following:

["ads.comeadvertisewithus.com", "asset.pagefair.net", "asset.pagefair.com", "ajax.googleapis.com", "forum.icefilms.info", "www.icedivx.com", "www.imdb.com", "www.youtube.com", "creative.wwwpromoter.com", "creative.wwwpromoter.com", "www.addthis.com", "s7.addthis.com", "encrypted-tbn3.gstatic.com", "encrypted-tbn3.gstatic.com", "www.imdb.com", "www.imdb.com", "www.imdb.com", "www.imdb.com", "forum.icefilms.info", "ajax.googleapis.com", "data.scorepresshidden.info", "data.scorepresshidden.info", "forum.icefilms.info", "st.chatango.com", "www.google-analytics.com", "ads.comeadvertisewithus.com", "get.scorepresshidden.info", "s7.addthis.com"]