I'm trying to get Open Graph metadata from a URL using Node.js (with cheerio), using the code below.
I have this object to fill: var result={};
// Single pass over the <meta> elements: every tag whose property starts with
// 'og' is stored in `result` under the second ':'-separated segment of its
// property name. A repeated key is turned into (or appended to) an array.
// NOTE(review): `metalist`, `utils` and `result` come from outside this
// snippet; `utils.isEmpty` is presumably a "null/undefined/empty" check — confirm.
for (var ogCounter = 0; ogCounter < metalist.length; ogCounter++) {
  var attribs = metalist[ogCounter].attribs;

  // Both the property and the content attribute must be present.
  if (utils.isEmpty(attribs.property) || utils.isEmpty(attribs.content)) {
    continue;
  }

  // Only properties that begin with 'og' are of interest.
  if (attribs.property.indexOf('og') != 0) {
    continue;
  }

  var ogname = attribs.property.split(':');
  var property = ogname[1];
  var content = attribs.content;

  if (utils.isEmpty(result[property])) {
    // First value seen for this key.
    result[property] = content;
  } else if (result[property].push) {
    // Already an array: append.
    result[property].push(content);
  } else {
    // Second value for this key: promote the single value to an array.
    result[property] = [result[property], content];
  }
}
After I populate the result, I convert it to JSON, and with this code I get something like:
type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: [
"http://www.youtube.com/v/fWNaR-rxAic?version=3&autohide=1",
"application/x-shockwave-flash",
"1920",
"1080"
]
But the thing I want is something like:
type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: {
"http://www.youtube.com/v/fWNaR-rxAic?version=3&autohide=1",
{
type:"application/x-shockwave-flash",
width:"1920",
height:"1080"
}
}
I'm trying this "if" but it doesn't work:
// Attempted replacement for the if/else that stores `content` in result[property].
// First value seen for this property: store it as-is.
if (utils.isEmpty(result[property])) {
result[property] = content;
} else {
// An existing value that has a push method is treated as an array and appended to.
if (result[property].push) {
result[property].push(content);
} else {
// Otherwise the existing value is wrapped in a fresh object.
var subresult={};
// NOTE(review): `name` is not defined anywhere in the visible code; the loop
// stores the split property name in `ogname`, so ogname[2] was presumably
// intended here — confirm.
subresult[name[2]]=content;
// The previous value is kept inside the new object under the same key as the
// parent property.
subresult[property]=result[property] ;
result[property] = subresult;
}
}
I don't want to loop over all the meta tags twice, and I'm not very familiar with JavaScript and Node.js functions... Any suggestions? Thanks
This is tricky because of how you want to display og:video. I don't think you can do it that way, because a value inside an object needs a key of its own. The easiest thing to do would be to assign it an identifier, for example name, and have it at the same level as og:video:width.
Example result
{
"type": "video.other",
"url": "http://philippeharewood.com/facebook/video.html",
"title": "Simple Plan",
"video": {
"name": "http://www.youtube.com/v/Y4MnpzG5Sqc?version=3&autohide=1",
"type": "application/x-shockwave-flash",
"width": "398",
"height": "224",
"release_date": "2012-05-29T21:30"
}
}
How it could be done,
var cheerio = require('cheerio');
var request = require('request');

var url = 'http://philippeharewood.com/facebook/video.html';

// Null-prototype object: property names come from an untrusted page, so a tag
// such as "og:__proto__:x" must land on a plain key instead of reaching
// Object.prototype.
var result = Object.create(null);

/**
 * Folds one Open Graph meta tag into `target`.
 *
 * - "og:key"      -> target.key = content
 * - "og:key:sub"  -> target.key = { sub: content }
 *
 * When both forms occur for the same key, the plain "og:key" value is kept
 * under `name` inside the nested object, regardless of which tag came first.
 * Segments beyond the third are ignored.
 *
 * @param {Object} target   Object that accumulates the parsed tags.
 * @param {string} property Value of the tag's property attribute ("og:...").
 * @param {string} content  Value of the tag's content attribute.
 */
function addOpenGraphTag(target, property, content) {
  var og = property.split(':');
  var key = og[1];
  var existing = target[key];
  var isNested = existing !== null && typeof existing === 'object';
  var set;

  // "og:" with nothing after it carries no usable key.
  if (!key) {
    return;
  }

  if (og.length > 2) {
    if (isNested) {
      set = existing;
    } else {
      set = Object.create(null);
      // A plain value was stored earlier: keep it as the object's name.
      if (typeof existing === 'string' && existing) {
        set.name = existing;
      }
      target[key] = set;
    }
    set[og[2]] = content;
  } else if (isNested) {
    // Structured properties were seen first; do not overwrite them.
    existing.name = content;
  } else {
    target[key] = content;
  }
}

request(url, function (error, response, body) {
  if (error) {
    console.error('Request for ' + url + ' failed:', error);
    return;
  }

  var $ = cheerio.load(body);
  var meta = $('meta');

  // Index loop: visits only the matched elements, not the other own
  // properties of the selection object.
  for (var i = 0; i < meta.length; i++) {
    var attribs = meta[i].attribs;
    if (attribs &&
        typeof attribs.property === 'string' &&
        attribs.property.indexOf('og:') === 0 &&
        typeof attribs.content === 'string') {
      addOpenGraphTag(result, attribs.property, attribs.content);
    }
  }

  console.log(JSON.stringify(result, undefined, 2));
});
Here's my answer. @phwd totally answered the question, but I thought it'd be nice to make a more general solution that parses all meta tags to n levels.
var cheerio = require('cheerio'),
    request = require('request'),
    url = 'http://philippeharewood.com/facebook/video.html',
    // Null-prototype objects are used for the whole tree: property names come
    // from an untrusted page, so tokens such as "__proto__" or "constructor"
    // must be ordinary keys rather than inherited members.
    result = Object.create(null),
    // Returns the named attribute of a tag, or "" when it is missing.
    attr = function (tag, prop) { return tag.attribs && tag.attribs[prop] || ""; };

/** True when `value` is a nested branch of the tree (not a string leaf). */
function isBranch(value) {
  return value !== null && typeof value === 'object';
}

/**
 * Inserts one meta tag into `tree`, using the ':'-separated segments of
 * `property` as the path, to any depth.
 *
 * If a path segment is both a value and a parent of deeper values
 * (e.g. "og:video" and "og:video:width"), the value is stored under `name`
 * inside the branch, whichever tag was seen first.
 *
 * @param {Object} tree     Root object that accumulates the parsed tags.
 * @param {string} property Non-empty property attribute, e.g. "og:video:width".
 * @param {string} content  Content attribute ("" when the tag has none).
 */
function insertProperty(tree, property, content) {
  var tokens = property.split(':'),
      parent = tree;

  for (var j = 0; j < tokens.length; j++) {
    var token = tokens[j],
        current = parent[token];

    if (j + 1 === tokens.length) { // leaf node
      if (isBranch(current)) {
        // Expected leaf is already a branch: keep the value as its name.
        if (content) current.name = content;
      } else {
        parent[token] = content;
      }
    } else { // branch node
      if (!isBranch(current)) {
        var branch = Object.create(null);
        // The branch was previously a leaf: move its value to the name attr.
        if (typeof current === 'string' && current) branch.name = current;
        parent[token] = branch;
        current = branch;
      }
      parent = current;
    }
  }
}

request(url, function (err, res, body) {
  if (err) {
    console.error('Request for ' + url + ' failed:', err);
    return;
  }

  var metas = cheerio.load(body)('meta');

  // Index loop: visits only the matched elements of the selection.
  for (var i = 0; i < metas.length; i++) {
    var property = attr(metas[i], 'property');
    if (property) {
      insertProperty(result, property, attr(metas[i], 'content'));
    }
  }

  console.log(JSON.stringify(result.og, undefined, 2));
});