Why is this useful?

I was building a site recently which users could store links to products and pages they like. Presenting those links was boring without the other information like the title, description, keywords and an image.

I could have let the user enter these details manually, but that would make it hard work for them when all they really want to do is add a link to save it and move on. I decided to find a way to get this information from the url automatically, so I came up with this code.

Here's an example of it being used

A class to hold the information

namespace CodeShare.Library.Models.MetaData
{

public class MetaInformation
{
public bool HasData { get; set; }
public string Url { get; set; }
public string Title { get; set; }
public string Description { get; set; }
public string Keywords { get; set; }
public string ImageUrl { get; set; }
public string SiteName { get; set; }

public MetaInformation(string url)
{
Url = url;
HasData = false;
}

public MetaInformation(string url, string title, string description, string keywords, string imageUrl, string siteName)
{
Url = url;
Title = title;
Description = description;
Keywords = keywords;
ImageUrl = imageUrl;
SiteName = siteName;
}
}
}

The code which does the work

using HtmlAgilityPack;
namespace CodeShare.Library.MetaData
{
    public static class MetaScraper
    {
        /// <summary>
        /// Uses HtmlAgilityPack to get the meta information from a url
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public static Models.MetaData.MetaInformation GetMetaDataFromUrl(string url)
        {
            // Get the URL specified
            var webGet = new HtmlWeb();
            var document = webGet.Load(url);
            var metaTags = document.DocumentNode.SelectNodes("//meta");
            Models.MetaData.MetaInformation metaInfo = new Models.MetaData.MetaInformation(url);
            if (metaTags != null)
            {
                int matchCount = 0;
                foreach (var tag in metaTags)
                {
                    var tagName = tag.Attributes["name"];
                    var tagContent = tag.Attributes["content"];
                    var tagProperty = tag.Attributes["property"];
                    if (tagName != null && tagContent != null)
                    {
                        switch (tagName.Value.ToLower())
                        {
                            case "title":
                                metaInfo.Title = tagContent.Value;
                                matchCount++;
                                break;
                            case "description":
                                metaInfo.Description = tagContent.Value;
                                matchCount++;
                                break;
                            case "twitter:title":
                                metaInfo.Title = string.IsNullOrEmpty(metaInfo.Title) ? tagContent.Value : metaInfo.Title;
                                matchCount++;
                                break;
                            case "twitter:description":
                                metaInfo.Description = string.IsNullOrEmpty(metaInfo.Description) ? tagContent.Value : metaInfo.Description;
                                matchCount++;
                                break;
                            case "keywords":
                                metaInfo.Keywords = tagContent.Value;
                                matchCount++;
                                break;
                            case "twitter:image":
                                metaInfo.ImageUrl = string.IsNullOrEmpty(metaInfo.ImageUrl) ? tagContent.Value : metaInfo.ImageUrl;
                                matchCount++;
                                break;
                        }
                    }
                    else if (tagProperty != null && tagContent != null)
                    {
                        switch (tagProperty.Value.ToLower())
                        {
                            case "og:title":
                                metaInfo.Title = string.IsNullOrEmpty(metaInfo.Title) ? tagContent.Value : metaInfo.Title;
                                matchCount++;
                                break;
                            case "og:description":
                                metaInfo.Description = string.IsNullOrEmpty(metaInfo.Description) ? tagContent.Value : metaInfo.Description;
                                matchCount++;
                                break;
                            case "og:image":
                                metaInfo.ImageUrl = string.IsNullOrEmpty(metaInfo.ImageUrl) ? tagContent.Value : metaInfo.ImageUrl;
                                matchCount++;
                                break;
                        }
                    }
                }
                metaInfo.HasData = matchCount > 0;
            }
            return metaInfo;
        }
    }
}

You can change the case statements to look for the properties you are interested in. This is just my example to get you started.

Hopefully you will find it useful. Feel free to use as you want and share with others.

Want to thank me?

If I've helped you out and you want to thank me, why not buy me a coffee?

Buy me a coffee

About the author

Paul Seal

Umbraco MVP and .NET Web Developer from Derby (UK) who specialises in building Content Management System (CMS) websites using MVC with Umbraco as a framework. Paul is passionate about web development and programming as a whole. Apart from when he's with his wife and son, if he's not writing code, he's thinking about it or listening to a podcast about it.

Related Posts

How to solve the error assets file project.assets.json not found in Visual Studio

This post tells you how to solve the error assets file project.assets.json not found in Visual Studi…

Read Post

Code to help you debug an umbraco issue on a remote site

This post gives you some razor code to help you see the values of the IPublishedContent item's prope…

Read Post

How to set the default page base type to UmbracoViewPage in Umbraco

In this post I share with you what Ronald Barendse taught us about setting the pageBaseType in Umbra…

Read Post

How to fix corrupt accented characters in Excel for a C# export as CSV

This post will help you fix the issue where you csv export has corrupt characters when opening it in…

Read Post