Get only the latest links from a sitemap

3
using HtmlAgilityPack;
using JoeBlogs;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;

namespace ReaderXML
{
    /// <summary>
    /// Console tool that reads the most recent links from a site's sitemap.xml,
    /// scrapes the title and content of each linked page, and republishes them
    /// to a WordPress site through its XML-RPC endpoint.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Character offset at which the category segment is expected to start inside a post URL.
        /// NOTE(review): this is tied to the length of the source site's base URL - confirm for the real site.
        /// </summary>
        private const int WebsiteCorpo = 25;

        /// <summary>
        /// Entry point: collects the latest sitemap links and posts each one.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string[] website = LeitorDeXML("http://SITE ORIGEM/").ToArray();

            // NOTE(review): the loop starts at index 1, so the first returned link is never posted.
            // Kept as in the original; confirm whether skipping it is intentional.
            for (int i = 1; i < website.Length; i++)
            {
                Postagem(website[i]);
            }
        }

        /// <summary>
        /// Loads "{url}sitemap.xml" and returns the <c>loc</c> value of its last 10
        /// <c>url</c> entries, in document order.
        /// </summary>
        /// <param name="url">Base address of the site; "sitemap.xml" is appended to it as-is.</param>
        /// <returns>Up to 10 page addresses; entries without a <c>loc</c> child are skipped.</returns>
        private static IEnumerable<string> LeitorDeXML(string url)
        {
            // Build the sitemap address once instead of formatting it for every use.
            string sitemapUrl = string.Format("{0}sitemap.xml", url);
            Console.WriteLine("Carregando " + sitemapUrl);

            XElement element = XElement.Load(sitemapUrl);

            XName urlNodes = XName.Get("url", "http://www.sitemaps.org/schemas/sitemap/0.9");
            XName locNodes = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");

            List<XElement> entradas = element.Elements(urlNodes).ToList();
            List<string> resultado = new List<string>();

            // Keep only the last 10 entries, preserving their original order.
            foreach (XElement e in entradas.Skip(Math.Max(0, entradas.Count - 10)))
            {
                XElement loc = e.Element(locNodes);
                if (loc != null)
                {
                    resultado.Add(loc.Value);
                }
            }

            return resultado;
        }

        /// <summary>
        /// Extracts the text between <see cref="WebsiteCorpo"/> and the next '/' of a post URL.
        /// </summary>
        /// <param name="website">Address of the post.</param>
        /// <returns>The extracted segment, or null when the URL is too short or has no such segment.</returns>
        private static string ExtrairCategoria(string website)
        {
            if (string.IsNullOrEmpty(website) || website.Length <= WebsiteCorpo)
            {
                return null;
            }

            string categoria = website.Substring(WebsiteCorpo);
            int indexofBarra = categoria.IndexOf('/');
            if (indexofBarra <= 0)
            {
                return null;
            }

            return categoria.Substring(0, indexofBarra);
        }

        /// <summary>
        /// Downloads one page, extracts its title and content, and publishes them as a new WordPress post.
        /// Any failure is written to the console and the method waits for a key press.
        /// </summary>
        /// <param name="website">Address of the page to copy.</param>
        private static void Postagem(string website)
        {
            try
            {
                // WordPress link, user and password
                string link = "http://SITE NOVO";
                string username = "user wp";
                string password = "senha wp";

                // Read the HTML
                HtmlWeb web = new HtmlWeb();
                HtmlDocument resultat = web.Load(website);

                // Split title and content. The XPath lookup yields null when nothing matches,
                // so pages without these elements are reported and skipped instead of
                // failing with a NullReferenceException.
                HtmlNode tituloNode = resultat.DocumentNode.SelectSingleNode("//*[contains(@class,'entry-title')]");
                HtmlNode conteudoNode = resultat.DocumentNode.SelectSingleNode("//*[contains(@class,'entry-content')]");
                if (tituloNode == null || conteudoNode == null)
                {
                    Console.WriteLine("Error: entry-title/entry-content not found in {0}", website);
                    return;
                }

                string titulopost = tituloNode.InnerHtml;
                string conteudo = conteudoNode.InnerHtml;

                // WordPress login
                var wp = new WordPressWrapper(link + "/xmlrpc.php", username, password);
                var post = new Post();

                // Category -- creating the category on the WordPress side is not implemented yet,
                // so the value is computed but not sent. TODO: attach it to the post.
                string categoria_f = ExtrairCategoria(website);

                // Date
                post.DateCreated = DateTime.Today;

                // Post
                post.Title = titulopost;
                post.Body = conteudo;

                wp.NewPost(post, true);
            }
            catch (Exception e)
            {
                Console.WriteLine("Error: {0}", e);
                Console.ReadKey();
            }
        }
    }
}

Execution does not get past line 63 ...

Error: System.NullReferenceException: Object reference not set to an instance of an object
    
asked by anonymous 20.05.2015 / 20:17

2 answers

3

One way to do this is to use Enumerable.Reverse to invert the list and Enumerable.Take to get the first n elements:

// Reverse the <url> elements, take the first 10 (the last 10 of the sitemap), then reverse again to restore the original order.
List<XElement> lista = Enumerable.Reverse(element.Elements(urlNodes)).Take(10).Reverse().ToList();

Your function should look like this:

/// <summary>
/// Loads "{url}sitemap.xml" and returns the <c>loc</c> value of its last 10
/// <c>url</c> entries, in document order.
/// </summary>
/// <param name="url">Base address of the site; "sitemap.xml" is appended to it as-is.</param>
/// <returns>Up to 10 page addresses; entries without a <c>loc</c> child are skipped.</returns>
private static IEnumerable<string> LeitorDeXML(string url)
{
    // Build the sitemap address once instead of formatting it for every use.
    string sitemapUrl = string.Format("{0}sitemap.xml", url);
    Console.WriteLine("Carregando " + sitemapUrl);

    XElement element = XElement.Load(sitemapUrl);

    XName urlNodes = XName.Get("url", "http://www.sitemaps.org/schemas/sitemap/0.9");
    XName locNodes = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");

    List<XElement> entradas = element.Elements(urlNodes).ToList();
    List<string> resultado = new List<string>();

    // Keep only the last 10 entries, preserving their original order.
    foreach (XElement e in entradas.Skip(Math.Max(0, entradas.Count - 10)))
    {
        XElement loc = e.Element(locNodes);
        if (loc != null)
        {
            resultado.Add(loc.Value);
        }
    }

    return resultado;
}
    
20.05.2015 / 20:46
0

If you want to return the last 10, but not backwards, you can create an extension method:

namespace Extensoes 
{
    /// <summary>
    /// Extension methods for <see cref="IEnumerable{T}"/>.
    /// </summary>
    public static class IEnumerableExtensions
    {
        /// <summary>
        /// Returns the last <paramref name="n"/> elements of <paramref name="source"/>, in their original order.
        /// </summary>
        /// <typeparam name="T">Element type.</typeparam>
        /// <param name="source">Sequence to read from.</param>
        /// <param name="n">Maximum number of trailing elements to return; zero or negative yields an empty sequence.</param>
        /// <returns>The whole sequence when it has <paramref name="n"/> elements or fewer, otherwise its last <paramref name="n"/> elements.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int n)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            if (n <= 0)
            {
                return Enumerable.Empty<T>();
            }

            // Collections already know their size, so skipping is cheap and needs no buffering.
            var collection = source as ICollection<T>;
            if (collection != null)
            {
                return collection.Skip(Math.Max(0, collection.Count - n));
            }

            return TakeLastIterator(source, n);
        }

        /// <summary>
        /// Walks <paramref name="source"/> exactly once, keeping only the most recent <paramref name="n"/> items.
        /// </summary>
        private static IEnumerable<T> TakeLastIterator<T>(IEnumerable<T> source, int n)
        {
            var buffer = new Queue<T>();

            foreach (T item in source)
            {
                // Drop the oldest item once the window is full.
                if (buffer.Count == n)
                {
                    buffer.Dequeue();
                }

                buffer.Enqueue(item);
            }

            foreach (T item in buffer)
            {
                yield return item;
            }
        }
    }
}

And then use it like this:

using Extensoes;

/// <summary>
/// Loads "{url}sitemap.xml" and returns the <c>loc</c> value of its last 10
/// <c>url</c> entries, in document order.
/// </summary>
/// <param name="url">Base address of the site; "sitemap.xml" is appended to it as-is.</param>
/// <returns>Up to 10 page addresses; entries without a <c>loc</c> child are skipped.</returns>
private static IEnumerable<string> LeitorDeXML(string url)
{
    // Build the sitemap address once instead of formatting it for every use.
    string sitemapUrl = string.Format("{0}sitemap.xml", url);
    Console.WriteLine("Carregando " + sitemapUrl);
    XElement element = XElement.Load(sitemapUrl);

    XName urlNodes = XName.Get("url", "http://www.sitemaps.org/schemas/sitemap/0.9");
    XName locNodes = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");

    // NOTE(review): newer frameworks ship their own Enumerable.TakeLast in System.Linq, which may
    // make this call ambiguous with the custom extension - confirm against the target framework.
    // The result is materialized so the query is not re-run each time the caller enumerates it.
    return element.Elements(urlNodes)
        .TakeLast(10)
        .Select(x => x.Element(locNodes))
        .Where(loc => loc != null)
        .Select(loc => loc.Value)
        .ToList();
}
    
20.05.2015 / 22:06