A 50-line program to get all the current news from The Times of India

Here's the complete code to rip the Times of India site. You will need HtmlAgilityPack.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using System.Text;
using HtmlAgilityPack;
namespace NewsRipper
{
/// <summary>
/// Console entry point: downloads the Times of India RSS feeds, scrapes each linked
/// article page for its body text and main image, and writes everything to one XML file.
/// </summary>
class Program
{
    /// <summary>Output path used when none is supplied on the command line.</summary>
    const string DefaultOutputPath = "c:\\wow.xml";

    /// <summary>
    /// Reads every category feed, enriches each item from its article page and saves
    /// the combined result as XML.
    /// </summary>
    /// <param name="args">Optional; a non-blank <c>args[0]</c> overrides the output file path.</param>
    static void Main(string[] args)
    {
        List<categoryFeed> cFeeds = new List<categoryFeed>
        {
            new categoryFeed { category = "Headlines", source = "http://timesofindia.feedsportal.com/c/33039/f/533965/index.rss" },
            new categoryFeed { category = "World", source = "http://timesofindia.feedsportal.com/c/33039/f/533917/index.rss" },
            new categoryFeed { category = "Business", source = "http://timesofindia.feedsportal.com/c/33039/f/533919/index.rss" },
            new categoryFeed { category = "Sports", source = "http://timesofindia.feedsportal.com/c/33039/f/533921/index.rss" },
            new categoryFeed { category = "Health", source = "http://timesofindia.feedsportal.com/c/33039/f/533968/index.rss" },
            new categoryFeed { category = "Tech", source = "http://timesofindia.feedsportal.com/c/33039/f/533923/index.rss" },
            new categoryFeed { category = "Entertainment", source = "http://timesofindia.feedsportal.com/c/33039/f/533928/index.rss" }
        };

        List<Feed> items = new List<Feed>();
        foreach (categoryFeed cf in cFeeds)
        {
            items.AddRange(readFeed(cf.category, cf.source));
        }

        // Feed is a reference type, so the helper can fill it in without 'ref'.
        foreach (Feed item in items)
        {
            parseUrl(item, item.Uri);
        }

        // XElement accepts a sequence of child elements directly.
        XElement root = new XElement("Feeds", getXElementFeed(items));

        string outputPath = DefaultOutputPath;
        if (args != null && args.Length > 0 && !String.IsNullOrWhiteSpace(args[0]))
        {
            outputPath = args[0];
        }
        System.IO.File.WriteAllText(outputPath, root.ToString());
    }

    /// <summary>Lazily converts each feed item into a <c>News</c> XML element.</summary>
    /// <param name="feed">Items to convert.</param>
    /// <returns>One <c>News</c> element per item, in input order.</returns>
    static IEnumerable<XElement> getXElementFeed(List<Feed> feed)
    {
        foreach (Feed f in feed)
        {
            yield return new XElement("News",
                new XElement("Title", f.Title),
                new XElement("Category", f.Category),
                new XElement("Uri", f.Uri),
                new XElement("ImageUri", f.ImageUri),
                new XElement("Description", f.Description),
                new XElement("Time", f.Time.ToString()));
        }
    }

    /// <summary>Loads an RSS document and maps every <c>item</c> element to a <see cref="Feed"/>.</summary>
    /// <param name="category">Category label stamped on every returned item.</param>
    /// <param name="uri">Location of the RSS document, passed to <c>XDocument.Load</c>.</param>
    /// <returns>The items found in the document; empty when it has none.</returns>
    static List<Feed> readFeed(String category, String uri)
    {
        XDocument doc = XDocument.Load(uri);
        return doc.Descendants("item").Select(x => toFeed(x, category)).ToList();
    }

    /// <summary>Builds a <see cref="Feed"/> from one RSS <c>item</c>, tolerating missing child elements.</summary>
    static Feed toFeed(XElement item, String category)
    {
        Feed feed = new Feed();
        feed.Category = category;

        // The explicit string conversion yields null for a missing element instead of throwing.
        feed.Title = (string)item.Element("title") ?? "";
        // Prefer guid (as before); fall back to link when the item has no guid.
        feed.Uri = (string)item.Element("guid") ?? (string)item.Element("link") ?? "";

        // pubDate is machine-readable English text, so parse it culture-independently.
        // When it is absent or unparseable the Feed keeps its own default Time.
        DateTime published;
        if (DateTime.TryParse((string)item.Element("pubDate"),
                              System.Globalization.CultureInfo.InvariantCulture,
                              System.Globalization.DateTimeStyles.None,
                              out published))
        {
            feed.Time = published;
        }
        return feed;
    }

    /// <summary>
    /// Downloads the article page and fills in the item's description and image URL.
    /// Either value is set to an empty string when the expected markup is not present.
    /// </summary>
    /// <param name="f">Item to update in place.</param>
    /// <param name="source">Address of the article page.</param>
    static void parseUrl(Feed f, String source)
    {
        // Nothing to download for an item that carried no address.
        if (f == null || String.IsNullOrEmpty(source))
        {
            return;
        }

        HtmlDocument doc = new HtmlWeb().Load(source);

        HtmlNode article = doc.DocumentNode.SelectSingleNode(@"//div[@id='artext1']");
        f.Description = (article == null) ? "" : article.InnerText;

        HtmlNode image = doc.DocumentNode.SelectSingleNode(@"//div[@class='mainimg1']//img");
        // GetAttributeValue returns the default rather than throwing when 'src' is missing.
        f.ImageUri = (image == null) ? "" : image.GetAttributeValue("src", "");
    }
}
/// <summary>One news item: RSS metadata plus the details scraped from its article page.</summary>
class Feed
{
    /// <summary>Headline text.</summary>
    public string Title = string.Empty;

    /// <summary>Address of the article's main image.</summary>
    public string ImageUri = string.Empty;

    /// <summary>Article text.</summary>
    public string Description = string.Empty;

    /// <summary>Address of the article page.</summary>
    public string Uri = string.Empty;

    /// <summary>Name of the category the item was read from.</summary>
    public string Category = string.Empty;

    /// <summary>Timestamp of the item; initialised to the moment of construction.</summary>
    public DateTime Time = DateTime.Now;
}
/// <summary>Pairs a news category name with the address of its RSS feed.</summary>
class categoryFeed
{
    /// <summary>Address of the RSS document for this category.</summary>
    public string source;

    /// <summary>Category label applied to items read from <see cref="source"/>.</summary>
    public string category;
}
}

A 50-line program to get all the current news from The Times of India

Monday, December 16, 2013

TimesOfIndiaRipper