.Net生成站点sitemap供百度抓取的类和使用
我对sitemap的介绍:
百度的sitemap说明地址:http://zhanzhang.baidu.com/wiki/93#01
SiteMap作用:利于百度搜索
举例说明:
https://www.xiaoyuani.com/AwardsIntroduce/AwardNews/20130609172732828125097a79b4e4a
https://www.xiaoyuani.com/AwardsIntroduce/AwardNews/20130624102726828125093ba40f210
https://www.xiaoyuani.com/AwardsIntroduce/AwardNews/201306241038078125000d72ba2d9fe
https://www.xiaoyuani.com/AwardsIntroduce/AwardNews/20130624104148890625018d09da3eb
……………………………………………………………………………………………………
像上面这类网址,如果网站某列表页面有出现链接,百度会抓取,但是一般的列表页面只出现最新的几个,百度自动搜索网站上的页面,只能搜索到:https://www.xiaoyuani.com/AwardsIntroduce/AwardNews,大部分带有id的详细页面还是不会出现,所以,我们生成所有详细内容页面的地址去给百度抓取,那么所有这些页面的内容就能被百度收录,自然在搜索时就能搜索出来了。
主要内容官方说明:
1.第一种格式样例:txt文本格式
在一个txt文本列明需要向百度提交的链接地址,将txt文本文件通过站长平台进行提交
http://www.example.com/repaste/101562698_5230191316.html
http://www.example.com/repaste/101586283_5230215075.html
http://www.example.com/repaste/101639435_5230310576.html
此文本文件需要遵循以下指南:
文本文件每行都必须有一个网址。网址中不能有换行。
不应包含网址列表以外的任何信息。
您必须书写完整的网址,包括 http。
每个文本文件最多可包含 50,000 个网址,并且应小于10MB(10,485,760字节)。如果网站所包含的网址超过 50,000 个,则可将列表分割成多个文本文件,然后分别添加每个文件。
文本文件需使用 UTF-8 编码或GBK编码。
2.第二种格式样例:xml格式
您先了解单个xml数据格式如下:
<?xml version="1.0" encoding="utf-8"?> XML文件需以utf-8编码
<urlset> 必填,这是整个链接集合的定义入口,以<urlset>作为开始,</urlset>作为结束
<url> 必填,这是具体某一个链接的定义入口,每一条数据都要用<url>和</url>包含在里面。
<loc>https://www.xiaoyuani.com/yoursite.html</loc> 必填,页面链接地址,长度不得超过256字节。
<lastmod>2009-12-14</lastmod> 选填,用来指定该链接的最后更新时间
<changefreq>daily</changefreq> 选填,用这个标签告诉此链接可能会出现的更新频率
<priority>0.8</priority> 选填,可以不提交该标签,用来指定此链接相对于其他链接的优先权比值,此值定于0.0-1.0之间
</url> 必填,标签闭合
</urlset> 必填,标签闭合
上述xml sitemap向百度提交了一个url:https://www.xiaoyuani.com/
若有多条url,按照上述格式重复<url></url>之间的片断,列明所有url地址,打包到一个xml文件,向站长平台进行提交。
3.第三种格式样例:Sitemap索引格式
如需提交大量sitemap文件,则可将其列在sitemap索引文件中,然后将该索引文件提交。您无需分别提交每个文件。
<?xml version="1.0" encoding="UTF-8" ?> XML文件需以utf-8编码
<sitemapindex> 必填,以 <sitemapindex> 开始标记作为开始,以 </sitemapindex> 
结束标记作为结束
<sitemap> 必填,以<sitemap>标签提交一个子sitemap文件
<loc>https://www.xiaoyuani.com/ext/xmlsitemap/add/201201/index_20120106.xml</loc> 必填,识别sitemap的位置
<lastmod>2012-01-06</lastmod> 选填,识别相对sitemap文件的修改时间
</sitemap> 必填,标签闭合
</sitemapindex> 必填,标签闭合
有多个sitemap,按上述格式重复<sitemap></sitemap>之间的片断,列明所有sitemap地址,向站长平台进行提交。
如果网址超过50000条或文件大小超过10M,就要分隔成KinpanUrl1.xml、KinpanUrl2.xml……那么KinpanUrl.xml就是存放KinpanUrl1.xml、KinpanUrl2.xml……的路径,这个文件很小。在百度提交网址就是提交KinpanUrl.xml,百度会根据KinpanUrl.xml里面的地址去获取KinpanUrl1.xml、KinpanUrl2.xml……里面的网址。
我这里做了另外的优化,把所有子站点的生成sitemap的链接放到一起,简化生成步骤,还加入了10分钟只能生成一次的限制,防止恶意生成,因为生成sitemap就是要把整个数据库的内容对应的要显示的页面都查找出来告诉百度,数据库查询比较耗资源。

// The class that generates the sitemap
/// <summary>
/// Builds the sitemap files of the current sub-site: one or more
/// "&lt;site&gt;N.xml" URL files plus a "&lt;site&gt;.xml" sitemap index that lists them,
/// all written under "/Upload/SiteMap".
/// </summary>
public class CreateSiteMapTxt
{
    /// <summary>
    /// Maximum number of URLs written to a single URL file. The original author chose
    /// 46000 to stay below the per-file limits quoted above (50,000 URLs, roughly 10 MB).
    /// </summary>
    private const int MaxUrlsPerFile = 46000;

    /// <summary>Minimum number of minutes between two generations of the same site.</summary>
    private const int MinMinutesBetweenRuns = 10;

    public CreateSiteMapTxt() { }

    // Eagerly created so that concurrent first requests cannot race on the assignment.
    private static readonly CreateSiteMapTxt _Instance = new CreateSiteMapTxt();

    /// <summary>
    /// Shared instance of this class.
    /// </summary>
    public static CreateSiteMapTxt Instance
    {
        get { return _Instance; }
    }

    #region Create the sitemap files of the current site

    /// <summary>
    /// Collects every URL of the current sub-site (see <see cref="GetSiteByDefaultControl"/>)
    /// and writes the sitemap URL files and the sitemap index file.
    /// </summary>
    /// <returns>
    /// An empty string on success; the exception message on failure. An empty string is
    /// also returned, without writing anything, when the current site has no URL collector.
    /// </returns>
    public string CreateTxt()
    {
        try
        {
            ConfigSiteUrl site = GetSiteByDefaultControl();

            Func<List<string>> collectUrls;
            switch (site)
            {
                case ConfigSiteUrl.KinpanUrl:
                    collectUrls = CollectKinpanUrls;
                    break;
                case ConfigSiteUrl.KinpanAwardUrl:
                    collectUrls = CollectKinpanAwardUrls;
                    break;
                case ConfigSiteUrl.TopicUrl:
                    collectUrls = CollectTopicUrls;
                    break;
                case ConfigSiteUrl.ExhibitionUrl:
                    collectUrls = CollectExhibitionUrls;
                    break;
                case ConfigSiteUrl.MagazineUrl:
                    collectUrls = CollectMagazineUrls;
                    break;
                case ConfigSiteUrl.LiveUrl:
                    collectUrls = CollectLiveUrls;
                    break;
                default:
                    return "";
            }

            // The throttle exists to protect the database, so it has to run BEFORE the
            // collectors issue their queries, not after them.
            EnsureNotGeneratedRecently(site);

            createSiteXml(collectUrls(), site);
            return "";
        }
        catch (Exception ex)
        {
            // Deliberate: callers display the message instead of handling an exception.
            return ex.Message;
        }
    }

    /// <summary>
    /// Throws when the sitemap index of <paramref name="site"/> was written less than
    /// <see cref="MinMinutesBetweenRuns"/> minutes ago.
    /// </summary>
    private static void EnsureNotGeneratedRecently(ConfigSiteUrl site)
    {
        string indexPath = HttpContext.Current.Server.MapPath("/Upload/SiteMap/" + site.ToString() + ".xml");
        if (!File.Exists(indexPath))
        {
            return;
        }

        DateTime lastWrite = File.GetLastWriteTime(indexPath);
        if (lastWrite.AddMinutes(MinMinutesBetweenRuns) > DateTime.Now)
        {
            throw new InvalidOperationException(string.Format("上次生成时间:{0},两次生成时间至少相隔10分钟,请不要生成太频繁!", lastWrite));
        }
    }

    /// <summary>URLs of the main (Kinpan) site.</summary>
    private static List<string> CollectKinpanUrls()
    {
        List<string> list = new List<string>
        {
            "/", // home page
            "/InfoNews",
            "/InfoNews?typeID=1",
            "/InfoNews?typeID=2"
        };

        NewsTopicBLL bll = new NewsTopicBLL();
        try
        {
            foreach (string id in bll.GetIDs(1))
            {
                list.Add("/NewsDescription/Index/" + id);
            }
        }
        finally
        {
            bll.Dispose();
        }

        return list;
    }

    /// <summary>URLs of the Kinpan award site.</summary>
    private static List<string> CollectKinpanAwardUrls()
    {
        List<string> list = new List<string>();
        list.Add("/"); // home page

        int sessionAward = new TangArt.GoldOnPubPlatform.App.Models.XmlHandler().GetXmlSession("~/Upload/Session.xml");
        list.Add("/AwardsIntroduce/"); // page of the current award session
        for (int i = 1; i < sessionAward; i++)
        {
            list.Add("/Awardsreview/Index/" + i); // pages of the previous award sessions
        }
        list.Add("/Enroll"); // selection application form
        list.Add("/audienceReg"); // audience registration
        list.Add("/CurrentIntroduce/"); // about the award

        NewsTopicBLL bll = new NewsTopicBLL();
        try
        {
            foreach (string id in bll.GetIDs(2)) // award news
            {
                list.Add("/AwardsIntroduce/AwardNews/" + id);
            }
        }
        finally
        {
            bll.Dispose();
        }

        GuestinformationBLL GuestInfo_Bll = new BLL.GuestinformationBLL();
        try
        {
            var guest_info = GuestInfo_Bll.GetServalRecoard(0, 1, sessionAward.ToString()); // guests of the current session
            foreach (var item in guest_info)
            {
                list.Add("/Guest/Index/" + item.ID);
            }

            // NOTE(review): the original note said "11 = fetch 11 records, 2 = event scene,
            // session = current session", but the call passes 0 and (sessionAward - 1) — confirm intent.
            var ActivityLive = GuestInfo_Bll.GetServalRecoard(0, 2, (sessionAward - 1).ToString());
            foreach (var item in ActivityLive)
            {
                list.Add("/Guest/Index/" + item.ID);
            }
        }
        finally
        {
            GuestInfo_Bll.Dispose();
        }

        AwardSubClasicBLL AC_BLL = new AwardSubClasicBLL();
        try
        {
            var SubClasic = AC_BLL.GetByRefAwardClassMainID(sessionAward, 0);
            for (int i = 0; i < SubClasic.Count; i++)
            {
                list.Add("/WebVote/Index/" + (i + 1));
            }
        }
        finally
        {
            AC_BLL.Dispose();
        }

        ActivityFlowClassificationBLL activity_bll = new BLL.ActivityFlowClassificationBLL();
        try
        {
            var AwardWeek = activity_bll.GetAwardWeek(12, sessionAward); // per the original note: 12 = award-week news, second argument = session number
            foreach (var item in AwardWeek)
            {
                list.Add("/AwardsIntroduce/AwardDescription/" + item.ID);
            }
        }
        finally
        {
            activity_bll.Dispose();
        }

        kinpanAwardProjectBLL A_BLL = new kinpanAwardProjectBLL();
        try
        {
            foreach (var item in A_BLL.GetAll())
            {
                list.Add("/WebVoteDetails/Index/" + item.ID); // every award project
            }
        }
        finally
        {
            A_BLL.Dispose();
        }

        return list;
    }

    /// <summary>URLs of the topic site.</summary>
    private static List<string> CollectTopicUrls()
    {
        List<string> list = new List<string>
        {
            "/", // home page
            "/TopicList",
            "/TopicList?hotOrNew=1"
        };

        TopicInformationBLL iTopicInformationBll = new TopicInformationBLL();
        try
        {
            foreach (var item in iTopicInformationBll.GetAll())
            {
                list.Add("/TopicDetail/Index/" + item.ID);
            }
        }
        finally
        {
            iTopicInformationBll.Dispose();
        }

        return list;
    }

    /// <summary>URLs of the exhibition site.</summary>
    private static List<string> CollectExhibitionUrls()
    {
        List<string> list = new List<string>
        {
            "/", // home page
            "/Aboutus?id=1&classID=2013041714244184765625a6ac8f1f3", // exhibition introduction
            "/AboutUs?id=1&classID=2013041714245473828129b784c41ac", // about us
            "/ExhibitorsReg?id=2&classID=201304171425249550781926eb9025e", // exhibitor registration
            "/ExhibiWeChoose?id=2&classID=2013041714251700683594951e43c45", // why choose us
            "/ActivitiesRow?id=2&classID=201304171425317929687756f9fd18e", // event schedule
            "/ExhibiWeChoose?id=2&classID=2013041714253846191409b4a796120", // booth map
            "/ExhibiWeChoose?id=2&classID=20130417142545844726507e95c8997", // fees
            "/ExhibiWeChoose?id=2&classID=201304171425531689453bac4bc46ee", // advertising and sponsorship
            "/ExhibitionManual?id=2&classID=201304171425598222656df75fa0cef", // exhibitor manual
            "/ExhibiReg?id=3&classID=201304171426340244140e4260ea374", // visitor registration
            "/ExhibitionActivity?id=3&classID=201304171426268144531394568690f", // exhibition events
            "/hotelService?id=3&classID=20130417142648191406298eae94aa0", // hotels and transport
            "/hotelService?id=3&classID=2013041714264146289066ff9c76a91", // on-site services
            "/ExhibitionDownLoad?id=5", // downloads
            "/ContactUs?id=7&classID=2013061816191867506073b036b8afc", // contact us
            "/NewsDynamic?id=4", // news and media
            "/NewsDynamic?id=4&newsType=1&classID=2013041714271025195311627d7a0f6", // exhibition updates
            "/NewsDynamic?id=4&newsType=2&classID=201304171427171025390e6c540a6fe", // industry news
            "/NewsMedia?id=4&classID=201304171427242744140985b276cbe" // media partners
        };

        ExhibitionNewsBLL exh_newsBll = new ExhibitionNewsBLL();
        try
        {
            foreach (var item in exh_newsBll.GetAll()) // exhibition news
            {
                list.Add("/Exhibition/ExhibitionNewsDescription/" + item.ID);
            }
        }
        finally
        {
            exh_newsBll.Dispose();
        }

        return list;
    }

    /// <summary>URLs of the magazine site.</summary>
    private static List<string> CollectMagazineUrls()
    {
        // Magazine tags, in the order their pages are listed.
        string[] tags = { "sdlp", "sdkj", "chyue" };

        List<string> list = new List<string>();
        list.Add("/"); // home page

        foreach (string tag in tags)
        {
            list.Add("/Magazine/Magazine/" + tag + "/");
            list.Add("/Magazine/MagazineReView/" + tag + "/");
            list.Add("/Magazine/MagazineReaderBack/" + tag + "/");
            list.Add("/Magazine/MagazineActivity/" + tag + "/");
        }

        MagazineBookInfoBLL magazine_bll = new BLL.MagazineBookInfoBLL();
        try
        {
            foreach (string tag in tags)
            {
                foreach (var item in magazine_bll.GetIDsByTag(tag))
                {
                    list.Add("/Magazine/MagazineEachperiod/" + tag + "?id=" + item);
                }
            }
        }
        finally
        {
            magazine_bll.Dispose();
        }

        return list;
    }

    /// <summary>URLs of the live-video site.</summary>
    private static List<string> CollectLiveUrls()
    {
        List<string> list = new List<string>
        {
            "/", // home page
            "/livevideo" // live video
        };

        AwardLivePictureBLL AP_BLL = new BLL.AwardLivePictureBLL();
        try
        {
            foreach (var item in AP_BLL.GetOrderByTime()) // list of live videos
            {
                list.Add("/LiveVideo/index/" + item.ID);
            }
        }
        finally
        {
            AP_BLL.Dispose();
        }

        return list;
    }

    /// <summary>
    /// Writes <paramref name="list"/> into URL files of at most <see cref="MaxUrlsPerFile"/>
    /// entries each ("&lt;site&gt;1.xml", "&lt;site&gt;2.xml", ...) and then writes the
    /// sitemap index "&lt;site&gt;.xml" that references them.
    /// </summary>
    /// <param name="list">Site-relative URLs; each is prefixed with the site's domain.</param>
    /// <param name="site">Sub-site whose name is used as app-setting key and as file name.</param>
    private void createSiteXml(List<string> list, ConfigSiteUrl site)
    {
        string siteName = site.ToString();
        string domain = ConfigurationManager.AppSettings[siteName];
        string siteMapDir = HttpContext.Current.Server.MapPath("/Upload/SiteMap");
        string siteMapIndexPath = HttpContext.Current.Server.MapPath("/Upload/SiteMap/" + siteName + ".xml");

        // Created up front so the index file can be saved even when the list is empty.
        Common.Upload.CreateDir(siteMapDir);

        // <lastmod> must be a machine-readable date (YYYY-MM-DD), independent of the server culture.
        string lastmod = DateTime.Now.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

        XmlDocument indexDoc = new XmlDocument();
        indexDoc.AppendChild(indexDoc.CreateXmlDeclaration("1.0", "utf-8", null));
        XmlElement sitemapIndex = indexDoc.CreateElement("sitemapindex");

        int fileNumber = 1;
        for (int start = 0; start < list.Count; start += MaxUrlsPerFile)
        {
            int end = Math.Min(start + MaxUrlsPerFile, list.Count);

            // One URL file per chunk.
            XmlDocument doc = new XmlDocument();
            doc.AppendChild(doc.CreateXmlDeclaration("1.0", "utf-8", null));
            XmlElement urlset = doc.CreateElement("urlset");
            for (int i = start; i < end; i++)
            {
                XmlElement url = doc.CreateElement("url");
                AppendTextElement(doc, url, "loc", domain + list[i]); // page address
                AppendTextElement(doc, url, "lastmod", lastmod); // last update time
                AppendTextElement(doc, url, "changefreq", "daily"); // expected update frequency
                AppendTextElement(doc, url, "priority", "1"); // weight, between 0 and 1
                urlset.AppendChild(url);
            }
            doc.AppendChild(urlset);

            string fileName = siteName + fileNumber + ".xml";
            doc.Save(Path.Combine(siteMapDir, fileName));

            // Reference the URL file from the index.
            XmlElement sitemap = indexDoc.CreateElement("sitemap");
            AppendTextElement(indexDoc, sitemap, "loc", domain + "/Upload/SiteMap/" + fileName);
            AppendTextElement(indexDoc, sitemap, "lastmod", lastmod);
            sitemapIndex.AppendChild(sitemap);

            fileNumber++;
        }

        indexDoc.AppendChild(sitemapIndex);
        indexDoc.Save(siteMapIndexPath); // sitemap index file
    }

    /// <summary>Appends a child element holding only text to <paramref name="parent"/>.</summary>
    private static void AppendTextElement(XmlDocument doc, XmlElement parent, string name, string text)
    {
        XmlElement element = doc.CreateElement(name);
        element.InnerText = text;
        parent.AppendChild(element);
    }

    #endregion

    #region Resolve the current sub-site from the configuration file

    /// <summary>
    /// Maps the "DefaultControl" app setting to the sub-site it stands for.
    /// </summary>
    /// <returns>
    /// The matching <see cref="ConfigSiteUrl"/> value, or <see cref="ConfigSiteUrl.Default"/>
    /// when the setting has an unrecognised value.
    /// </returns>
    /// <remarks>
    /// The setting is dereferenced without a null check, so a missing "DefaultControl"
    /// key results in a <see cref="NullReferenceException"/>.
    /// </remarks>
    public static ConfigSiteUrl GetSiteByDefaultControl()
    {
        // Invariant lower-casing: the keys below are identifiers, not user-facing text.
        string defaultControl = ConfigurationManager.AppSettings["DefaultControl"].ToLowerInvariant();
        switch (defaultControl)
        {
            case "index":
                return ConfigSiteUrl.KinpanUrl;
            case "awards":
                return ConfigSiteUrl.KinpanAwardUrl;
            case "exhibition":
                return ConfigSiteUrl.ExhibitionUrl;
            case "magazine":
                return ConfigSiteUrl.MagazineUrl;
            case "onlinepub":
                return ConfigSiteUrl.TopicUrl;
            case "awardlivehome":
                return ConfigSiteUrl.LiveUrl;
            default:
                return ConfigSiteUrl.Default;
        }
    }

    #endregion
}

// Usage. The sitemap is generated per sub-site; the URL of every sub-site is configured
// under appSettings in web.config.
// NOTE(review): the fragment below reads Request, so it presumably belongs inside the
// action/page that serves "/index/SiteMap" — confirm. What is done with `str` afterwards is not shown.
bool isright = false;
string str = "";
if (Request.UrlReferrer != null)
{
    string host = Request.UrlReferrer.Host.ToLowerInvariant();
    // Only generate when the request was linked from our own pages. Match the exact domain
    // or a dot-delimited sub-domain so that e.g. "eviltest.com" is not accepted.
    // The Referer header is client-controlled, so this only deters casual abuse.
    if (host == "localhost" || host == "test.com" || host.EndsWith(".test.com", StringComparison.Ordinal))
    {
        isright = true;
    }
}

if (isright)
{
    string Info = CreateSiteMapTxt.Instance.CreateTxt();
    if (Info == "")
    {
        str = "生成成功" + DateTime.Now.ToString() + "<br/><br/>";
        ConfigSiteUrl site = CreateSiteMapTxt.GetSiteByDefaultControl();
        str += "当前生成的sitemap文件:" + ConfigurationManager.AppSettings[site.ToString()] + "/Upload/SiteMap/" + site.ToString() + ".xml <br/><br/><br/>";
    }
    else
    {
        str = "失败:" + Info + DateTime.Now.ToString() + "<br/><br/>";
    }
}
else
{
    str = "请点击下面的链接生成sitemap文件" + DateTime.Now.ToString() + "<br/><br/>";
}

// Links that trigger generation on each sub-site.
List<ConfigSiteUrl> list = new List<ConfigSiteUrl>
{
    ConfigSiteUrl.ExhibitionUrl,
    ConfigSiteUrl.KinpanAwardUrl,
    ConfigSiteUrl.KinpanUrl,
    ConfigSiteUrl.LiveUrl,
    ConfigSiteUrl.TopicUrl,
    ConfigSiteUrl.MagazineUrl
};
str += "<font color='red'>提示:</font><br/>";
foreach (var site in list)
{
    string siteurl = ConfigurationManager.AppSettings[site.ToString()];
    str += string.Format("网址:{0} <br/>sitemap文件地址:{1} <a href='{0}/index/SiteMap'>点击生成</a><br/><br/>", siteurl, siteurl + "/Upload/SiteMap/" + site.ToString() + ".xml");
}

string thisurl = Request.Url.AbsolutePath;
str += string.Format("<a href='{0}'>点击生成当前站点sitemap</a><br/><br/>", thisurl);