00001 using System;
00002 using System.Text;
00003 using System.Collections.Specialized;
00004 using System.Text.RegularExpressions;
00005
00006 namespace Common
00007 {
/// <summary>
/// Locates the images referenced by an HTML document and builds the
/// follow-up requests needed to fetch them.
/// </summary>
public class ImageUriParser
{
    // Captures the value of the src attribute of an img tag in group 1.
    // The value may be double-quoted, single-quoted or unquoted.
    // The regex is built once: constructing a Compiled regex on every call is
    // expensive, and Regex matching methods are safe to share across threads.
    // NOTE(review): because of Singleline, "img.*?src" can run past the end of
    // an img tag that has no src and pick up the src of a later tag.
    private static readonly Regex ImageSrcRegex = new Regex(
        "<\\s*?img.*?src\\s*?=\\s*?(?:\"(?<1>[^\"]*)\"|'(?<1>[^']*)'|(?<1>\\S+)).*?>",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled | RegexOptions.Singleline);

    // Line-break characters stripped from both ends of a captured src value.
    private static readonly char[] LineBreakChars = new char[] { '\n', '\r', '\f' };

    /// <summary>
    /// Creates a parser instance. All functionality is exposed through static
    /// methods; the constructor is kept for existing callers.
    /// </summary>
    public ImageUriParser() {}

    /// <summary>
    /// Builds one request per distinct image referenced by an HTML response.
    /// </summary>
    /// <param name="msgLog">Logger that receives progress and error messages.</param>
    /// <param name="originalRequest">Request that produced the HTML; its URI is the
    /// base for relative image references and it is passed to each new request's
    /// constructor.</param>
    /// <param name="htmlResponse">Response whose body bytes are scanned for img tags.</param>
    /// <returns>
    /// An array with one slot per distinct image reference. A slot is null when the
    /// reference could not be turned into a URI or the request could not be created.
    /// </returns>
    public static HTTPRequest[] GenerateImageRequests(MessageLogger msgLog, HTTPRequest originalRequest, HTTPResponse htmlResponse) {
        Uri[] imageUris = GetImageUris(msgLog, originalRequest.URI, htmlResponse.Body.data);
        HTTPRequest[] requests = new HTTPRequest[imageUris.Length];

        for (int i = 0; i < requests.Length; i++) {
            Uri imageUri = imageUris[i];
            if (imageUri == null) {
                // GetImageUris leaves a null slot for every malformed reference.
                continue;
            }

            try {
                requests[i] = new HTTPRequest(originalRequest, imageUri);
            } catch (Exception ex) {
                // Best effort: one bad image must not prevent the others from loading.
                msgLog.LogError("Unable to create image request for {0} : {1}", imageUri, ex.Message);
                requests[i] = null;
            }
        }
        return requests;
    }

    /// <summary>
    /// Extracts the distinct image URIs referenced by img tags in an HTML document.
    /// </summary>
    /// <param name="msgLog">Logger that receives progress and error messages.</param>
    /// <param name="baseUri">URI against which each image reference is resolved.</param>
    /// <param name="htmlAsASCIIBytes">HTML document bytes, decoded as ASCII. A null
    /// array is treated as an empty document.</param>
    /// <returns>
    /// An array with one slot per distinct src value, in order of first appearance.
    /// A slot is null when the value could not be parsed as a URI.
    /// </returns>
    public static Uri[] GetImageUris(MessageLogger msgLog, Uri baseUri, byte[] htmlAsASCIIBytes) {
        // A missing body simply contains no images.
        string htmlString = htmlAsASCIIBytes == null
            ? string.Empty
            : Encoding.ASCII.GetString(htmlAsASCIIBytes);

        StringCollection uniqueStrings = new StringCollection();
        int totalFound = 0;

        for (Match m = ImageSrcRegex.Match(htmlString); m.Success; m = m.NextMatch()) {
            // Strings are immutable: Trim returns the trimmed copy, so the result
            // must be kept (the value may span lines inside quotes).
            string source = m.Groups[1].Value.Trim(LineBreakChars);
            totalFound++;

            if (!uniqueStrings.Contains(source)) {
                uniqueStrings.Add(source);
            }
        }
        msgLog.Log("Parse found {0} images before duplicate removal", totalFound);

        Uri[] result = new Uri[uniqueStrings.Count];
        for (int i = 0; i < uniqueStrings.Count; i++) {
            try {
                // NOTE(review): this overload is marked obsolete in current .NET,
                // where dontEscape is documented as ignored. It is kept so that
                // behaviour on older target frameworks does not change.
                result[i] = new Uri(baseUri, uniqueStrings[i], true);
            } catch (UriFormatException) {
                // The slot stays null; GenerateImageRequests skips null entries.
                msgLog.LogError("Dropping this parse and push IMG - bad URI");
            }
        }
        return result;
    }
}
00080 }