by pietman
6. June 2011 18:12
use a regex: const string LocalLinkRegex = @"href[\s]*=[\s]*[""'][.\/]?[\w\.?=%&=\-@/$,]+[""']";
//normally it's: const string WWWRegex = @"(?<Protocol>\w+):\/\/(?<Domain>[\w@][\w.:@]+)\/?[\w\.?=%&=\-@/$,]*";
// Build the local-link regex with case-insensitive matching, then collect
// every match found in the downloaded page content.
R = new Regex(LocalLinkRegex, RegexOptions.IgnoreCase);
Matches = R.Matches(wpr.PageContent);
For example, if:
url = 'http://msdn.microsoft.com/en-us/magazine/cc300497.aspx' and
sss = 'href="hh227291.aspx"'
then UUU.AbsoluteUri will be 'http://msdn.microsoft.com/en-us/magazine/hh227291.aspx'.
This also works for sss = 'href="../hh227291.aspx"' or 'href="/hh227291.aspx"', which are resolved relative to url in the same way.
// Resolve the matched href attribute text (e.g. href="../page.aspx") into the
// Uri UUU. A match that cannot be resolved is skipped with 'continue', so this
// fragment must stay inside the loop over the regex matches.
string sss = Match.ToString();
Uri UUU;

// Take the attribute value between the first and the last quote character.
// Both quote styles are searched at once so that a match with mismatched
// quotes (href="x') is still extracted instead of failing in Substring.
char[] quoteChars = { '"', '\'' };
int offsetFirstQuote = sss.IndexOfAny(quoteChars);
int offsetLastQuote = sss.LastIndexOfAny(quoteChars);
if (offsetFirstQuote < 0 || offsetLastQuote <= offsetFirstQuote)
{
    continue;
}

// Lowercasing is kept from the original code, but done culture-invariantly.
// NOTE(review): URL paths and query strings can be case-sensitive on the
// server, so lowercasing may change which resource the link points to —
// confirm whether later code relies on lowercase links before removing it.
string hrefValue = sss
    .Substring(offsetFirstQuote + 1, offsetLastQuote - offsetFirstQuote - 1)
    .ToLowerInvariant();

if (url == "")
{
    // No base URL: the link is usable only if it is already absolute.
    // (Previously the whole 'href="..."' text was handed to the Uri
    // constructor, which always threw and skipped the match.)
    if (!Uri.TryCreate(hrefValue, UriKind.Absolute, out UUU))
    {
        continue;
    }
}
else
{
    // Resolve the link against the page URL; TryCreate reports failure
    // without throwing, so no catch-all handler is needed.
    Uri baseUri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out baseUri))
    {
        continue;
    }
    if (!Uri.TryCreate(baseUri, hrefValue, out UUU))
    {
        continue;
    }
}
b29b4088-c45d-4216-a16b-67b6f8cd33ad|0|.0
Tags:
c#