.

URI manipulation

Use a regex: const string LocalLinkRegex = @"href[\s]*=[\s]*[""'][.\/]?[\w\.?=%&=\-@/$,]+[""']";

// Normally it's: const string WWWRegex = @"(?<Protocol>\w+):\/\/(?<Domain>[\w@][\w.:@]+)\/?[\w\.?=%&=\-@/$,]*";

            // Build a case-insensitive regex from the LocalLinkRegex pattern.
            R = new Regex(LocalLinkRegex, RegexOptions.IgnoreCase);
            // Collect every match of that pattern in wpr.PageContent
            // (presumably the downloaded page's HTML — confirm against the caller).
            Matches = R.Matches(wpr.PageContent);

For example, if:

 url = "http://msdn.microsoft.com/en-us/magazine/cc300497.aspx" and

 sss = 'href="hh227291.aspx"'

then UUU.AbsoluteUri will be "http://msdn.microsoft.com/en-us/magazine/hh227291.aspx".

This also works for relative forms such as sss = 'href="../hh227291.aspx"' or 'href="/hh227291.aspx"'; each is resolved against url.


// Resolve the text of the current regex match into an absolute Uri (UUU).
// If the match cannot be resolved, skip it and move on to the next one.
string sss = Match.ToString();
Uri UUU;
if (url == "")
{
    // No base page URL: the matched text itself must be an absolute URL.
    // The original casing is kept on purpose: URL paths and query strings can be
    // case-sensitive, and Uri already normalizes the scheme and host.
    if (!Uri.TryCreate(sss, UriKind.Absolute, out UUU))
    {
        continue;
    }
}
else
{
    // The match looks like href="..." or href='...'; find the quoted value.
    // Double quotes are preferred, falling back to single quotes.
    int offsetFirstQuote = sss.IndexOf('"');
    if (offsetFirstQuote == -1)
    {
        offsetFirstQuote = sss.IndexOf('\'');
    }

    int offsetLastQuote = sss.LastIndexOf('"');
    if (offsetLastQuote == -1)
    {
        offsetLastQuote = sss.LastIndexOf('\'');
    }

    // Without a proper opening/closing quote pair there is no link value to extract.
    if (offsetFirstQuote == -1 || offsetLastQuote <= offsetFirstQuote)
    {
        continue;
    }

    string link = sss.Substring(offsetFirstQuote + 1, offsetLastQuote - offsetFirstQuote - 1);

    // The base must be an absolute URL for relative resolution to be possible.
    Uri baseUri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out baseUri))
    {
        continue;
    }

    // Resolve the (possibly relative) link against the page it was found on.
    if (!Uri.TryCreate(baseUri, link, out UUU))
    {
        continue;
    }
}

What are your thoughts on this?

*

Protected by WP Anti Spam