Shabat Closer

Friday, February 8, 2013

C#: extract all domains from text

A simple function to find all domains (http, https and ftp URLs) in a text:

Requires the System.Text.RegularExpressions namespace (the Regex class):

// Matches http/https/ftp URLs: scheme, "://", a dotted host name, then an optional path/query tail.
// Built once and reused: constructing a RegexOptions.Compiled regex on every call is expensive,
// and a Regex instance can safely be shared for matching.
private static readonly Regex UrlRegex = new Regex(
    @"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Scans <paramref name="text"/> for http, https and ftp URLs and visits every match in order.
/// </summary>
/// <param name="text">Text to scan. Null or empty input is ignored and yields no matches.</param>
public void get_all_domains(string text) {
    // Regex.Matches throws on null input; there is nothing to find in an empty string either.
    if (string.IsNullOrEmpty(text)) {
        return;
    }

    foreach (Match match in UrlRegex.Matches(text)) {
        // NOTE: match.Value is the whole matched URL (scheme, host and path), not only the host name.
        // A successful match always contains at least the scheme and host, so it is never empty.
        string dst_domain = match.Value;

        // do something with dst_domain like:
        // DB.Insert(dst_domain);
    }
}

No comments:

Post a Comment