Subject | spoon | krijnhoetmer | gruber | gruber revised | cowboy | jeffrey friedl | mattfarina | stephenhay | stephenhay revised | scottgonzales | rodneyrehm | rodneyrehm revised | imme_emosol | imme_emosol ht-&f-tp(s) | filter_var | parse_url |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
URL verification (on URLs) | ||||||||||||||||
http://foo.com/blah_blah | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://foo.com/blah_blah/ | maybe | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://foo.com/blah_blah_(wikipedia) | maybe | maybe | passed | passed | passed | maybe | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed |
http://foo.com/blah_blah_(wikipedia)_(again) | maybe | maybe | maybe | passed | passed | maybe | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed |
http://www.example.com/wpstyle/?p=364 | passed | passed | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
https://www.example.com/foo/?bar=baz&inga=42&quux | passed | passed | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://✪df.ws/123 | failed | failed | passed | passed | passed | maybe | failed | passed | passed | passed | passed | passed | passed | passed | failed | passed |
http://userid:password@example.com:8080 | failed | passed | passed | passed | passed | failed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://userid:password@example.com:8080/ | failed | passed | passed | passed | passed | failed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://userid@example.com | failed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://userid@example.com/ | failed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://userid@example.com:8080 | failed | passed | passed | passed | passed | failed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://userid@example.com:8080/ | failed | passed | passed | passed | passed | failed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://userid:password@example.com | failed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://userid:password@example.com/ | failed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://192.168.1.1/ | failed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://192.168.1.1:8080/ | failed | passed | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://➡.ws/䨹 | failed | failed | passed | passed | passed | failed | failed | passed | passed | passed | failed | passed | passed | passed | failed | passed |
http://⌘.ws | failed | failed | passed | passed | passed | failed | failed | passed | passed | passed | passed | passed | passed | passed | failed | passed |
http://⌘.ws/ | failed | failed | passed | passed | passed | failed | failed | passed | passed | passed | passed | passed | passed | passed | failed | passed |
http://foo.com/blah_(wikipedia)#cite-1 | maybe | passed | maybe | passed | passed | maybe | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://foo.com/blah_(wikipedia)_blah#cite-1 | maybe | passed | maybe | passed | passed | maybe | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://foo.com/unicode_(✪)_in_parens | maybe | maybe | maybe | passed | passed | maybe | failed | passed | passed | passed | passed | passed | passed | passed | failed | passed |
http://foo.com/(something)?after=parens | maybe | passed | maybe | passed | passed | maybe | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
http://☺.damowmow.com/ | failed | passed | passed | passed | passed | failed | failed | passed | passed | passed | passed | passed | passed | passed | failed | passed |
http://code.example.com/events/#&product=browser | maybe | passed | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://j.mp | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed |
ftp://foo.bar/baz | failed | failed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | passed |
torrent://foo.bar/baz | failed | failed | passed | passed | passed | failed | passed | failed | failed | passed | failed | passed | passed | failed | passed | passed |
image://foo.bar:993 | failed | failed | passed | passed | passed | failed | passed | failed | failed | passed | failed | passed | passed | failed | passed | passed |
irc://foo.bar:6667 | failed | failed | passed | passed | passed | failed | passed | failed | failed | passed | failed | passed | passed | failed | passed | passed |
URL verification (on non-URLs; "passed" is the desired result, "failed" is a false positive) | ||||||||||||||||
rdar://1234 | passed | passed | failed | failed | failed | passed | failed | failed | passed | failed | passed | failed | passed | passed | failed | failed |
http:// | passed | passed | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://. | passed | passed | passed | passed | failed | passed | failed | passed | passed | failed | passed | passed | passed | passed | passed | failed |
http://.. | passed | passed | passed | passed | failed | passed | failed | passed | passed | failed | passed | passed | passed | passed | passed | failed |
http://../ | passed | failed | failed | failed | failed | passed | failed | passed | passed | failed | failed | passed | passed | passed | passed | failed |
http://? | passed | passed | passed | passed | failed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://?? | passed | passed | passed | passed | failed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed |
http://??/ | passed | failed | failed | failed | failed | passed | passed | passed | passed | failed | failed | passed | passed | passed | passed | failed |
http://# | passed | passed | passed | failed | failed | passed | passed | passed | passed | failed | failed | passed | passed | passed | passed | passed |
http://## | passed | failed | passed | failed | failed | passed | passed | passed | passed | failed | failed | passed | passed | passed | passed | passed |
http://##/ | passed | failed | failed | failed | failed | passed | passed | passed | passed | failed | failed | passed | passed | passed | passed | failed |
http://foo.bar?q=Spaces should be encoded | passed | failed | failed | failed | failed | failed | passed | failed | passed | passed | failed | failed | passed | passed | passed | failed |
// | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | failed |
//a | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | failed |
///a | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | failed |
/// | passed | passed | passed | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | failed |
http:///a | passed | failed | failed | failed | failed | passed | passed | failed | passed | failed | failed | passed | passed | passed | passed | passed |
foo.com | passed | passed | passed | passed | failed | passed | passed | passed | passed | passed | passed | passed | passed | passed | passed | failed |
http://-a.b.co | failed | failed | failed | failed | failed | failed | failed | failed | failed | failed | failed | failed | passed | passed | passed | failed |
http://a.b-.co | failed | failed | failed | failed | failed | failed | failed | failed | failed | failed | failed | failed | passed | passed | failed | failed |
URL scanning in natural text | ||||||||||||||||
what about (http://example.com) those? | passed | maybe | passed | passed | maybe | passed | failed | failed | failed | maybe | maybe | maybe | failed | failed | failed | maybe |
http://example.com?la#mc. nice site | maybe | passed | passed | passed | maybe | maybe | failed | maybe | failed | failed | passed | passed | failed | failed | failed | maybe |
"http://example.com?la#mc-" looks bad | maybe | failed | maybe | passed | maybe | maybe | failed | failed | failed | failed | passed | passed | failed | failed | failed | maybe |
www.example.com?la#mc-" is off | failed | failed | maybe | passed | maybe | failed | failed | failed | failed | failed | passed | passed | failed | failed | failed | maybe |
ftp://foo.example.com?la#mc-" is oldschool | maybe | failed | maybe | passed | maybe | maybe | failed | failed | failed | failed | passed | passed | failed | failed | failed | maybe |
yeah, looks nice www.example.org? | failed | failed | passed | passed | maybe | passed | failed | failed | failed | failed | passed | passed | failed | failed | failed | maybe |
what about IDN? http://✪df.ws/123 | failed | failed | passed | passed | passed | maybe | failed | failed | failed | passed | passed | passed | passed | passed | failed | maybe |
do you speak IRC? irc://foo:6667 | failed | failed | passed | passed | passed | failed | failed | failed | failed | passed | failed | passed | passed | failed | failed | maybe |

Name | Characters | Expression |
---|---|---|
spoon | 979 | /(((http|ftp|https):\/{2})+(([0-9a-z_-]+\.)+(aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx|cy|cz|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mn|mo|mp|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|nom|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ra|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw|arpa)(:[0-9]+)?((\/([~0-9a-zA-Z\#\+\%@\.\/_-]+))?(\?[0-9a-zA-Z\+\%@\/&\[\];=_-]+)?)?))\b/imuS |
krijnhoetmer | 114 | _(^|[\s.:;?\-\]<\(])(https?://[-\w;/?:@&=+$\|\_.!~*\|'()\[\]%#,☺]+[\w/#](\(\))?)(?=$|[\s',\|\(\).:;?\-\[\]>\)])_iS |
gruber | 71 | #\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))#iS |
gruber revised | 208 | #(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))#iS |
cowboy | 1241 | ~(?:\b[a-z\d.-]+://[^<>\s]+|\b(?:(?:(?:[^\s!@#$%^&*()_=+[\]{}\|;:'",.<>/?]+)\.)+(?:ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|coop|com|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp|xn--kgbechtv|xn--zckzah|ye|yt|yu|za|zm|zw)|(?:(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]))(?:[;/][^#?<>\s]*)?(?:\?[^#<>\s]*)?(?:#[^<>\s]*)?(?!\w))~iS |
jeffrey friedl | 241 | @\b((ftp|https?)://[-\w]+(\.\w[-\w]*)+|(?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+(?: com\b|edu\b|biz\b|gov\b|in(?:t|fo)\b|mil\b|net\b|org\b|[a-z][a-z]\b))(\:\d+)?(/[^.!,?;"'<>()\[\]{}\s\x7F-\xFF]*(?:[.!,?]+[^.!,?;"'<>()\[\]{}\s\x7F-\xFF]+)*)?@iS |
mattfarina | 287 | /^([a-z][a-z0-9\*\-\.]*):\/\/(?:(?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*(?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@)?(?:(?:[a-z0-9\-\.]|%[0-9a-f]{2})+|(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\]))(?::[0-9]+)?(?:[\/|\?](?:[\w#!:\.\?\+=&@!$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})*)?$/xiS |
stephenhay | 31 | @^[httprads:]*\/\/[^$.?#].*$@iS |
stephenhay revised | 34 | @^[hftps]*:\/\/[^/$.?#].[^\s]*$@iS |
scottgonzales | 1347 | #([a-z]([a-z]|\d|\+|-|\.)*):(\/\/(((([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?((\[(|(v[\da-f]{1,}\.(([a-z]|\d|-|\.|_|~)|[!\$&'\(\)\*\+,;=]|:)+))\])|((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|(([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=])*)(:\d*)?)(\/(([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*|(\/((([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)|((([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)|((([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)){0})(\?((([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\xE000-\xF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\x00A0-\xD7FF\xF900-\xFDCF\xFDF0-\xFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?#iS |
rodneyrehm | 107 | #((https?://|ftp://|www\.|[^\s:=]+@www\.).*?[a-z_\/0-9\-\#=&])(?=(\.|,|;|\?|\!)?("|'|«|»|\[|\s|\r|\n|$))#iS |
rodneyrehm revised | 111 | #(([a-z]+://|www\.|[^\s:=]+@www\.)([^/].*?[a-z0-9].*?)([a-z_\/0-9\-\#=&]|))(?=[\.,;\?\!]?(["'«»\[\s\r\n]|$))#iS |
imme_emosol | 72 | @(https?|ftp|torrent|image|irc)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS |
imme_emosol ht-&f-tp(s) | 54 | @(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS |
filter_var | PHP core function, not a regular expression! | |
parse_url | PHP core function, not a regular expression! | |