A regular expression based URL extractor which extracts URLs from text.
Thanks to Daniel Martí for creating the mvdan/xurls project. This Python project is built on the same concept as the Go version.
# alternatively, use regex as the engine if you have trouble installing re2
pip install google-re2 pyxurls
import xurls
extractor = xurls.Strict()
url = extractor.findfirst('we have the link with scheme https://www.python.org and https://www.github.com')
# https://www.python.org
urls = extractor.findall('we have the link with scheme https://www.python.org and https://github.com')
# ['https://www.python.org', 'https://github.com']
import xurls
extractor = xurls.Relaxed()
url = extractor.findfirst('we have the link with scheme www.python.org and https://www.github.com')
# www.python.org
urls = extractor.findall('we have the link with scheme www.python.org and https://github.com')
# ['www.python.org', 'https://github.com']
import xurls
# limit to https
extractor = xurls.StrictScheme('https://')
url = extractor.findfirst('we have the link with scheme custom://domain.com and https://www.python.org noscheme.com')
# https://www.python.org
# accept any scheme, not only the standard ones
extractor = xurls.StrictScheme(xurls.express.ANY_SCHEME)
urls = extractor.findall('we have the link with scheme custom://domain.com and https://www.python.org noscheme.com')
# ['custom://domain.com', 'https://www.python.org']