Optimise _split_url for speed.
Given it's called for every request it seemed a logical choice.
Crude benchmark directly calling the sub showed a 30% improvement.
- Store $authority directly in $host, avoids the copy in
the likely scenario of there being no auth.
- Use index rather than a regex to detect auth in the host.
- If that found an @ we can now directly chop up the host
using substr, no repeated searching for @.
- Avoid the redundant do block in the port ternary.
- Only lowercase the host at return time.
James Raspass authored 10 years ago
David Golden committed 9 years ago
765 | 765 | my $url = pop; |
766 | 766 | |
767 | 767 | # URI regex adapted from the URI module |
768 | my ($scheme, $authority, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)> | |
768 | my ($scheme, $host, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)> | |
769 | 769 | or die(qq/Cannot parse URL: '$url'\n/); |
770 | 770 | |
771 | 771 | $scheme = lc $scheme; |
772 | 772 | $path_query = "/$path_query" unless $path_query =~ m<\A/>; |
773 | 773 | |
774 | my ($auth,$host); | |
775 | $authority = (length($authority)) ? $authority : 'localhost'; | |
776 | if ( $authority =~ /@/ ) { | |
777 | ($auth,$host) = $authority =~ m/\A([^@]*)@(.*)\z/; # user:pass@host | |
774 | my $auth = ''; | |
775 | if ( (my $i = index $host, '@') != -1 ) { | |
776 | # user:pass@host | |
777 | $auth = substr $host, 0, $i, ''; # take up to the @ for auth | |
778 | substr $host, 0, 1, ''; # knock the @ off the host | |
779 | ||
778 | 780 | # userinfo might be percent escaped, so recover real auth info |
779 | 781 | $auth =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; |
780 | 782 | } |
781 | 783 | else { |
782 | $host = $authority; | |
783 | $auth = ''; | |
784 | } | |
785 | $host = lc $host; | |
786 | my $port = do { | |
787 | $host =~ s/:([0-9]*)\z// && length $1 | |
788 | ? $1 | |
789 | : ($scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef); | |
790 | }; | |
791 | ||
792 | return ($scheme, $host, $port, $path_query, $auth); | |
784 | $host = length $host ? $host : 'localhost'; | |
785 | } | |
786 | my $port = $host =~ s/:(\d*)\z// && length $1 ? $1 | |
787 | : $scheme eq 'http' ? 80 | |
788 | : $scheme eq 'https' ? 443 | |
789 | : undef; | |
790 | ||
791 | return ($scheme, lc $host, $port, $path_query, $auth); | |
793 | 792 | } |
794 | 793 | |
795 | 794 | # Date conversions adapted from HTTP::Date |