repo: uritools action: commit revision: path_from: revision_from: 6f402e2d2f052972886712f60d592684c8671982: path_to: revision_to:
commit 6f402e2d2f052972886712f60d592684c8671982 Author: epochDate: Sat Apr 20 05:32:27 2019 -0500 rebased on an old copy of this repo. renamed everything. rewrote the uri parser. added uricmp. wew. diff --git a/.gitignore b/.gitignore
--- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ -matchurl -cuturl +uricmp +uricut +uriescape +urimatch +uriunescape diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,26 +1,28 @@
-CFLAGS=-std=c99 -pedantic -Wall
-PREFIX=/usr/local
-CC=gcc
+CFLAGS:=-std=c11 -pedantic -Wall
+PREFIX:=/usr/local
+CC:=gcc
-all: cuturl matchurl urlunescape urlescape
+all: uricut urimatch uriunescape uriescape uricmp
-matchurl: matchurl.c url.h
+urimatch: urimatch.c uri.h
-cuturl: cuturl.c url.h
+uricut: uricut.c uri.h
-urlunescape: urlunescape.c url.h
+uricmp: uricmp.c uri.h
-urlescape: urlescape.c url.h
+uriunescape: uriunescape.c uri.h
+
+uriescape: uriescape.c uri.h
clean:
- rm -f matchurl
- rm -f cuturl
+ rm -f uricut urimatch uriunescape uriescape uricmp
rm -f *.o
install: all
- install matchurl $(PREFIX)/bin/matchurl
- install cuturl $(PREFIX)/bin/cuturl
- install start $(PREFIX)/bin/start
- install printfurl $(PREFIX)/bin/printfurl
- install urlunescape $(PREFIX)/bin/urlunescape
- install urlescape $(PREFIX)/bin/urlescape
+ install urimatch $(PREFIX)/bin/urimatch
+ install uricut $(PREFIX)/bin/uricut
+ install uricmp $(PREFIX)/bin/uricmp
+ install uristart $(PREFIX)/bin/uristart
+ install uriprintf $(PREFIX)/bin/uriprintf
+ install uriunescape $(PREFIX)/bin/uriunescape
+ install uriescape $(PREFIX)/bin/uriescape
diff --git a/start b/start
deleted file mode 100755
index 9a3dca6d73bd103418db6840c472f62275e593cb..0000000000000000000000000000000000000000
--- a/start
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-scheme="$(printf "%s\n" "$1" | cuturl -s)"
-line="$(grep "^${scheme}:" ~/.config/start.conf | cut -d: -f2-)"
-eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | printfurl "$line")"
diff --git a/start.conf.example b/start.conf.example
deleted file mode 100644
index 6864da3afda485f5232cb8c8012db1405d70e2ed..0000000000000000000000000000000000000000
--- a/start.conf.example
+++ /dev/null
@@ -1,8 +0,0 @@
-### start single-quote escapes the url parts so place them inside single-quote or else!
-### (if you're given a bad link someone might be able to run shell commands)
-finger:printf "%%s\r\n" '%p' | ncat '%d' 79 | tr -d '\r' | xmessage -file -
-### new! subshells works
-whois:whois "$(printf '%%s\\\\n' '%d' | sed 's/^..*$/-h/')" '%d' '%p' | xmessage -file -
-irc:x-terminal-emulator -e irssi -c '%d' -p '%P'
-http:dillo '%U'
-DEFAULT:xdg-open '%U'
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000000000000000000000000000000000000..97ce3c27e28338149c620814167244da3fc3ec71
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,253 @@
+#ifndef uri_H
+#define uri_H
+
+#define _XOPEN_SOURCE 500 //for strdup
+#include
+#include
+#include
+#include
+
+//uri_reserved = gen-delims / sub-delims
+#define pe_gen_delims ":/?#[]@"
+#define pe_sub_delims "!$&'()*+,;="
+//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims;
+#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define pe_DIGIT "0123456789"
+#define pe_HPUT "-._~"
+//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT;
+
+unsigned char rfc3086_percent_encoding[256];//NOTE(review): "3086" is presumably a typo for RFC 3986 - confirm before renaming.
+
+#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F'))//ASCII-only hex digit test. the argument is pasted in unparenthesized and up to six times, so only pass simple expressions with no side effects.
+#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a)//ASCII-only: maps a-z to A-Z by subtracting ' ' (0x20), anything else is returned unchanged. same argument caveats as isxdigit.
+
+char *uri_reserved={//a single string: the five literals below are concatenated by the compiler. NOTE(review): despite the name this also contains the unreserved sets (ALPHA, DIGIT, "-._~").
+ pe_gen_delims
+ pe_sub_delims
+ pe_ALPHA
+ pe_DIGIT
+ pe_HPUT
+};
+
+int uriescapelength(char *in,int len) {
+ int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end.
+ int i;
+ for(i=0;i> 4 & 0x15)];
+ j++;
+ out[j]="0123456789ABCDEF"[(in[i] % 16)];
+ j++;
+ }
+ }
+}
+
+int uriunescape(char *in,char *out) {//percent-decodes the C string in into out, null-terminates out, and returns the decoded length (terminator not counted). the result is never longer than the input, so strlen(in)+1 bytes of out is enough.
+ char *o=out;//next byte to write
+ char *t;//the '%' currently being examined
+ char a,b;//the two characters after t, uppercased
+ char *s=in;//next byte to read
+ if(!strchr(s,'%')) memmove(out,in,strlen(in));//nothing to decode. the loop below is skipped and the tail copy at the bottom repeats this same copy.
+ while((t=strchr(s,'%'))) {
+ if(t-s) {//if there are actually bytes to copy.
+ memmove(o,s,t-s);//memmove rather than memcpy: uriunescape.c calls this with out == in, so source and destination can overlap.
+ o+=(t-s);
+ s+=(t-s);
+ }
+ if(isxdigit(t[1]) && isxdigit(t[2])) {//only a '%' followed by two hex digits is an escape. && stops at the first failure, so t[2] is not read when t[1] is the terminator.
+ s+=3;//skip the %XX
+ a=toupper(t[1]);
+ b=toupper(t[2]);
+ *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10);//a is the high nibble, b the low nibble. both are 0-9 or A-F at this point.
+ o++;
+ } else {
+ s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is.
+ *o='%';//a stray '%' is passed through unchanged
+ o++;
+ }
+ }
+ //copy the last part.
+ memmove(o,s,strlen(s));
+ o[strlen(s)]=0;//terminate right after the copied tail
+ return o+strlen(s)-out;//bytes written, not counting the terminator
+}
+
+struct uri {//the eight pieces of a URI, each reachable by index through A[], by a one-letter name, or by a long name. NOTE(review): reading a union member other than the one last written is undefined in C++, but C11 defines it (6.5.2.3, type-punning footnote). what this overlay does assume is that the eight pointers are laid out with no padding between them, so that A[i] lines up with the i-th named member - confirm for your target.
+ union {
+ char *A[8];//index view, in the same order as the members below
+ struct {
+ union { char *s;char *scheme; };//A[0]
+ union { char *u;char *username; };//A[1]
+ union { char *k;char *password; };//A[2]
+ union { char *d;char *domain; };//A[3]
+ union { char *P;char *port; };//A[4]
+ union { char *p;char *path; };//A[5]
+ union { char *q;char *query_string; };//A[6]
+ union { char *f;char *fragment_id; };//A[7]
+ };
+ };
+};
+
+//returns 0 when every piece matches. otherwise returns a bitmask of the non-matching pieces; it is wider than one byte, since bits i+8 and i+16 are also used below.
+unsigned int uricmp(struct uri *a,struct uri *b) {
+ int i;
+ int ret=0;
+ for(i=0;i<8;i++) {
+ if(a->A[i] && !b->A[i]) ret |=(1<<(i+8));//we have a's but not b's
+ if(!a->A[i] && b->A[i]) ret |=(1<<(i+16));
+ //for testing if(!a->A[i] && !b->A[i]) ret |=(1<<(i+24));//no problem here. both empty.
+ if(a->A[i] && b->A[i]) {
+ if(strcmp(a->A[i],b->A[i])) {
+ ret|=(1<fragment_id=strchr(line,'#'))) {
+ *u->fragment_id=0;
+ u->fragment_id++;
+ }
+ if((u->query_string=strchr(line,'?'))) {
+ *u->query_string=0;
+ u->query_string++;
+ }
+ //now we have scheme, user, pass, domain, port, and path. maybe.
+ //what character can we split on now? : is a terrible choice.
+ // how about /? first / is either a separator between scheme
+ //could find the first non-scheme character.
+ //so we might have... scheme://user:pass@host:port/path
+ //or... user:pass@host:port/path ?
+ //we need to do this based on /s
+ // we're either going to find the scheme and authority separator
+ // or we're going to find the start of a path.
+ //there: scheme:/path, scheme://host (empty path), or scheme:path/morepath
+ //or... should we do paths without
+ //scheme must start with a-z
+/* if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe
+ u->path=line;
+ return;//we're done. nothing else to do.
+ }
+ if(*line == '.') { //we have a relative path like: ./derp or ../merp
+ u->path=line;
+ return;//we're done here. nothing else to do.
+ }*/
+ //let's see if this starts with a scheme
+ if(strchr(line,':') && ((*line >= 'a' && *line <= 'z') || (*line >= 'A' && *line <= 'Z'))) {
+ for(u->scheme=strchr(line,':')-1;u->scheme > line;u->scheme--) {
+ if((*u->scheme >= 'a' && *u->scheme <= 'z') ||
+ (*u->scheme >= 'A' && *u->scheme <= 'Z') ||
+ (*u->scheme >= '0' && *u->scheme <= '9') ||
+ *u->scheme == '+' || *u->scheme == '-' || *u->scheme == '.') {
+ //this is still a scheme.
+ } else {
+ break;
+ }
+ }
+ if(u->scheme == line) {//we got through the for loop alright. line starts with a scheme.
+ line=strchr(line,':');
+ *line=0;
+ line++;
+ for(t=u->scheme;*t;t++) {
+ if(*t >= 'A' && *t <= 'Z') *t+=' ';
+ }
+ }
+ }
+
+ //copy-pasted from above the scheme strip attempt.
+ if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe
+ u->path=line;
+ return 1;//we're done. nothing else to do.
+ }
+ if(*line == '.') { //we have a relative path like: ./derp or ../merp
+ u->path=line;
+ return 1;//we're done here. nothing else to do.
+ }
+
+ if(*line == '/' && line[1] == '/') {//we have an authority section.
+ //let's left-shift this shit over until the third /
+ for(t=line+1;*(t+1) && *(t+1) != '/';t++) {
+ *t=*(t+1);
+ }
+ *t=0;
+ u->path=t+1;//if there was a /, path points at it and the stuff after.
+ //if there wasn't a /, it points at a null byte. so "empty"
+ u->username=line+1;
+ } else {
+ //we have all we need.
+ return 1;
+ }
+
+ if(u->username) {//this contains all of the authority.
+ if((u->domain=strchr(u->username,'@'))) {//we have user@host at least.
+ *u->domain=0;
+ u->domain++;
+ } else {//this isn't really a username. it is the domain.
+ u->domain=u->username;
+ u->username=0;
+ }
+ }
+ //if we still have u->username we try to split to user and password
+ if(u->username) {
+ if((u->password=strchr(u->username,':'))) {
+ *u->password=0;
+ u->password++;
+ }
+ }
+ if(u->domain) {
+ if((u->port=strchr(u->domain,']')) && *u->domain == '[') {//this is an IPv6 host
+ *u->port=0;
+ u->port++;
+ if(*u->port == ':') {
+ *u->port=0;
+ u->port++;//if it ends up being empty, whatever. that's a URI like: http://host:/path
+ }
+ } else { //we're safe to split port off at :
+ if((u->port=strchr(u->domain,':'))) {
+ *u->port=0;
+ u->port++;
+ } //there isn't a port. leave it unset.
+ }
+ }
+ //I dunno.
+ return 1;
+}
+
+#endif
diff --git a/uricmp.c b/uricmp.c
new file mode 100644
index 0000000000000000000000000000000000000000..9af0fedae25e5f6db74d8852d948769b0a08bf25
--- /dev/null
+++ b/uricmp.c
@@ -0,0 +1,20 @@
+#include "uri.h"
+#include
+
+int main(int argc,char *argv[]) {//usage: uricmp uri1 uri2. prints each pair of pieces, then the uricmp() mask in hex. exits 1 on bad usage, 2 when the mask is nonzero, 0 when the URIs match.
+ unsigned int ret;//same type uricmp() returns and %08x expects
+ struct uri a,b;//plain locals: nothing to allocate, check, or free
+ if(argc < 3) {
+  fprintf(stderr,"usage: uricmp uri1 uri2\n");//we didn't ask for usage so it goes to stderr
+  return 1;
+ }
+ memset(&a,0,sizeof(a));//zero first, as urimatch does before urifromline(), so pieces the parser does not assign read as null rather than indeterminate
+ memset(&b,0,sizeof(b));
+ urifromline(&a,argv[1]);//splits the argument in place; the pieces point into argv[1]
+ urifromline(&b,argv[2]);
+ for(int i=0;i<8;i++) {
+  printf("%s ? %s\n",a.A[i]?a.A[i]:"(null)",b.A[i]?b.A[i]:"(null)");//a null pointer for %s is undefined behavior, so spell out the text glibc would have printed
+ }
+ printf("%08x\n",ret=uricmp(&a,&b));
+ return ret > 0 ? 2 : 0;
+}
diff --git a/cuturl.c b/uricut.c
similarity index 65%
rename from cuturl.c
rename to uricut.c
--- a/cuturl.c +++ b/uricut.c @@ -1,3 +1,5 @@ +#include "uri.h" + #include#include #include @@ -6,10 +8,6 @@ #include #include -#include "url.h" - -#define MAGIC - /* schemes are case sensitive but cononicals are lower case. domain is case insensitive. return it lowercased? @@ -32,32 +30,30 @@ #define F_PATH 1<<5 #define F_QUERY_STRING 1<<6 #define F_FRAGMENT_ID 1<<7 -#define F_WHOLE_URL 1<<8 +#define F_WHOLE_URI 1<<8 -char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URL",0}; +char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URI",0}; char *short_opts[]={"s","u","k","d","P","p","q","f","U"}; int main(int argc,char *argv[]) { - char *url; - char *name[2]; + char *uri; char *line=0; - short args[256];//this needs to be a short to make room for the F_WHOLE_URL + short args[256];//this needs to be a short to make room for the F_WHOLE_URI int i,j,c=0; int size=1024; - int status; char fixme=0; char using_stdin=1; char malloced=0; - struct url u; + struct uri u; if(argc > 1) { if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) { - printf("usage: echo urls | cuturl [options]\n"); - printf("usage: cuturl [options] url [options] [url]\n\n"); + printf("usage: echo uris | uricut [options]\n"); + printf("usage: uricut [options] uri [options] [uri]\n\n"); printf("options: \n"); for(i=0;long_opts[i];i++) { printf(" -%s|--%s\n",short_opts[i],long_opts[i]); } - printf("To set default values use environment variables like: CUTURL_[OPTION]\n"); + printf("To set default values use environment variables like: CUTURI_[OPTION]\n"); return 2; } } @@ -120,51 +116,12 @@ int main(int argc,char *argv[]) { for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++); line[i]=0; - url=strdup(line); - urlfromline(&u,line); + uri=strdup(line); + urifromline(&u,line); // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n"); //let's set them to what'll get 
printed now... -#ifdef MAGIC - magic_and_defaults(&u); -/* - u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT")); - u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT")); - u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT")); - u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT")); - serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80 - if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); - u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT"))); - u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT")); - u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT")); - u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT")); -*/ -#endif - - if((name[0]=getenv("CUTURL__"))) { - setenv("CUTURL__SCHEME",u.scheme,1); - setenv("CUTURL__USERNAME",u.username,1); - setenv("CUTURL__PASSWORD",u.password,1); - setenv("CUTURL__DOMAIN",u.domain,1); - setenv("CUTURL__PORT",u.port,1); - setenv("CUTURL__PATH",u.path,1); - setenv("CUTURL__QUERY_STRING",u.query_string,1); - setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1); - name[1]=0; - switch(fork()) { - case 0: - execv(name[0],name); - perror("execv"); - return errno; - case -1: - perror("fork"); - return errno; - default: - break; - } - wait(&status); - } else { if(c) { for(i=0;i --- a/urlescape.c +++ b/uriescape.c @@ -1,13 +1,13 @@ +#include "uri.h" #include-#include "url.h" int main(int argc,char *argv[]) { int len; char *out; if(argc < 2) return 1; - len=urlescapelength(argv[1],strlen(argv[1])); + len=uriescapelength(argv[1],strlen(argv[1])); out=malloc(len+1); - urlescape(argv[1],out,len); + uriescape(argv[1],out,len); out[len]=0; printf("%s\n",out); return 0; diff --git a/matchurl.c b/urimatch.c similarity index 92% rename from matchurl.c rename to urimatch.c --- a/matchurl.c +++ b/urimatch.c @@ -1,7 +1,7 @@ +#include "uri.h" 
#include#include #include -#include "url.h" #define LINE_LENGTH 1024 @@ -19,16 +19,16 @@ int match(char negate,char *part,char *arg) { int main(int argc,char *argv[]) { int i; int ret=1; - struct url u; + struct uri u; char negate=0; char *line=malloc(LINE_LENGTH); char copy[LINE_LENGTH]; if(argc < 2) { - printf("usage: matchurl [-][n][s|u|k|d|D|P|p|q|f] [string]\n"); + printf("usage: urimatch [-][n][s|u|k|d|D|P|p|q|f] [string]\n"); printf("scheme://username:password@domain:port/path?query_string#fragment_id\n"); printf("s://u:k@d:P/p?q#f\n"); printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n"); - printf("This allows matching of subdomains, like `echo epoch.ano | matchurl -D ano` would match.\n"); + printf("This allows matching of subdomains, like `echo epoch.ano | urimatch -D ano` would match.\n"); printf("the 'n' flag can be put before any of the other flags to check for a missing.\n"); return 1; } @@ -37,7 +37,7 @@ int main(int argc,char *argv[]) { if(strchr(line,'\n')) *strchr(line,'\n')=0; strcpy(copy,line); memset(&u,0,sizeof(u)); - urlfromline(&u,line); + urifromline(&u,line); //use the character in argv[1] to match stdin against argv[2]. if match print whole line. for(i=1;i = strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) + if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) { printf("%s\n",copy); ret=0; + } break; default: printf("unknown url part letter! 
'%c'\n",argv[i][0]); diff --git a/printfurl b/uriprintf similarity index 80% rename from printfurl rename to uriprintf --- a/printfurl +++ b/uriprintf @@ -2,4 +2,4 @@ cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')" count="$(echo $cutargs | tr '-' '\n' | grep -c .)" printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')" -cuturl "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" +uricut "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" diff --git a/uristart b/uristart new file mode 100755 index 0000000000000000000000000000000000000000..78454b78a2e9aaca4ceb6b37dfa0b5e7e780de42 --- /dev/null +++ b/uristart @@ -0,0 +1,4 @@ +#!/bin/sh +scheme="$(printf "%s\n" "$1" | uricut -s)" +line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[ \t]//g')" +eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | uriprintf "$line")" diff --git a/urlunescape.c b/uriunescape.c similarity index 78% rename from urlunescape.c rename to uriunescape.c--- a/urlunescape.c +++ b/uriunescape.c @@ -1,10 +1,10 @@ +#include "uri.h" #include-#include "url.h" int main(int argc,char *argv[]) { int len; for(argv++,argc--;argc;argc--,argv++) { - len=urlunescape(*argv,*argv); + len=uriunescape(*argv,*argv); write(1,*argv,len); if(argc-1) write(1," ",1); } diff --git a/url.h b/url.h deleted file mode 100644 index 6304a48ebc18b0a3eddd9ba9b96bc9d361e2187a..0000000000000000000000000000000000000000 --- a/url.h +++ /dev/null @@ -1,261 +0,0 @@ -#ifndef URL_H -#define URL_H - -#include -#include -#include -#include - -//reserved = gen-delims / sub-delims -#define pe_gen_delims ":/?#[]@" -#define pe_sub_delims "!$&'()*+,;=" -//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims; -#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -#define pe_DIGIT "0123456789" -#define pe_HPUT "-._~" -//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT; - 
-unsigned char rfc3086_percent_encoding[256]; - -#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F')) -#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a) - -char *reserved={ - pe_gen_delims - pe_sub_delims - pe_ALPHA - pe_DIGIT - pe_HPUT -}; - -int urlescapelength(char *in,int len) { - int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end. - int i; - for(i=0;i > 4 & 0x15)]; - j++; - out[j]="0123456789ABCDEF"[(in[i] % 16)]; - j++; - } - } -} - -int urlunescape(char *in,char *out) { - char *o=out; - char *t; - char a,b; - char *s=in; - if(!strchr(s,'%')) memmove(out,in,strlen(in)); - while((t=strchr(s,'%'))) { - if(t-s) {//if there are actually bytes to copy. - memmove(o,s,t-s); - o+=(t-s); - s+=(t-s); - } - if(isxdigit(t[1]) && isxdigit(t[2])) { - s+=3;//skip the %XX - a=toupper(t[1]); - b=toupper(t[2]); - *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10); - o++; - } else { - s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is. - *o='%'; - o++; - } - } - //copy the last part. - memmove(o,s,strlen(s)); - o[strlen(s)]=0; - return o+strlen(s)-out; -} - -struct url { - char *scheme; - char *username; - char *password; - char *domain; - char *port; - char *path; - char *query_string; - char *fragment_id; -}; - - -/* - schemes are case sensitive but cononicals are lower case. - domain is case insensitive. return it lowercased? - port is optional and in decimal - path - scheme://username:password@domain:port/path?query_string#fragment_id - mailto:username@domain - - optional stuff: - scheme, username, password, port, path, query_string, fragment_id -*/ - -void urlfromline(struct url *u,char *line) { - int i; - char hack=0;//we need to allow for // as host//path separator - //split at first single / into line and path - //this fails to split scheme://host//path into: scheme, host, /path. 
needs to be first single / or second double-or-more-/ - for(i=0;line[i];i++) { - if(line[i] == '/' && line[i+1] == '/') { - if(!hack) {//only skip out on the first // because it is probably used in the scheme. - hack=1; - i++; - continue; - } - } - if(line[i] == '/') { - line[i]=0; - u->path=line+i+1; - break; - } - } - if(u->path) { - if(strchr(u->path,'?')) { - u->query_string=strchr(u->path,'?'); - *u->query_string=0; - u->query_string++; - } - } - - if(u->query_string) { - if(strchr(u->query_string,'#')) { - u->fragment_id=strchr(u->query_string,'#'); - *u->fragment_id=0; - u->fragment_id++; - } - } - - if(strstr(line,"://")) { - u->scheme=line; - u->domain=strstr(line,"://"); - *u->domain=0; - u->domain+=3; - } else { - u->domain=line; - } - - if(u->domain) { - if(strchr(u->domain,'@')) { - u->username=u->domain; - u->domain=strchr(u->domain,'@'); - *u->domain=0; - u->domain++; - } - } - - if(u->username) { - if(strchr(u->username,':')) { - u->password=strchr(u->username,':'); - *u->password=0; - u->password++; - } - } - - if(u->domain) { - if(strchr(u->domain,']')) {//the end of an IPv6 address - if(strchr(strchr(u->domain,']'),':')) { - u->port=strchr(strchr(u->domain,']'),':'); - if(u->port[1] == '?') {//for magnet links - u->port=0; - } else { - *u->port=0; - u->port++; - } - } - } else { - if(strchr(u->domain,':')) { - u->port=strchr(u->domain,':'); - if(u->port[1] == '?') {//for magnet links - u->port=0; - } else { - *u->port=0; - u->port++; - } - } - } - } - if(u->port) { - for(i=0;u->port[i];i++) { - if(u->port[i] < '0' || u->port[i] > '9') { - //this port number isn't a number! - //it is probably a different portion of the url then... and the domain is probably the scheme. - if(u->domain && !u->scheme) { - u->scheme=u->domain; - u->domain=0; - } - if(!u->path) { - u->path=u->port; - u->port=0; - } - break; - } - } - } - - if(u->domain) {//for magnet links. 
- if(strchr(u->domain,'?')) { - u->query_string=strchr(u->domain,'?'); - *u->query_string=0; - u->query_string++; - } - } - - if(u->domain) { - if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string - u->scheme=u->domain; - *strchr(u->scheme,':')=0; - u->domain=0; - } - } - - if(!u->scheme && u->username) {//for mailto: - u->scheme=u->username; - u->username=u->password; - u->password=0; - } -} - -#define AorB(a,b) ((a)?(a):(b)) - -void magic_and_defaults(struct url *u) { - struct servent *serv; - char sport[10]; - u->scheme=AorB(u->scheme,AorB(getenv("URL_SCHEME"),"DEFAULT")); - u->username=AorB(u->username,AorB(getenv("URL_USERNAME"),"DEFAULT")); - u->password=AorB(u->password,AorB(getenv("URL_PASSWORD"),"DEFAULT")); - u->domain=AorB(u->domain,AorB(getenv("URL_DOMAIN"),"DEFAULT")); - serv=getservbyname(u->scheme,strcmp(u->scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80 - if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); -// else snprintf(sport,sizeof(sport)-1,"%d",serv); - u->port=AorB(u->port,AorB(getenv("URL_PORT"),(serv?strdup(sport):"DEFAULT"))); - -// if(!strcmp(u->port,"DEFAULT")) { - //this shouldn't happen most of the time. :/ -// printf("serv: %d\nsport: %s\nu->scheme: %s\n",serv,sport,u->scheme); -// } - - u->path=AorB(u->path,AorB(getenv("URL_PATH"),"DEFAULT")); - u->query_string=AorB(u->query_string,AorB(getenv("URL_QUERY_STRING"),"DEFAULT")); - u->fragment_id=AorB(u->fragment_id,AorB(getenv("URL_FRAGMENT_ID"),"DEFAULT")); -} -#endif -----END OF PAGE-----