From 7f8f6f3305161706d44884cccd4961ba4f0490bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaakko=20Ker=C3=A4nen?= Date: Fri, 11 Dec 2020 20:40:32 +0200 Subject: [PATCH 1/1] GmRequest: Punycode for domain names To support Internationalized Domain Names, we need to encode domain names using Punycode. IssueID #73 --- src/gmrequest.c | 5 ++++- src/gmutil.c | 41 +++++++++++++++++++++++++++++++++++++++++ src/gmutil.h | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/gmrequest.c b/src/gmrequest.c index 884486b3..41b97620 100644 --- a/src/gmrequest.c +++ b/src/gmrequest.c @@ -491,6 +491,9 @@ void deinit_GmRequest(iGmRequest *d) { void setUrl_GmRequest(iGmRequest *d, const iString *url) { set_String(&d->url, url); + /* Encode hostname to Punycode here because we want to submit the Punycode domain name + in the request. (TODO: Pending possible Gemini spec change.) */ + punyEncodeUrlHost_String(&d->url); urlEncodeSpaces_String(&d->url); } @@ -646,7 +649,7 @@ void submit_GmRequest(iGmRequest *d) { if (port == 0) { port = 1965; /* default Gemini port */ } - setUrl_TlsRequest(d->req, host, port); + setHost_TlsRequest(d->req, host, port); setContent_TlsRequest(d->req, utf8_String(collectNewFormat_String("%s\r\n", cstr_String(&d->url)))); submit_TlsRequest(d->req); diff --git a/src/gmutil.c b/src/gmutil.c index 477d0f17..67b0d939 100644 --- a/src/gmutil.c +++ b/src/gmutil.c @@ -185,10 +185,51 @@ const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelat appendRange_String(absolute, orig.path); } appendRange_String(absolute, rel.query); + normalize_String(absolute); cleanUrlPath_String(absolute); return absolute; } +static iBool equalPuny_(const iString *d, iRangecc orig) { + if (!endsWith_String(d, "-")) { + return iFalse; /* The checks below only accept `d` when it is one character longer than `orig` and starts with it,
so a `d` without the trailing "-" is rejected early. */ + } + if (size_String(d) != size_Range(&orig) + 1) { + return iFalse; + } + return iCmpStrN(cstr_String(d), orig.start, size_Range(&orig)) == 0; +} + +void punyEncodeUrlHost_String(iString *d) { + /* `d` should be an absolute URL; its contents are replaced with a rebuilt copy whose host segments have been run through the Punycode encoder. NOTE(review): an empty host range is not checked before `url.host.start` and `url.host.end` are used -- confirm what init_Url() produces for URLs without a host. */ + iUrl url; + init_Url(&url, d); + iString *encoded = new_String(); + setRange_String(encoded, (iRangecc){ url.scheme.start, url.host.start }); + /* The domain name needs to be split into "."-separated segments that are encoded one at a time. A segment is written as "xn--" plus its encoding only when that encoding is non-empty and equalPuny_() reports that it differs from the segment; otherwise the segment is copied unchanged. */ { + iRangecc seg = iNullRange; + iBool isFirst = iTrue; + while (nextSplit_Rangecc(url.host, ".", &seg)) { + if (!isFirst) { + appendChar_String(encoded, '.'); + } + isFirst = iFalse; + iString *puny = punyEncode_Rangecc(seg); + if (!isEmpty_String(puny) && !equalPuny_(puny, seg)) { + appendCStr_String(encoded, "xn--"); + append_String(encoded, puny); + } + else { + appendRange_String(encoded, seg); + } + delete_String(puny); + } + } + appendRange_String(encoded, (iRangecc){ url.host.end, constEnd_String(d) }); + set_String(d, encoded); + delete_String(encoded); +} + iString *makeFileUrl_String(const iString *localFilePath) { iString *url = cleaned_Path(localFilePath); replace_Block(&url->chars, '\\', '/'); /* in case it's a Windows path */ diff --git a/src/gmutil.h b/src/gmutil.h index 926f5a10..bbadbafd 100644 --- a/src/gmutil.h +++ b/src/gmutil.h @@ -103,6 +103,7 @@ void init_Url (iUrl *, const iString *text); iRangecc urlScheme_String (const iString *); iRangecc urlHost_String (const iString *); const iString * absoluteUrl_String (const iString *, const iString *urlMaybeRelative); +void punyEncodeUrlHost_String(iString *); iString * makeFileUrl_String (const iString *localFilePath); const char * makeFileUrl_CStr (const char *localFilePath); void urlEncodeSpaces_String (iString *); -- 2.34.1