BUrl: IDNA ToUnicode and ToASCII conversions.

* Since DNS names are normally restricted to ASCII, the use of Unicode in
domain names is implemented using the "punycode" encoding.
* The request to the DNS server must be sent with the ASCII
representation of the domain name; however, the Unicode one should be
used for user-visible parts.
* ICU provides an implementation of the conversion, which we use here.
* Conversion is currently done in-place and modifies the BUrl object
(this is similar to UrlEncode/UrlDecode).
* Adjust existing IDN test to make use of these methods. It's passing
now.
This commit is contained in:
Adrien Destugues 2014-10-21 14:39:43 +02:00
parent d341a585ae
commit 9bf4e99477
4 changed files with 65 additions and 2 deletions

View File

@ -64,6 +64,9 @@ public:
void UrlEncode(bool strict = false);
void UrlDecode(bool strict = false);
// IDNA conversion of the host part, performed in place on this object.
// IDNAToAscii() stores the ASCII form of the host, IDNAToUnicode() the
// Unicode form. Both return B_OK on success and B_ERROR on failure.
status_t IDNAToAscii();
status_t IDNAToUnicode();
// Url encoding/decoding of strings
static BString UrlEncode(const BString& url,
bool strict = false,

View File

@ -1,6 +1,7 @@
SubDir HAIKU_TOP src kits network libnetapi ;
UsePrivateHeaders app net shared support ;
UsePrivateHeaders locale shared ;
UseHeaders [ FDirName $(HAIKU_TOP) src libs compat freebsd_network compat ]
: true ;
@ -31,6 +32,11 @@ for architectureObject in [ MultiArchSubDirSetup ] {
SetupFeatureObjectsDir no-ssl ;
}
# BUrl uses ICU to perform IDNA conversions (unicode domain names)
UseBuildFeatureHeaders icu ;
Includes [ FGristFiles Url.cpp ]
: [ BuildFeatureAttribute icu : headers ] ;
SharedLibrary [ MultiArchDefaultGristFiles libbnetapi.so ] :
init.cpp
DynamicBuffer.cpp
@ -91,6 +97,7 @@ for architectureObject in [ MultiArchSubDirSetup ] {
:
be $(TARGET_NETWORK_LIBS) [ TargetLibstdc++ ] [ TargetLibsupc++ ]
[ BuildFeatureAttribute openssl : libraries ]
[ BuildFeatureAttribute icu : libraries ]
[ MultiArchDefaultGristFiles libshared.a ]
;
}

View File

@ -17,8 +17,12 @@
#include <MimeType.h>
#include <Roster.h>
#include <ICUWrapper.h>
#include <RegExp.h>
#include <unicode/idna.h>
#include <unicode/stringpiece.h>
static const char* kArchivedUrl = "be:url string";
@ -594,6 +598,50 @@ BUrl::UrlDecode(bool strict)
}
/*!	Converts the host of this URL to its ASCII form, in place.

	The conversion is delegated to ICU's UTS #46 IDNA implementation. fHost
	is only replaced when the conversion succeeds; on failure the URL is
	left untouched.

	\return B_OK on success, B_ERROR if the ICU converter could not be
		created or ICU reported a failure through its UErrorCode.
*/
status_t
BUrl::IDNAToAscii()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (U_FAILURE(err) || converter == NULL) {
		// Creation failed: there is no converter to call into. Deleting a
		// NULL pointer is a no-op, so this is safe in both cases.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;
		// NOTE(review): problems recorded in "info" are not examined here;
		// confirm whether info.hasErrors() should also fail the call.

	BString result;
	BStringByteSink sink(&result);
	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;
	return B_OK;
}
/*!	Converts the host of this URL to its Unicode form, in place.

	The conversion is delegated to ICU's UTS #46 IDNA implementation. fHost
	is only replaced when the conversion succeeds; on failure the URL is
	left untouched.

	\return B_OK on success, B_ERROR if the ICU converter could not be
		created or ICU reported a failure through its UErrorCode.
*/
status_t
BUrl::IDNAToUnicode()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (U_FAILURE(err) || converter == NULL) {
		// Creation failed: there is no converter to call into. Deleting a
		// NULL pointer is a no-op, so this is safe in both cases.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;
		// NOTE(review): problems recorded in "info" are not examined here;
		// confirm whether info.hasErrors() should also fail the call.

	BString result;
	BStringByteSink sink(&result);
	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;
	return B_OK;
}
// #pragma mark - utility functionality

View File

@ -587,10 +587,15 @@ UrlTest::IDNTest()
for (int i = 0; tests[i].escaped != NULL; i++)
{
NextSubTest();
BUrl url(tests[i].escaped);
url.UrlDecode();
CPPUNIT_ASSERT_EQUAL(BUrl(tests[i].decoded).UrlString(),
url.UrlString());
BUrl idn(tests[i].decoded);
status_t success = idn.IDNAToUnicode();
CPPUNIT_ASSERT_EQUAL(B_OK, success);
CPPUNIT_ASSERT_EQUAL(url.UrlString(), idn.UrlString());
}
}