// blob: 999494f6c3f3c107ab9c551cd150909c61813b8a [file] [log] [blame] [edit]
// Copyright (C) 2014 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <libaddressinput/address_formatter.h>
#include <libaddressinput/address_data.h>
#include <libaddressinput/address_field.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <functional>
#include <string>
#include <vector>
#include "format_element.h"
#include "language.h"
#include "region_data_constants.h"
#include "rule.h"
#include "util/cctype_tolower_equal.h"
#include "util/size.h"
namespace i18n {
namespace addressinput {
namespace {
// Separators that can be placed between address lines when they are joined
// into a single line. GetLineSeparatorForLanguage() picks one of them (or the
// empty string) for a given language tag.
// Default separator; also returned whenever the language tag is explicitly
// marked as Latin script.
const char kCommaSeparator[] = ", ";
// Returned for the base languages listed in kLanguagesThatUseSpace.
const char kSpaceSeparator[] = " ";
// Arabic comma followed by a space; returned for the base languages listed in
// kLanguagesThatUseAnArabicComma.
const char kArabicCommaSeparator[] = "، ";
// Base languages whose address lines are joined with a single space. Each row
// holds a two-letter code plus the terminating NUL, so longer codes do not fit
// in these tables.
const char kLanguagesThatUseSpace[][3] = {
"th",
"ko",
};
// Base languages whose address lines are joined with no separator at all.
const char kLanguagesThatHaveNoSeparator[][3] = {
"ja",
"zh", // All Chinese variants.
};
// Base languages whose address lines are joined with kArabicCommaSeparator.
// This data is based on CLDR, cross-checked with data provided by Chrome
// linguists, for languages that are in official use in some country, where
// Arabic is the most likely script tag.
// TODO: Consider supporting variants such as tr-Arab by detecting the script
// code.
const char kLanguagesThatUseAnArabicComma[][3] = {
"ar",
"fa",
"ku",
"ps",
"ur",
};
std::string GetLineSeparatorForLanguage(const std::string& language_tag) {
Language address_language(language_tag);
// First deal with explicit script tags.
if (address_language.has_latin_script) {
return kCommaSeparator;
}
// Now guess something appropriate based on the base language.
const std::string& base_language = address_language.base;
using std::placeholders::_1;
if (std::find_if(kLanguagesThatUseSpace,
kLanguagesThatUseSpace + size(kLanguagesThatUseSpace),
std::bind(&EqualToTolowerString, _1, base_language)) !=
kLanguagesThatUseSpace + size(kLanguagesThatUseSpace)) {
return kSpaceSeparator;
} else if (std::find_if(
kLanguagesThatHaveNoSeparator,
kLanguagesThatHaveNoSeparator +
size(kLanguagesThatHaveNoSeparator),
std::bind(&EqualToTolowerString, _1, base_language)) !=
kLanguagesThatHaveNoSeparator +
size(kLanguagesThatHaveNoSeparator)) {
return "";
} else if (std::find_if(
kLanguagesThatUseAnArabicComma,
kLanguagesThatUseAnArabicComma +
size(kLanguagesThatUseAnArabicComma),
std::bind(&EqualToTolowerString, _1, base_language)) !=
kLanguagesThatUseAnArabicComma +
size(kLanguagesThatUseAnArabicComma)) {
return kArabicCommaSeparator;
}
// Either the language is a Latin-script language, or no language was
// specified. In the latter case we still return ", " as the most common
// separator in use. In countries that don't use this, e.g. Thailand,
// addresses are often written in Latin script where this would still be
// appropriate, so this is a reasonable default in the absence of information.
return kCommaSeparator;
}
// Joins |lines| into |line|, inserting between consecutive entries the
// separator that GetLineSeparatorForLanguage() returns for |language_tag|.
// Any previous content of |line| is discarded; with an empty |lines| the
// result is the empty string. |line| must not be null.
void CombineLinesForLanguage(const std::vector<std::string>& lines,
                             const std::string& language_tag,
                             std::string* line) {
  line->clear();
  const std::string joiner = GetLineSeparatorForLanguage(language_tag);

  bool is_first = true;
  for (const std::string& piece : lines) {
    // The separator goes between entries only, never before the first one.
    if (!is_first) {
      line->append(joiner);
    }
    line->append(piece);
    is_first = false;
  }
}
} // namespace
// Formats |address_data| according to the format rule of its region and
// writes the resulting lines into |lines|.
//
// |lines| must not be null; its previous content is discarded. The rule starts
// as a copy of the default rule and is then overlaid with the serialized
// region data for |address_data.region_code|. Fields that are empty in
// |address_data| are dropped from the format together with the literals that
// only served to separate them from their neighbours.
void GetFormattedNationalAddress(
const AddressData& address_data, std::vector<std::string>* lines) {
assert(lines != nullptr);
lines->clear();
Rule rule;
rule.CopyFrom(Rule::GetDefault());
// TODO: Eventually, we should get the best rule for this country and
// language, rather than just for the country.
// NOTE(review): the result of ParseSerializedRule() is not inspected here;
// presumably the default rule copied above stays in effect when the region
// data cannot be parsed - confirm against Rule.
rule.ParseSerializedRule(
RegionDataConstants::GetRegionData(address_data.region_code));
Language language(address_data.language_code);
// If Latin-script rules are available and the |language_code| of this address
// is explicitly tagged as being Latin, then use the Latin-script formatting
// rules.
const std::vector<FormatElement>& format =
language.has_latin_script && !rule.GetLatinFormat().empty()
? rule.GetLatinFormat()
: rule.GetFormat();
// Address format without the unnecessary elements (based on which address
// fields are empty). We assume all literal strings that are not at the start
// or end of a line are separators, and therefore only relevant if the
// surrounding fields are filled in. This works with the data we have
// currently.
std::vector<FormatElement> pruned_format;
for (auto element_it = format.begin();
element_it != format.end();
++element_it) {
// Always keep the newlines.
if (element_it->IsNewline() ||
// Always keep the non-empty address fields.
(element_it->IsField() &&
!address_data.IsFieldEmpty(element_it->GetField())) ||
// Only keep literals that satisfy these 2 conditions:
(!element_it->IsField() &&
// (1) Not preceding an empty field.
(element_it + 1 == format.end() ||
!(element_it + 1)->IsField() ||
!address_data.IsFieldEmpty((element_it + 1)->GetField())) &&
// (2) Not following a removed field.
// The previous field was removed exactly when the last element kept so
// far is not a field, which is why |pruned_format| is consulted here.
(element_it == format.begin() ||
!(element_it - 1)->IsField() ||
(!pruned_format.empty() && pruned_format.back().IsField())))) {
pruned_format.push_back(*element_it);
}
}
// Second pass: render the pruned format. |line| accumulates the text of the
// output line currently being built.
std::string line;
for (const auto& element : pruned_format) {
if (element.IsNewline()) {
// A newline ends the current line; lines that ended up empty are skipped.
if (!line.empty()) {
lines->push_back(line);
line.clear();
}
} else if (element.IsField()) {
AddressField field = element.GetField();
if (field == STREET_ADDRESS) {
// The field "street address" represents the street address lines of an
// address, so there can be multiple values.
if (!address_data.IsFieldEmpty(field)) {
// The first street line continues whatever is already on the current
// line.
line.append(address_data.address_line.front());
if (address_data.address_line.size() > 1U) {
// With several street lines, the current line is emitted as is, the
// middle street lines become output lines of their own, and the last
// street line starts a new current line so that the format elements
// that follow are appended to it.
lines->push_back(line);
line.clear();
const auto last_element_iterator =
address_data.address_line.begin() +
address_data.address_line.size() - 1;
lines->insert(lines->end(), address_data.address_line.begin() + 1,
last_element_iterator);
line.append(*last_element_iterator);
}
}
} else {
line.append(address_data.GetFieldValue(field));
}
} else {
// Neither a newline nor a field: a literal kept by the pruning pass.
line.append(element.GetLiteral());
}
}
// Emit the trailing line when the format does not end with a newline.
if (!line.empty()) {
lines->push_back(line);
}
}
// Formats |address_data| with GetFormattedNationalAddress() and joins the
// resulting lines into the single string |line|, using the line separator
// chosen for |address_data.language_code|.
//
// |line| must not be null; its previous content is discarded.
void GetFormattedNationalAddressLine(
    const AddressData& address_data, std::string* line) {
  // Same precondition check as GetFormattedNationalAddress() applies to its
  // own output parameter; |line| is dereferenced unconditionally below.
  assert(line != nullptr);
  std::vector<std::string> address_lines;
  GetFormattedNationalAddress(address_data, &address_lines);
  CombineLinesForLanguage(address_lines, address_data.language_code, line);
}
// Joins the street address lines of |address_data| (its |address_line|
// entries) into the single string |line|, using the line separator chosen for
// |address_data.language_code|.
//
// |line| must not be null; its previous content is discarded.
void GetStreetAddressLinesAsSingleLine(
    const AddressData& address_data, std::string* line) {
  // Same precondition check as GetFormattedNationalAddress() applies to its
  // own output parameter; |line| is dereferenced unconditionally below.
  assert(line != nullptr);
  CombineLinesForLanguage(
      address_data.address_line, address_data.language_code, line);
}
} // namespace addressinput
} // namespace i18n