From c15471e19669af289dc064eac74e7116a22ad023 Mon Sep 17 00:00:00 2001
From: Cornel Rautenbach <corneliusr>
Date: Wed, 11 May 2022 11:22:30 +0200
Subject: [PATCH] Clean zone based on ISO 3166-2:ZA -
 https://gitlab.com/ship-logic/backends/backend/-/issues/1869

---
 address_utils/address_utils.go      | 216 +++++++++++++++++++---------
 address_utils/address_utils_test.go |   9 ++
 2 files changed, 154 insertions(+), 71 deletions(-)

diff --git a/address_utils/address_utils.go b/address_utils/address_utils.go
index eddd3f8..2bcfa81 100644
--- a/address_utils/address_utils.go
+++ b/address_utils/address_utils.go
@@ -3,33 +3,140 @@ package address_utils
 import (
 	"crypto/md5"
 	"fmt"
-	"gitlab.com/uafrica/go-utils/string_utils"
 	"regexp"
 	"strings"
-)
 
-const (
-	ProvinceKwaZuluNatal string = "KwaZulu-Natal"
-	ProvinceGauteng      string = "Gauteng"
-	ProvinceFreeState    string = "Free State"
-	ProvinceLimpopo      string = "Limpopo"
-	ProvinceMpumalanga   string = "Mpumalanga"
-	ProvinceNorthWest    string = "North West"
-	ProvinceEasternCape  string = "Eastern Cape"
-	ProvinceWesternCape  string = "Western Cape"
-	ProvinceNorthernCape string = "Northern Cape"
+	"gitlab.com/uafrica/go-utils/string_utils"
 )
 
-var Provinces = []string{
-	ProvinceKwaZuluNatal,
-	ProvinceGauteng,
-	ProvinceFreeState,
-	ProvinceLimpopo,
-	ProvinceMpumalanga,
-	ProvinceNorthWest,
-	ProvinceEasternCape,
-	ProvinceWesternCape,
-	ProvinceNorthernCape,
+type Province struct {
+	Code  string   // abbreviation that CleanZone substitutes for a matched name, e.g. "EC" or "KZN"
+	Names []string // spellings of the province that CleanZone replaces with Code
+}
+
+// Provinces maps province codes to accepted names; a name containing another listed name must precede it because CleanZone substitutes in list order. Largely follows https://en.wikipedia.org/wiki/ISO_3166-2:ZA
+var Provinces = []Province{
+	{
+		Code: "EC",
+		Names: []string{
+			"Eastern Cape",
+			"Eastern-Cape",
+			"Oos-Kaap",
+			"iPumalanga-Kapa",
+			"Kapa Bohlabela",
+			"Kapa Botjhabela",
+			"Kapa-Vuxa",
+			"Kapa Botlhaba",
+			"Kapa Vhubvaḓuvha",
+			"Mpuma-Koloni",
+			"Mpumalanga-Kapa",
+			"Mpumalanga-Koloni",
+		},
+	},
+	{
+		Code: "FS",
+		Names: []string{
+			"Free State",
+			"Freestate",
+			"Vrystaat",
+			"iFreyistata",
+			"Freistata",
+			"Foreisetata",
+			"Fureisitata",
+			"Freyistata",
+			"Fuleyisitata",
+			"Freyisitata",
+		},
+	},
+	{
+		Code: "GP",
+		Names: []string{
+			"iGauteng",
+			"Kgauteng",
+			"Gauteng",
+			"Rhawuti",
+		},
+	},
+	{
+		Code: "KZN",
+		Names: []string{
+			"iKwaZulu-Natal",
+			"KwaZulu-Natali",
+			"KwaZulu-Natala",
+			"KwaZulu-Natal",
+			"KwaZulu Natal",
+			"GaZulu-Natala",
+			"Hazolo-Natala",
+			"HaZulu-Natal",
+		},
+	},
+	{
+		Code: "LP",
+		Names: []string{
+			"Limpopo",
+			"Vhembe",
+		},
+	},
+	{
+		Code: "MP",
+		Names: []string{
+			"iMpumalanga",
+			"Mpumalanga",
+		},
+	},
+	{
+		Code: "NC",
+		Names: []string{
+			"Northern Cape",
+			"Northern-Cape",
+			"Noord-Kaap",
+			"Noord Kaap",
+			"iTlhagwini-Kapa",
+			"Kapa Leboya",
+			"Kapa-N'walungu",
+			"Kapa Bokone",
+			"Kapa Leboa",
+			"Kapa Devhula",
+			"Mntla-Koloni",
+			"Nyakatho-Kapa",
+			"Nyakatho-Koloni",
+		},
+	},
+	{
+		Code: "NW",
+		Names: []string{
+			"North West",
+			"North-West",
+			"Noordwes",
+			"Noord-wes",
+			"iTlhagwini-Tjhingalanga",
+			"Lebowa Bodikela",
+			"Leboya Bophirima",
+			"Leboya le Bophirima",
+			"N'walungu-Vupeladyambu",
+			"Bokone Bophirima",
+			"Mntla-Ntshona",
+			"Nyakatho-Ntshonalanga",
+		},
+	},
+	{
+		Code: "WC",
+		Names: []string{
+			"Western Cape",
+			"Western-Cape",
+			"Wes-Kaap",
+			"Wes Kaap",
+			"iTjhingalanga-Kapa",
+			"Kapa Bodikela",
+			"Kapa Bophirimela",
+			"Kapa-Vupeladyambu",
+			"Kapa Bophirima",
+			"Kapa Vhukovhela",
+			"Ntshona-Koloni",
+			"Ntshonalanga-Kapa",
+			"Ntshonalanga-Koloni",
+		},
+	},
 }
 
 // MD5HashOfAddress m(E,L,L) - calculates and returns the MD5 hash of the entered address, lat and lng concatenated together. If lat and lng is blank, it is only the hash of the entered address
@@ -102,45 +209,21 @@ func stripUnwantedCharacters(s string) string {
 	return s
 }
 
-func CleanZone(oldCountry, oldZone *string) (newCountry, newZone *string) {
-	// Google zones
-	/*
-		"long_name": "KwaZulu-Natal",
-		"short_name": "KZN",
-
-		"long_name": "Gauteng",
-		"short_name": "GP",
-
-		"long_name": "Free State",
-		"short_name": "FS",
+func CleanZone(countryToClean, zoneToClean *string) (newCountry, newZone *string) {
+	newCountry = countryToClean
 
-		"long_name": "Limpopo",
-		"short_name": "LP",
+	southAfricaVariations := []string{"South Africa", "Suid-Afrika", "Suid Afrika", "Iningizimu Afrika", "Mzantsi Afrika", "Afrika Boroa", "Africa Kusini"}
 
-		"long_name": "Mpumalanga",
-		"short_name": "MP",
-
-		"long_name": "North West",
-		"short_name": "NW",
-
-		"long_name": "Western Cape",
-		"short_name": "WC",
-
-		"long_name": "Eastern Cape",
-		"short_name": "EC",
-
-		"long_name": "Northern Cape",
-		"short_name": "NC",
-	*/
-
-	newCountry = oldCountry
-	if oldCountry == nil || *oldCountry == "South Africa" || len(*oldCountry) == 0 {
-		defaultCountry := "ZA"
-		newCountry = &defaultCountry
+	for _, southAfricaVariation := range southAfricaVariations {
+		if countryToClean == nil || len(*countryToClean) == 0 || strings.ToLower(*countryToClean) == strings.ToLower(southAfricaVariation) {
+			defaultCountry := "ZA"
+			newCountry = &defaultCountry
+			break
+		}
 	}
 
-	if *newCountry == "ZA" && oldZone != nil {
-		zone := *oldZone
+	if *newCountry == "ZA" && zoneToClean != nil {
+		zone := *zoneToClean
 		// Gauteng - GT from uAfrica should be GP for Google
 		if zone == "GT" {
 			zone = "GP"
@@ -148,20 +231,11 @@ func CleanZone(oldCountry, oldZone *string) (newCountry, newZone *string) {
 			zone = "KZN"
 		}
 
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceKwaZuluNatal, "KZN")
-		zone = string_utils.ReplaceCaseInsensitive(zone, "KwaZulu Natal", "KZN")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceGauteng, "GP")
-		zone = string_utils.ReplaceCaseInsensitive(zone, "Freestate", "FS")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceFreeState, "FS")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceLimpopo, "LP")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceMpumalanga, "MP")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceNorthWest, "NW")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceEasternCape, "EC")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceWesternCape, "WC")
-		zone = string_utils.ReplaceCaseInsensitive(zone, ProvinceNorthernCape, "NC")
-		zone = string_utils.ReplaceCaseInsensitive(zone, "Eastern-Cape", "EC")
-		zone = string_utils.ReplaceCaseInsensitive(zone, "Western-Cape", "WC")
-		zone = string_utils.ReplaceCaseInsensitive(zone, "Northern-Cape", "NC")
+		for _, province := range Provinces {
+			for _, name := range province.Names {
+				zone = string_utils.ReplaceCaseInsensitive(zone, name, province.Code)
+			}
+		}
 
 		newZone = &zone
 	}
diff --git a/address_utils/address_utils_test.go b/address_utils/address_utils_test.go
index 03f79db..3305130 100644
--- a/address_utils/address_utils_test.go
+++ b/address_utils/address_utils_test.go
@@ -1,9 +1,18 @@
 package address_utils
 
 import (
+	"fmt"
 	"testing"
 )
 
+func TestCleanProvince(t *testing.T) {
+	zone, country := "Wes KaaP", "South Africa" // mixed case on purpose: matching must ignore case
+	cleanCountry, cleanZone := CleanZone(&country, &zone)
+	if got := fmt.Sprintf("%s, %s", *cleanZone, *cleanCountry); got != "WC, ZA" {
+		t.Errorf("CleanZone(%q, %q) = %q, want %q", country, zone, got, "WC, ZA")
+	}
+}
+
 func TestIsProvince(t *testing.T) {
 	type args struct {
 		address string
-- 
GitLab