UNLOCODE-lookup/get-city-code.py

93 lines
3.0 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import csv
import os
import re
import sys
# CSV structure:
# 0 empty
# 1 iso country code
# 2 city code
# 3 full name
# the rest is unused
# Command-line interface: --city is mandatory, --country is optional and
# defaults to an empty string.
parser = argparse.ArgumentParser(
    description='''Find cities code from UNLOCODE.
If the exact name is found when a country code is given, only this match will
be shown, all the MATCHES are displayed otherwise. The font case isnt taken in
account but the accents are.''',
)
parser.add_argument(
    '--city',
    required=True,
    help='City name, will be looked for inside the CSV files',
)
parser.add_argument(
    '--country',
    default='',
    help='''Country name, in ISO-3166 format, to narrow queries if
needed''',
)
# Parsed once at import time; the functions of this script read `args`.
args = parser.parse_args()
# Collect the CSV files of the current directory. The extension is compared
# case-insensitively. Filtering on it here makes the emptiness check agree
# with its error message: a directory holding only non-CSV files used to pass
# the check and the script then silently printed nothing.
files = [
    name for name in os.listdir('.')
    if os.path.isfile(name) and name.lower().endswith('.csv')
]
if not files:
    # Errors go to stderr so that stdout only ever carries results.
    print('ERROR: No CSV file has been found.', file=sys.stderr)
    sys.exit(1)
def csvprocessing(city):
    """Sort one CSV row into the module-level result lists.

    `city` is one row produced by `csv.reader`: index 1 holds the ISO country
    code, index 2 the city code and index 3 the full city name. Comparisons
    ignore case but not accents.

    * `EXACT_MATCH` gets the row when a country was requested and both the
      name and the country are equal to the request.
    * `MULTIPLE_MATCHES` gets the row when no country was requested and the
      name is equal to the requested one.
    * `MATCHES` gets the row whenever it was not added to
      `MULTIPLE_MATCHES` and the requested name is contained in the row's
      name (so exact-name rows are also recorded there when a country was
      requested, which feeds the worldwide fallback).

    Rows with fewer than four columns (`csv.reader` yields `[]` for a blank
    line) are ignored instead of raising `IndexError`.
    """
    if len(city) < 4:
        return

    wanted_city = args.city.lower()
    wanted_country = args.country.lower()
    row_city = city[3].lower()
    same_name = wanted_city == row_city

    # Require a non-empty requested country: otherwise a row whose country
    # column is empty would count as an "exact" match and hide the
    # MULTIPLE_MATCHES results when the script reports.
    if same_name and wanted_country and wanted_country == city[1].lower():
        EXACT_MATCH.append(city)

    if same_name and not wanted_country:
        MULTIPLE_MATCHES.append(city)
    elif wanted_city in row_city:
        MATCHES.append(city)
def csvloop(file):
    """Parse `file` as CSV and hand every row to `csvprocessing`.

    `file` is given unchanged to `csv.reader`; each row it yields is passed
    on, one at a time, in reading order.
    """
    for row in csv.reader(file):
        csvprocessing(row)
def print_results(exactitude, result_list):
    """Print one line per usable row of `result_list`.

    `exactitude` is the label written at the start of every line. A row is
    printed only when its country column (index 1) is made of uppercase
    letters A-Z and its city code column (index 2) is not empty; any other
    row is skipped without notice.
    """
    line_format = '{} {} found for {} from country {}'
    for row in result_list:
        # Skip rows whose country column is not a plain uppercase code.
        if not re.match(r"^([A-Z]+)$", row[1]):
            continue
        # Skip rows that carry no city code.
        if len(row[2]) == 0:
            continue
        print(line_format.format(exactitude, row[2], row[3], row[1]))
# Result accumulators shared by the functions of this script: rows are
# appended by csvprocessing() and read back when the results are reported.
# Three distinct, initially empty lists.
MATCHES, EXACT_MATCH, MULTIPLE_MATCHES = [], [], []
# Feed every CSV file of the current directory to csvloop().
for dir_file in files:
    # Test the real extension: an unanchored r".*\.csv" match also accepts
    # names such as "data.csv.bak".
    if not dir_file.lower().endswith('.csv'):
        continue
    # The files are not guaranteed to be UTF-8, so latin-1 is the fallback.
    # The whole file is decoded BEFORE any row is processed: a decoding error
    # raised halfway through would otherwise leave the rows already seen in
    # the result lists, and the retry would add them a second time.
    # The first attempt names UTF-8 explicitly instead of relying on the
    # platform's default encoding.
    try:
        with open(dir_file, encoding='utf-8') as csvfile:
            lines = csvfile.readlines()
    except UnicodeDecodeError:
        with open(dir_file, encoding='latin-1') as csvfile:
            lines = csvfile.readlines()
    # csv.reader (used by csvloop) accepts any iterable of text lines.
    csvloop(lines)
# Report the most specific non-empty result list.
if EXACT_MATCH:
    # Rows whose name and country both equal the request.
    label, rows = 'Exact match', EXACT_MATCH
elif MULTIPLE_MATCHES:
    # No country was requested, but the city name exists as such.
    label, rows = 'Multiple matches', MULTIPLE_MATCHES
else:
    # Nothing exact: show the rows whose name contains the requested one.
    label, rows = 'Non-exact matches', MATCHES
    if args.country:
        # Tell the user (on stderr) that the country filter was dropped.
        for notice in (
            'Country code has been provided, but no match has been found.',
            'Falling back to worldwide lookup.',
        ):
            print(notice, file=sys.stderr)
print_results(label, rows)