# UNLOCODE-lookup/get-city-code.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import csv
import os
import re
import sys

# CSV structure:
# 0 empty
# 1 iso country code
# 2 city code
# 3 full name
# the rest is unused

# Command-line interface: --city is mandatory, --country is optional and
# defaults to the empty string (meaning "no country filter").
parser = argparse.ArgumentParser(
    description='''Find cities code from UNLOCODE.
If the exact name is found when a country code is given, only this match will
be shown, all the MATCHES are displayed otherwise. The font case isn’t taken in
account but the accents are.''',
)
parser.add_argument(
    '--city',
    required=True,
    help='City name, will be looked for inside the CSV files',
)
parser.add_argument(
    '--country',
    default='',
    help='''Country name, in ISO-3166 format, to narrow queries if
    needed''',
)
# Parsed once at import time; the functions below read `args` directly.
args = parser.parse_args()

# Candidate input files: regular files of the current directory whose name
# ends in ".csv" (case-insensitive). Filtering on the extension here makes
# the error below truthful: it used to trigger only when the directory held
# no file at all, whatever their type.
files = [
    entry for entry in os.listdir('.')
    if os.path.isfile(entry) and entry.lower().endswith('.csv')
]
if not files:
    # Diagnostics go to stderr, like the other messages of this script.
    print('ERROR: No CSV file has been found.', file=sys.stderr)
    sys.exit(1)

def csvprocessing(city):
    """Classify one CSV row against the requested city (and country).

    `city` is one row of a CSV file as a list of strings. Only column 1
    (ISO country code) and column 3 (full name) are inspected here.
    Comparisons ignore letter case but not accents.

    The row is appended to the module-level result lists as follows:

    * EXACT_MATCH: a country was requested and both the name and the
      country code are equal to the requested ones.
    * MULTIPLE_MATCHES: no country was requested and the name is equal to
      the requested one.
    * MATCHES: the requested name is contained in the row's name and the
      row did not go to MULTIPLE_MATCHES.

    Rows with fewer than four columns are ignored.
    """

    # csv.reader yields [] for blank lines, and truncated rows may miss the
    # name column; indexing them would raise IndexError.
    if len(city) < 4:
        return

    wanted_name = args.city.lower()
    row_name = city[3].lower()
    same_name = wanted_name == row_name
    country_given = len(args.country) > 0

    # Without a requested country, '' would compare equal to an empty
    # country column and the row would wrongly count as an exact match,
    # hiding the MULTIPLE_MATCHES results in the final report.
    if (country_given and same_name and
            args.country.lower() == city[1].lower()):
        EXACT_MATCH.append(city)
    if same_name and not country_given:
        MULTIPLE_MATCHES.append(city)
    elif wanted_name in row_name:
        MATCHES.append(city)

def csvloop(file):
    """Parse `file` as CSV and hand every row, as a list, to csvprocessing."""

    for row in csv.reader(file):
        csvprocessing(row)

def print_results(exactitude, result_list):
    """Print one line per usable row of `result_list`.

    `exactitude` is the label put in front of every printed line. A row is
    printed only when its country code (column 1) consists of uppercase
    ASCII letters and its city code (column 2) is not empty.
    """

    template = '{} {} found for {} from country {}'
    for row in result_list:
        # Skip rows whose country column is not a plain uppercase code
        if not re.match(r"^([A-Z]+)$", row[1]):
            continue
        # Skip rows without a city code: there is nothing to report
        if len(row[2]) == 0:
            continue
        print(template.format(exactitude, row[2], row[3], row[1]))

# Module-level result lists, filled by csvprocessing() and read when the
# results are reported. Each name is bound to its own, initially empty, list.
MATCHES, EXACT_MATCH, MULTIPLE_MATCHES = [], [], []

# We look for all the CSV files in the current dir
for dir_file in files:
    # Only names really ending in ".csv" (case-insensitive) are accepted;
    # the former unanchored regex also let e.g. "cities.csv.bak" through.
    if not dir_file.lower().endswith('.csv'):
        continue

    # We can't assume that the CSV file is utf-8 encoded, so utf-8 is tried
    # first and latin-1 is the fallback. The file is decoded entirely
    # *before* any row is processed: a decoding error in the middle of the
    # file would otherwise leave already-recorded rows in the result lists
    # and the latin-1 pass would record them a second time.
    # newline='' is what the csv module asks for on files it parses.
    try:
        with open(dir_file, encoding='utf-8', newline='') as csvfile:
            lines = csvfile.readlines()
    except UnicodeDecodeError:
        with open(dir_file, encoding='latin-1', newline='') as csvfile:
            lines = csvfile.readlines()

    # csv.reader accepts any iterable of lines, so the list can be handed
    # over in place of the open file.
    csvloop(lines)

# Report the first non-empty result list, from most to least specific.
if EXACT_MATCH:
    # City name and country code were both matched
    print_results('Exact match', EXACT_MATCH)
elif MULTIPLE_MATCHES:
    # No country was requested but the city name exists
    print_results('Multiple matches', MULTIPLE_MATCHES)
else:
    # Nothing precise: warn when a country was requested, then list the
    # rows whose name merely matched loosely
    if args.country:
        for notice in (
            'Country code has been provided, but no match has been found.',
            'Falling back to worldwide lookup.',
        ):
            print(notice, file=sys.stderr)
    print_results('Non-exact matches', MATCHES)