# prevLatLong

# This was an attempt to compress the size of a database of zipcodes.
# The CSV format was zip,lat,long. The original file was 754 KB.
# There were very many duplicate latitude/longitude combinations that point to several zip codes.
# One way to reduce size would be to collapse all zips that share a lat/long into one row with a single lat/long combo.
# Since we want to look up by zip code, this is not efficient.
# So we could remove the zips from that list and create a new list of ordered zips that points to the latlong list.

# I tried this below. The output file sizes are 405 KB and 475 KB (880 KB combined, more than the original 754 KB), so this did not save any space.
# It was worth a shot.

import csv

def mergeSort2D(data, column):
    """Sort the rows of ``data`` in place by the value found at ``column``.

    The sort is stable: rows whose ``column`` values compare equal keep their
    relative order. This makes the "sort by the secondary column, then by the
    primary column" pattern produce a proper multi-column ordering.

    Args:
        data: Mutable sequence of indexable rows; it is reordered in place.
        column: Index, within each row, of the value to compare.

    Returns:
        None. ``data`` itself is modified.
    """
    # Zero or one row: nothing to reorder, and no row is indexed at all.
    if len(data) > 1:
        # The built-in sort is a stable, merge-based sort. Slice assignment
        # writes the result back into the caller's own sequence object.
        data[:] = sorted(data, key=lambda row: row[column])

def print2D(array, string=''):
    """Print every row of ``array`` on its own line, then a blank line.

    If ``string`` is anything other than the empty string it is printed
    first, as a heading.
    """
    show_heading = string != ''
    if show_heading:
        print(string)
    for line in map(str, array):
        print(line)
    # Trailing empty line separates consecutive dumps.
    print()

def test(zipcode, latlong):
    success = True
    for i in range(len(zipcode)):
        code = zipcode[i][0]
        key = zipcode[i][1]
        if code not in latlong[key][2]:
            success = False
            break
    if success:
        print('Success')
    else:
        print('Fail')

# Load every record of the source CSV (columns: zip, lat, long) into memory.
with open('./zipcode-database.csv', newline='') as f:
    data = list(csv.reader(f))

# Order by longitude, then by latitude. The grouping loop below only compares
# each row with the previous one, so it relies on identical lat/long pairs
# ending up next to each other.
mergeSort2D(data, 2)
mergeSort2D(data, 1)

latLongMap = []        # rows of [lat, long, [zips...]], one per distinct pair
zipMap = []            # rows of [zip, index into latLongMap]
latLongPos = -1        # index of the most recently added latLongMap row
prevLatLong = [0, 0]   # placeholder that differs from any value read as text

for row in data:
    latLong = [row[1], row[2]]
    if latLong != prevLatLong:
        # New coordinate pair: open a fresh row that starts with this zip.
        latLongMap.append(latLong + [[row[0]]])
        prevLatLong = latLong
        latLongPos += 1
    else:
        # Same pair as the previous row: attach this zip to the current row.
        latLongMap[-1][2].append(row[0])
    zipMap.append([row[0], latLongPos])

# Sanity check while the per-row zip lists are still present.
test(zipMap, latLongMap)

# The zip lists were only needed for the check; drop them before writing.
for entry in latLongMap:
    entry.pop(2)

for path, rows in (('zipmap.csv', zipMap), ('latlong.csv', latLongMap)):
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(rows)

# the code and food blogger

# Search This Blog

# prevLatLong