# This was an attempt to compress the size of a database of zipcodes.
# The CSV format was zip,lat,long. The original file was 754 KB.
# There were very many duplicate latitude/longitude combinations that point to several zip codes.
# One way to reduce size would be to compress duplicate zips into one row with a single lat/long combo.
# Since we want to look up by zip code, this is not efficient.
# So we could remove the zips from that list and create a new list of ordered zips that points to the latlong list.
# I tried this below. The output filesizes are 405KB and 475KB. This did not save any space.
# It was worth a shot.
import csv
def mergeSort2D(data, column):
    """Stably sort ``data`` (a list of rows) in place by ``row[column]``.

    Stability matters here: the caller sorts by column 2 and then by
    column 1, relying on equal keys keeping their previous order so that
    rows end up ordered by (column 1, column 2).

    Args:
        data: list of indexable rows; sorted in place.
        column: index of the sort key within each row.
    """
    if len(data) > 1:
        mid = len(data)//2
        L = data[:mid]
        R = data[mid:]
        mergeSort2D(L, column)
        mergeSort2D(R, column)
        i = j = k = 0
        while i < len(L) and j < len(R):
            # <= (not <) takes from the left half on ties, which keeps
            # equal keys in their original order — i.e. a stable sort.
            if L[i][column] <= R[j][column]:
                data[k] = L[i]
                i += 1
            else:
                data[k] = R[j]
                j += 1
            k += 1
        while i < len(L):
            data[k] = L[i]
            i += 1
            k += 1
        while j < len(R):
            data[k] = R[j]
            j += 1
            k += 1
def print2D(array, string=''):
    """Pretty-print a 2-D list: an optional header line, then one row
    per line, followed by a trailing blank line."""
    if string != '':
        print(string)
    for entry in array:
        print(entry)
    print()
def test(zipcode, latlong):
success = True
for i in range(len(zipcode)):
code = zipcode[i][0]
key = zipcode[i][1]
if code not in latlong[key][2]:
success = False
break
if success:
print('Success')
else:
print('Fail')
# Load the raw zip,lat,long rows from the source database.
data = []
with open('./zipcode-database.csv', newline='') as f:
    for record in csv.reader(f):
        data.append(record)

# Sort by the secondary key (long) first, then the primary key (lat),
# so rows with identical lat/long pairs end up adjacent.
mergeSort2D(data, 2)
mergeSort2D(data, 1)

# Collapse adjacent duplicate lat/long pairs into one latLongMap entry
# that carries the list of zips; zipMap records, for every zip, the
# index of its lat/long entry.
latLongMap = []
latLongPos = -1
zipMap = []
prevLatLong = [0, 0]
for record in data:
    if [record[1], record[2]] == prevLatLong:
        latLongMap[-1][2].append(record[0])
    else:
        latLongMap.append([record[1], record[2], [record[0]]])
        prevLatLong = [record[1], record[2]]
        latLongPos += 1
    zipMap.append([record[0], latLongPos])

# Sanity-check that every zip maps back to a lat/long entry listing it.
test(zipMap, latLongMap)

# Strip the per-entry zip lists before writing; zipMap carries the link.
for entry in latLongMap:
    del entry[2]

with open('zipmap.csv', 'w', newline='') as f:
    csv.writer(f).writerows(zipMap)
with open('latlong.csv', 'w', newline='') as f:
    csv.writer(f).writerows(latLongMap)
# The CSV format was zip,lat,long. The original file was 754 KB.
# There were very many duplicate latitude/longitude combinations that point to several zip codes.
# One way to reduce size would be to compress duplicate zips into one row with a single lat/long combo.
# Since we want to look up by zip code, this is not efficient.
# So we could remove the zips from that list and create a new list of ordered zips that points to the latlong list.
# I tried this below. The output filesizes are 405KB and 475KB. This did not save any space.
# It was worth a shot.
import csv
def mergeSort2D(data, column):
    """Stably sort ``data`` (a list of rows) in place by ``row[column]``.

    Stability matters here: the caller sorts by column 2 and then by
    column 1, relying on equal keys keeping their previous order so that
    rows end up ordered by (column 1, column 2).

    Args:
        data: list of indexable rows; sorted in place.
        column: index of the sort key within each row.
    """
    if len(data) > 1:
        mid = len(data)//2
        L = data[:mid]
        R = data[mid:]
        mergeSort2D(L, column)
        mergeSort2D(R, column)
        i = j = k = 0
        while i < len(L) and j < len(R):
            # <= (not <) takes from the left half on ties, which keeps
            # equal keys in their original order — i.e. a stable sort.
            if L[i][column] <= R[j][column]:
                data[k] = L[i]
                i += 1
            else:
                data[k] = R[j]
                j += 1
            k += 1
        while i < len(L):
            data[k] = L[i]
            i += 1
            k += 1
        while j < len(R):
            data[k] = R[j]
            j += 1
            k += 1
def print2D(array, string=''):
    """Pretty-print a 2-D list: an optional header line, then one row
    per line, followed by a trailing blank line."""
    if string != '':
        print(string)
    for entry in array:
        print(entry)
    print()
def test(zipcode, latlong):
success = True
for i in range(len(zipcode)):
code = zipcode[i][0]
key = zipcode[i][1]
if code not in latlong[key][2]:
success = False
break
if success:
print('Success')
else:
print('Fail')
# Load the raw zip,lat,long rows from the source database.
data = []
with open('./zipcode-database.csv', newline='') as f:
    for record in csv.reader(f):
        data.append(record)

# Sort by the secondary key (long) first, then the primary key (lat),
# so rows with identical lat/long pairs end up adjacent.
mergeSort2D(data, 2)
mergeSort2D(data, 1)

# Collapse adjacent duplicate lat/long pairs into one latLongMap entry
# that carries the list of zips; zipMap records, for every zip, the
# index of its lat/long entry.
latLongMap = []
latLongPos = -1
zipMap = []
prevLatLong = [0, 0]
for record in data:
    if [record[1], record[2]] == prevLatLong:
        latLongMap[-1][2].append(record[0])
    else:
        latLongMap.append([record[1], record[2], [record[0]]])
        prevLatLong = [record[1], record[2]]
        latLongPos += 1
    zipMap.append([record[0], latLongPos])

# Sanity-check that every zip maps back to a lat/long entry listing it.
test(zipMap, latLongMap)

# Strip the per-entry zip lists before writing; zipMap carries the link.
for entry in latLongMap:
    del entry[2]

with open('zipmap.csv', 'w', newline='') as f:
    csv.writer(f).writerows(zipMap)
with open('latlong.csv', 'w', newline='') as f:
    csv.writer(f).writerows(latLongMap)