epicyon/city.py

376 lines
14 KiB
Python
Raw Normal View History

2024-05-08 12:13:15 +00:00
""" Decoy location metadata on images.
An aim of this is to reinforce confirmation bias within machine learning
systems looking for patterns.
"""
2024-05-08 12:09:29 +00:00
__filename__ = "city.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-12-22 23:37:30 +00:00
__version__ = "1.6.0"
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
__status__ = "Production"
2021-06-26 11:16:41 +00:00
__module_group__ = "Metadata"
import os
import datetime
import random
import math
from random import randint
2021-12-26 12:02:29 +00:00
from utils import acct_dir
2022-06-21 11:58:50 +00:00
from utils import remove_eol
2021-05-10 19:13:46 +00:00
# Activity states which the simulated city dweller can be in.
# The numeric values are added to the decoy seed when choosing a direction
# of travel, so they need to stay stable.
(PERSON_SLEEP,
 PERSON_WORK,
 PERSON_PLAY,
 PERSON_SHOP,
 PERSON_EVENING,
 PERSON_PARTY) = range(6)

# States for which the dweller is placed inside the central district
BUSY_STATES = (PERSON_WORK, PERSON_SHOP, PERSON_PLAY, PERSON_PARTY)
2021-12-30 18:38:36 +00:00
def _get_decoy_camera(decoy_seed: int) -> (str, str, int):
2021-05-11 12:36:35 +00:00
"""Returns a decoy camera make and model which took the photo
"""
cameras = [
["Apple", "iPhone SE"],
["Apple", "iPhone XR"],
["Apple", "iPhone 8"],
["Apple", "iPhone 11"],
["Apple", "iPhone 11 Pro"],
["Apple", "iPhone 12"],
["Apple", "iPhone 12 Mini"],
["Apple", "iPhone 12 Pro Max"],
2021-12-25 14:38:17 +00:00
["Apple", "iPhone 13"],
["Apple", "iPhone 13 Mini"],
["Apple", "iPhone 13 Pro"],
2023-04-13 11:59:08 +00:00
["Apple", "iPhone 14"],
["Apple", "iPhone 14 Pro"],
2024-05-08 12:09:29 +00:00
["Apple", "iPhone 15"],
["Apple", "iPhone 15 Pro"],
["Samsung", "Galaxy S24 Ultra"],
["Samsung", "Galaxy S24 Plus"],
["Samsung", "Galaxy S24"],
["Samsung", "Galaxy S23 Plus"],
["Samsung", "Galaxy S23"],
["Samsung", "Galaxy S22 Plus"],
["Samsung", "Galaxy S22"],
["Samsung", "Galaxy S21 Ultra"],
["Samsung", "Galaxy S21"],
2021-05-11 12:36:35 +00:00
["Samsung", "Galaxy Note 20 Ultra"],
["Samsung", "Galaxy S20 Plus"],
["Samsung", "Galaxy S20 FE 5G"],
["Samsung", "Galaxy Z FOLD 2"],
2021-12-25 14:38:17 +00:00
["Samsung", "Galaxy S12 Plus"],
["Samsung", "Galaxy S12"],
["Samsung", "Galaxy S11 Plus"],
2021-05-11 12:36:35 +00:00
["Samsung", "Galaxy Z Flip"],
2024-01-01 10:43:20 +00:00
["Samsung", "Galaxy A54"],
2021-05-11 12:36:35 +00:00
["Samsung", "Galaxy A51"],
["Samsung", "Galaxy A60"],
2023-04-13 11:59:08 +00:00
["Samsung", "Note 13"],
["Samsung", "Note 13 Plus"],
2021-12-25 14:38:17 +00:00
["Samsung", "Note 12"],
["Samsung", "Note 12 Plus"],
["Samsung", "Note 11"],
["Samsung", "Note 11 Plus"],
2021-05-11 12:36:35 +00:00
["Samsung", "Note 10"],
["Samsung", "Note 10 Plus"],
["Samsung", "Galaxy Note 20 Ultra"],
["Samsung", "Galaxy S20 FE"],
["Samsung", "Galaxy Z Fold 2"],
["Samsung", "Galaxy A52 5G"],
2021-05-11 12:41:21 +00:00
["Samsung", "Galaxy A71 5G"],
2024-05-08 12:09:29 +00:00
["Google", "Pixel 8 Pro"],
["Google", "Pixel 8a"],
["Google", "Pixel 8"],
2023-04-13 11:59:08 +00:00
["Google", "Pixel 7 Pro"],
["Google", "Pixel 7"],
2021-12-25 14:38:17 +00:00
["Google", "Pixel 6 Pro"],
["Google", "Pixel 6"],
2021-05-11 12:41:21 +00:00
["Google", "Pixel 5"],
["Google", "Pixel 4a"],
["Google", "Pixel 4 XL"],
["Google", "Pixel 3 XL"],
["Google", "Pixel 4"],
["Google", "Pixel 4a 5G"],
["Google", "Pixel 3"],
["Google", "Pixel 3a"]
2021-05-11 12:36:35 +00:00
]
2021-12-30 18:38:36 +00:00
randgen = random.Random(decoy_seed)
2021-05-11 12:36:35 +00:00
index = randgen.randint(0, len(cameras) - 1)
2021-12-30 18:38:36 +00:00
serial_number = randgen.randint(100000000000, 999999999999999999999999)
return cameras[index][0], cameras[index][1], serial_number
2021-05-11 12:36:35 +00:00
2021-12-30 18:38:36 +00:00
def _get_city_pulse(curr_time_of_day, decoy_seed: int) -> (float, float):
    """Simulates where an average city dweller would be at the given time.

    Jane or Joe average lives and works in the city, commuting in and
    out of the central district. Their life pattern is tied to
    decoy_seed, which gives machine learning something to latch onto,
    with a little unseeded randomness mixed in.

    Returns a polar coordinate for the simulated city dweller:
    Distance from the city centre is in the range 0.0 - 1.0
    Angle is in radians
    """
    seeded_gen = random.Random(decoy_seed)
    hour_variance = 3
    day_of_week = curr_time_of_day.weekday()

    # the start and end of the working day wobble from call to call,
    # using the unseeded module level generator
    day_begins_hour = 7 + randint(0, hour_variance)
    day_ends_hour = 17 + randint(0, hour_variance)

    # decide what the dweller is doing at this hour
    hour = curr_time_of_day.hour
    is_week_day = day_of_week < 5
    if hour <= day_begins_hour:
        dweller_state = PERSON_SLEEP
    elif hour <= day_ends_hour:
        if is_week_day:
            dweller_state = PERSON_WORK
        elif day_of_week == 5:
            dweller_state = PERSON_SHOP
        else:
            dweller_state = PERSON_PLAY
    elif is_week_day:
        dweller_state = PERSON_EVENING
    else:
        dweller_state = PERSON_PARTY

    # each activity has its own habitual direction from the city centre
    direction_gen = random.Random(decoy_seed + dweller_state)
    angle_radians = \
        (direction_gen.randint(0, 100000) / 100000) * 2 * math.pi

    # some people are quite random, others have more predictable habits
    decoy_randomness = seeded_gen.randint(1, 3)

    # occasionally throw in a wildcard to keep the machine learning guessing
    if randint(0, 100) < decoy_randomness:
        wild_distance = randint(0, 100000) / 100000
        wild_angle = (randint(0, 100000) / 100000) * 2 * math.pi
        return wild_distance, wild_angle

    # what constitutes the central district is fuzzy
    district_fuzz = (seeded_gen.randint(0, 100000) / 100000) * 0.1
    busy_radius = 0.3 + district_fuzz
    fraction = seeded_gen.randint(0, 100000) / 100000
    if dweller_state in BUSY_STATES:
        # if we are busy then we're somewhere in the city center
        distance_from_city_center = fraction * busy_radius
    else:
        # otherwise we're in the burbs
        distance_from_city_center = busy_radius + \
            ((1.0 - busy_radius) * fraction)
    return distance_from_city_center, angle_radians
2021-12-30 18:38:36 +00:00
def parse_nogo_string(nogo_line: str) -> []:
    """Parses a line from locations_nogo.txt and returns the polygon.

    The text after the first ':' is split on ';' if one is present,
    otherwise on ','. The resulting values are consumed in pairs, and a
    trailing unpaired value is ignored. Within a pair the value carrying
    an 'E' or 'W' suffix is taken to be the longitude ('W' becomes
    negative); without a suffix the first value of the pair is used as
    the longitude.

    Returns a list of [latitude, longitude] pairs, or an empty list if
    the line has no ':' separator or has four or fewer values.
    Raises ValueError if a value cannot be converted to a float.
    """
    nogo_line = remove_eol(nogo_line)
    if ':' not in nogo_line:
        # empty or malformed line: there is no polygon to return
        return []
    polygon_str = nogo_line.split(':', 1)[1]
    separator = ';' if ';' in polygon_str else ','
    pts = polygon_str.split(separator)
    if len(pts) <= 4:
        return []

    polygon: list[list] = []
    # zip stops at the shorter slice, so an odd final value is dropped
    for first_str, second_str in zip(pts[0::2], pts[1::2]):
        longitude_str = first_str.strip()
        latitude_str = second_str.strip()
        if 'E' in latitude_str or 'W' in latitude_str:
            # the pair was given the other way around
            longitude_str, latitude_str = latitude_str, longitude_str

        if 'E' in longitude_str:
            longitude = float(longitude_str.replace('E', ''))
        elif 'W' in longitude_str:
            longitude = -float(longitude_str.replace('W', ''))
        else:
            longitude = float(longitude_str)
        latitude = float(latitude_str)
        polygon.append([latitude, longitude])
    return polygon
2021-12-29 21:55:09 +00:00
def spoof_geolocation(base_dir: str,
                      city: str, curr_time, decoy_seed: int,
                      cities_list: [],
                      nogo_list: []) -> (float, float, str, str,
                                         str, str, int):
    """Given a city and the current time spoofs the location
    for an image.

    base_dir is where locations.txt and locations_nogo.txt are looked
    for, with custom_locations.txt and custom_locations_nogo.txt taking
    precedence if they exist.
    city is matched case insensitively as a substring of each locations
    line, and the first usable match wins.
    curr_time is a datetime which gets shifted by the approximate time
    zone of the city (NOTE(review): presumably UTC - confirm with callers).
    decoy_seed makes the camera and the life pattern repeatable.
    cities_list, if not empty, supplies the locations lines instead of
    reading the locations file.
    nogo_list, if not empty, supplies already parsed nogo polygons
    instead of reading the nogo file.

    returns latitude, longitude, N/S, E/W,
    camera make, camera model, camera serial number
    If the city is not found, or no locations are available, then a
    fixed default location with empty camera details is returned.
    """
    locations_filename = base_dir + '/custom_locations.txt'
    if not os.path.isfile(locations_filename):
        locations_filename = base_dir + '/locations.txt'
    nogo_filename = base_dir + '/custom_locations_nogo.txt'
    if not os.path.isfile(nogo_filename):
        nogo_filename = base_dir + '/locations_nogo.txt'

    man_city_radius = 0.1
    variance_at_location = 0.0004
    default_latitude = 51.8744
    default_longitude = 0.368333
    default_latdirection = 'N'
    default_longdirection = 'W'

    # get the list of known cities
    cities = cities_list
    if not cities:
        if not os.path.isfile(locations_filename):
            return (default_latitude, default_longitude,
                    default_latdirection, default_longdirection,
                    "", "", 0)
        cities = []
        try:
            with open(locations_filename, 'r', encoding='utf-8') as fp_loc:
                cities = fp_loc.readlines()
        except OSError:
            print('EX: unable to read locations ' + locations_filename)

    # get the polygons which the spoofed location must stay out of
    nogo = []
    if nogo_list:
        nogo = nogo_list
    elif os.path.isfile(nogo_filename):
        nogo_lines = []
        try:
            with open(nogo_filename, 'r', encoding='utf-8') as fp_nogo:
                nogo_lines = fp_nogo.readlines()
        except OSError:
            print('EX: spoof_geolocation unable to read ' + nogo_filename)
        # unlike the city lookup below, this prefix match is case sensitive
        city_prefix = city + ':'
        for line in nogo_lines:
            if line.startswith(city_prefix):
                polygon = parse_nogo_string(line)
                if polygon:
                    nogo.append(polygon)

    city = city.lower()
    for city_name in cities:
        if city not in city_name.lower():
            continue
        city_fields = city_name.split(':')
        if len(city_fields) < 3:
            # no latitude/longitude fields on this line, so keep looking
            continue
        latitude_str = city_fields[1]
        longitude_str = city_fields[2]
        area_km2 = 0
        if len(city_fields) > 3:
            area_km2 = int(city_fields[3])
        latdirection = 'N'
        longdirection = 'E'
        if 'S' in latitude_str:
            latdirection = 'S'
            latitude_str = latitude_str.replace('S', '')
        if 'W' in longitude_str:
            longdirection = 'W'
            longitude_str = longitude_str.replace('W', '')
        city_latitude = float(latitude_str)
        city_longitude = float(longitude_str)

        # get the time of day at the city
        approx_time_zone = int(city_longitude / 15.0)
        if longdirection == 'E':
            approx_time_zone = -approx_time_zone
        curr_time_adjusted = curr_time - \
            datetime.timedelta(hours=approx_time_zone)

        cam_make, cam_model, cam_serial_number = \
            _get_decoy_camera(decoy_seed)

        # The city radius value is in longitude and the reference
        # is Manchester. Adjust for the radius of the chosen city.
        # This does not change between attempts, so do it once.
        if area_km2 > 1:
            man_radius = math.sqrt(1276 / math.pi)
            radius = math.sqrt(area_km2 / math.pi)
            city_radius_deg = (radius / man_radius) * man_city_radius
        else:
            city_radius_deg = man_city_radius

        latitude = city_latitude
        longitude = city_longitude
        # Try seed offsets 0..100 until the location is outside of every
        # nogo area. If they all fail then the last attempt is used.
        for seed_offset in range(101):
            # patterns of activity change in the city over time
            (distance_from_city_center, angle_radians) = \
                _get_city_pulse(curr_time_adjusted,
                                decoy_seed + seed_offset)
            # Get the position within the city. Every attempt is measured
            # from the city centre, so that rejected attempts don't
            # accumulate and drag the location away from the city.
            latitude = city_latitude + \
                distance_from_city_center * city_radius_deg * \
                math.cos(angle_radians)
            longitude = city_longitude + \
                distance_from_city_center * city_radius_deg * \
                math.sin(angle_radians)
            # nogo areas are checked using a signed longitude
            longval = longitude
            if longdirection == 'W':
                longval = -longitude
            if not point_in_nogo(nogo, latitude, longval):
                break

        # add a small amount of variance around the location
        fraction = randint(0, 100000) / 100000
        distance_from_location = fraction * fraction * variance_at_location
        fraction = randint(0, 100000) / 100000
        angle_from_location = fraction * 2 * math.pi
        latitude += distance_from_location * math.cos(angle_from_location)
        longitude += distance_from_location * math.sin(angle_from_location)

        # gps locations aren't transcendental, so round to a fixed
        # number of decimal places
        latitude = int(latitude * 100000) / 100000.0
        longitude = int(longitude * 100000) / 100000.0
        return (latitude, longitude, latdirection, longdirection,
                cam_make, cam_model, cam_serial_number)

    return (default_latitude, default_longitude,
            default_latdirection, default_longdirection,
            "", "", 0)
2021-06-07 17:49:10 +00:00
2021-12-29 21:55:09 +00:00
def get_spoofed_city(city: str, base_dir: str,
                     nickname: str, domain: str) -> str:
    """Returns the name of the city to use as a GPS spoofing location for
    image metadata.

    The name is read from city.txt within the directory given by acct_dir
    for this nickname and domain. The city argument is not used as a
    fallback: an empty string is returned if the file doesn't exist or
    can't be read.
    """
    city_filename = acct_dir(base_dir, nickname, domain) + '/city.txt'
    if not os.path.isfile(city_filename):
        return ''

    spoofed_city = ''
    try:
        with open(city_filename, 'r', encoding='utf-8') as fp_city:
            spoofed_city = remove_eol(fp_city.read())
    except OSError:
        print('EX: get_spoofed_city unable to read ' + city_filename)
    return spoofed_city
2021-12-30 18:38:36 +00:00
def _point_in_polygon(poly: [], x_coord: float, y_coord: float) -> bool:
"""Returns true if the given point is inside the given polygon
"""
2021-12-30 18:38:36 +00:00
num = len(poly)
inside = False
p2x = 0.0
p2y = 0.0
xints = 0.0
p1x, p1y = poly[0]
2021-12-30 18:38:36 +00:00
for i in range(num + 1):
p2x, p2y = poly[i % num]
if y_coord > min(p1y, p2y):
if y_coord <= max(p1y, p2y):
if x_coord <= max(p1x, p2x):
if p1y != p2y:
2021-12-30 18:38:36 +00:00
xints = \
(y_coord - p1y) * (p2x - p1x) / (p2y - p1y) + p1x
if p1x == p2x or x_coord <= xints:
inside = not inside
p1x, p1y = p2x, p2y
return inside
2021-12-29 21:55:09 +00:00
def point_in_nogo(nogo: [], latitude: float, longitude: float) -> bool:
    """Returns true if the given geolocation is within any of the
    polygons in the nogo list
    """
    return any(_point_in_polygon(nogo_polygon, latitude, longitude)
               for nogo_polygon in nogo)