Decoy is a more descriptive term

merge-requests/30/head
Bob Mottram 2021-05-10 12:19:42 +01:00
parent 55fc1c8d91
commit 75be6ddfee
2 changed files with 49 additions and 39 deletions

View File

@@ -56,46 +56,56 @@ def _removeMetaData(imageFilename: str, outputFilename: str) -> None:
os.system('/usr/bin/mogrify -strip ' + outputFilename) # nosec os.system('/usr/bin/mogrify -strip ' + outputFilename) # nosec
def _getCityPulse(currTimeOfDay, doppelgangerSeed: int) -> float: def _getCityPulse(currTimeOfDay, decoySeed: int) -> (float, float):
"""The data doppelganger """The data decoy
This simulates expected average patterns of movement in a city. This simulates expected average patterns of movement in a city.
Jane or Joe average lives and works in the city, commuting in Jane or Joe average lives and works in the city, commuting in
and out of the central district for work. They have a unique and out of the central district for work. They have a unique
life pattern, which machine learning can latch onto. life pattern, which machine learning can latch onto.
This returns a polar coordinate:
Distance from the city centre is in the range 0.0 - 1.0
Angle is in radians
""" """
randgen = random.Random(doppelgangerSeed) randgen = random.Random(decoySeed)
variance = 3 variance = 3
busyStates = ("work", "shop", "play", "party") busyStates = ("work", "shop", "play", "party")
dataDoppelgangerState = "sleep" dataDecoyState = "sleep"
dataDoppelgangerIndex = 0 dataDecoyIndex = 0
weekday = currTimeOfDay.weekday() weekday = currTimeOfDay.weekday()
minHour = 7 + randint(0, variance) minHour = 7 + randint(0, variance)
maxHour = 17 + randint(0, variance) maxHour = 17 + randint(0, variance)
if currTimeOfDay.hour > minHour: if currTimeOfDay.hour > minHour:
if currTimeOfDay.hour <= maxHour: if currTimeOfDay.hour <= maxHour:
if weekday < 5: if weekday < 5:
dataDoppelgangerState = "work" dataDecoyState = "work"
dataDoppelgangerIndex = 1 dataDecoyIndex = 1
elif weekday == 5: elif weekday == 5:
dataDoppelgangerState = "shop" dataDecoyState = "shop"
dataDoppelgangerIndex = 2 dataDecoyIndex = 2
else: else:
dataDoppelgangerState = "play" dataDecoyState = "play"
dataDoppelgangerIndex = 3 dataDecoyIndex = 3
else: else:
if weekday < 5: if weekday < 5:
dataDoppelgangerState = "evening" dataDecoyState = "evening"
dataDoppelgangerIndex = 4 dataDecoyIndex = 4
else: else:
dataDoppelgangerState = "party" dataDecoyState = "party"
dataDoppelgangerIndex = 5 dataDecoyIndex = 5
angleRadians = \ angleRadians = \
(randgen.randint(0, 100000 - 5 + dataDoppelgangerIndex) / 100000) * \ (randgen.randint(0, 100000 - 5 + dataDecoyIndex) / 100000) * \
2 * math.pi 2 * math.pi
# some people are quite random, others have more predictable habits
decoyRandomness = randgen.randint(10, 20)
# occasionally throw in a wildcard to keep the machine learning guessing
if randint(0, 100) < decoyRandomness:
distanceFromCityCenter = (randint(0, 100000) / 100000)
angleRadians = (randint(0, 100000) / 100000) * 2 * math.pi
else:
# what constitutes the central district is fuzzy # what constitutes the central district is fuzzy
centralDistrictFuzz = (randgen.randint(0, 100000) / 100000) * 0.1 centralDistrictFuzz = (randgen.randint(0, 100000) / 100000) * 0.1
busyRadius = 0.3 + centralDistrictFuzz busyRadius = 0.3 + centralDistrictFuzz
if dataDoppelgangerState in busyStates: if dataDecoyState in busyStates:
# if we are busy then we're somewhere in the city center # if we are busy then we're somewhere in the city center
distanceFromCityCenter = \ distanceFromCityCenter = \
(randgen.randint(0, 100000) / 100000) * busyRadius (randgen.randint(0, 100000) / 100000) * busyRadius
@@ -107,7 +117,7 @@ def _getCityPulse(currTimeOfDay, doppelgangerSeed: int) -> float:
def spoofGeolocation(baseDir: str, def spoofGeolocation(baseDir: str,
city: str, currTime, doppelgangerSeed: int, city: str, currTime, decoySeed: int,
citiesList: []) -> (float, float, str, str): citiesList: []) -> (float, float, str, str):
"""Given a city and the current time spoofs the location """Given a city and the current time spoofs the location
for an image for an image
@@ -156,7 +166,7 @@ def spoofGeolocation(baseDir: str,
datetime.timedelta(hours=approxTimeZone) datetime.timedelta(hours=approxTimeZone)
# patterns of activity change in the city over time # patterns of activity change in the city over time
(distanceFromCityCenter, angleRadians) = \ (distanceFromCityCenter, angleRadians) = \
_getCityPulse(currTimeAdjusted, doppelgangerSeed) _getCityPulse(currTimeAdjusted, decoySeed)
# Get the position within the city, with some randomness added # Get the position within the city, with some randomness added
latitude += \ latitude += \
distanceFromCityCenter * cityRadius * math.cos(angleRadians) distanceFromCityCenter * cityRadius * math.cos(angleRadians)
@@ -189,14 +199,14 @@ def _spoofMetaData(baseDir: str, nickname: str, domain: str,
return return
# get the random seed used to generate a unique pattern for this account # get the random seed used to generate a unique pattern for this account
doppelgangerSeedFilename = \ decoySeedFilename = \
baseDir + '/accounts/' + nickname + '@' + domain + '/doppelgangerseed' baseDir + '/accounts/' + nickname + '@' + domain + '/decoyseed'
doppelgangerSeed = 63725 decoySeed = 63725
if os.path.isfile(doppelgangerSeedFilename): if os.path.isfile(decoySeedFilename):
with open(doppelgangerSeedFilename, 'r') as fp: with open(decoySeedFilename, 'r') as fp:
doppelgangerSeed = int(fp.read()) decoySeed = int(fp.read())
else: else:
doppelgangerSeed = randint(10000, 10000000000000) decoySeed = randint(10000, 10000000000000)
if os.path.isfile('/usr/bin/exiftool'): if os.path.isfile('/usr/bin/exiftool'):
print('Spoofing metadata in ' + outputFilename + ' using exiftool') print('Spoofing metadata in ' + outputFilename + ' using exiftool')
@@ -206,7 +216,7 @@ def _spoofMetaData(baseDir: str, nickname: str, domain: str,
published = currTimeAdjusted.strftime("%Y:%m:%d %H:%M:%S+00:00") published = currTimeAdjusted.strftime("%Y:%m:%d %H:%M:%S+00:00")
(latitude, longitude, latitudeRef, longitudeRef) = \ (latitude, longitude, latitudeRef, longitudeRef) = \
spoofGeolocation(baseDir, spoofCity, currTimeAdjusted, spoofGeolocation(baseDir, spoofCity, currTimeAdjusted,
doppelgangerSeed, None) decoySeed, None)
os.system('exiftool -artist="' + nickname + '" ' + os.system('exiftool -artist="' + nickname + '" ' +
'-DateTimeOriginal="' + published + '" ' + '-DateTimeOriginal="' + published + '" ' +
'-FileModifyDate="' + published + '" ' + '-FileModifyDate="' + published + '" ' +

View File

@@ -3676,10 +3676,10 @@ def testSpoofGeolocation() -> None:
'HOUSTON, USA:29.9803:W95.3397' 'HOUSTON, USA:29.9803:W95.3397'
] ]
currTime = datetime.datetime.utcnow() currTime = datetime.datetime.utcnow()
doppelgangerSeed = 7634682 decoySeed = 7634682
cityRadius = 0.1 cityRadius = 0.1
coords = spoofGeolocation('', 'los angeles', currTime, coords = spoofGeolocation('', 'los angeles', currTime,
doppelgangerSeed, citiesList) decoySeed, citiesList)
assert coords[0] >= 33.9425 - cityRadius assert coords[0] >= 33.9425 - cityRadius
assert coords[0] <= 33.9425 + cityRadius assert coords[0] <= 33.9425 + cityRadius
assert coords[1] >= 118.408 - cityRadius assert coords[1] >= 118.408 - cityRadius
@@ -3687,7 +3687,7 @@ def testSpoofGeolocation() -> None:
assert coords[2] == 'N' assert coords[2] == 'N'
assert coords[3] == 'W' assert coords[3] == 'W'
coords = spoofGeolocation('', 'unknown', currTime, coords = spoofGeolocation('', 'unknown', currTime,
doppelgangerSeed, citiesList) decoySeed, citiesList)
assert coords[0] >= 51.8744 - cityRadius assert coords[0] >= 51.8744 - cityRadius
assert coords[0] <= 51.8744 + cityRadius assert coords[0] <= 51.8744 + cityRadius
assert coords[1] >= 0.368333 - cityRadius assert coords[1] >= 0.368333 - cityRadius