# Copyright (c) 2020 Brit Nicholson.
# All rights reserved. This program is made available under
# the terms of the Simple Non Code License (SNCL) v2.3.0,
# which is available at
# https://github.com/MysteryDash/Simple-Non-Code-License/blob/master/License.txt

# This is the code used for a calculator that finds the average
# amount and percentiles of an ancestor's DNA when various relatives
# have their DNA genotyped.

import numpy as np
import random
import math
import fnmatch
import time

start_time = time.time()

def recomb(fun_gen, fun_half_segs, first_anc, second_anc=None):

  out = first_anc

  for gen_num in range(1, fun_gen + 1):
  
    out = random.sample(out, fun_half_segs)
    if second_anc is not None:
        out.extend(random.sample(second_anc, fun_half_segs))

  return out

segs = 97.5

seg_decimal = segs/2 - int(segs/2)
percent_reproduced = []

# Enter 1 for mother, 2 for grandma, 3 for great-grandma, etc. (n+2 for ng grandma):
gen = 2
# Enter one for yourself:
sib_num = 1
aunt_num = 0
par_of_cous_num = 0
cus_num = 1
cus_from_same_aunt_num = 0

trials = 20000
for i in range(0, trials):

  if np.random.random() > seg_decimal:
    half_segs = math.floor(segs/2)
  else:
    half_segs = math.ceil(segs/2)

  m = list(range(0, half_segs*2))
  p = list(m)

  m = ['m' + str(i) for i in m]
  p = ['p' + str(i) for i in p]

  #grandma = recomb(half_segs, m, p)
  # The following aunt variable is only used for cousins from the same aunt/uncle:
  aunt = recomb(gen-1, half_segs, m, p)

  unique_segs = []

  # DNA contributed by siblings
  for s_num in range(1, sib_num + 1):
      unique_segs.extend(recomb(gen, half_segs, m, p))

  # DNA contributed by aunts/uncles
  for a_num in range(1, aunt_num + 1):
      unique_segs.extend(recomb(gen-1, half_segs, m, p))

  # DNA contributed by parents of cousins of order “gen” (will be same as aunt or uncle if gen = 2)
  for p_of_c_num in range(1, par_of_cous_num + 1):
      unique_segs.extend(recomb(gen-1, half_segs, m, p))

  # DNA contributed by cousins who are NOT children of the above aunts/uncles:
  # Also, each of these cousins is from a different aunt or uncle.
  for c_num in range(1, cus_num + 1):
       unique_segs.extend(recomb(gen, half_segs, m, p))

  # This is for several cousins from the same aunt or uncle.
  # It would be burdensome to do this for multiple aunts or uncles
  for cfsa_num in range(1, cus_from_same_aunt_num + 1):
      unique_segs.extend(recomb(1, half_segs, aunt))
  
  unique_segs = fnmatch.filter(list(set(unique_segs)), 'm*')
  
  percent_reproduced.append(len(unique_segs)/(half_segs*2))

#df = pd.DataFrame(percent_reproduced)

print(np.min(percent_reproduced))

print(np.percentile(percent_reproduced, 2.5))
#print(df.quantile(0.025))

print(np.mean(percent_reproduced))

print(np.percentile(percent_reproduced, 97.5))
#print(df.quantile(0.975))

print(np.max(percent_reproduced))

print("n")

print("Time to complete: %s seconds." % (time.time() - start_time))

Feel free to ask me about modeling & simulation, genetic genealogy, or genealogical research. To see my articles on Medium, click here. And try out a nifty calculator that’s based on the first of my three genetic models. It lets you find the amount of an ancestor’s DNA you have when combined with various relatives. And most importantly, check out these ranges of shared DNA percentages or shared centiMorgans, which are the only published values that match known standard deviations.