# Copyright (c) 2020 Brit Nicholson. # All rights reserved. This program is made available under # the terms of the Simple Non Code License (SNCL) v2.3.0, # which is available at # https://github.com/MysteryDash/Simple-Non-Code-License/blob/master/License.txt import numpy as np import random import fnmatch import time def recomb(anc_1, anc_2, rand_fish, fun_spots): rand = random.random() if rand < 0.25: out_a = recomb_a_first(anc_1, rand_fish, fun_spots) out_b = recomb_a_first(anc_2, rand_fish, fun_spots) elif rand < 0.5: out_a = recomb_a_first(anc_1, rand_fish, fun_spots) out_b = recomb_b_first(anc_2, rand_fish, fun_spots) elif rand < 0.75: out_a = recomb_b_first(anc_1, rand_fish, fun_spots) out_b = recomb_a_first(anc_2, rand_fish, fun_spots) else: out_a = recomb_b_first(anc_1, rand_fish, fun_spots) out_b = recomb_b_first(anc_2, rand_fish, fun_spots) out = np.column_stack((out_a, out_b)) return out def recomb_a_first(anc, fun_rand_fish, fun_spots): random.shuffle(fun_spots) num_of_recombs = 22 + np.random.poisson(fun_rand_fish) cut_spots = sorted(fun_spots[:num_of_recombs]) a = anc[:,0].tolist() b = anc[:,1].tolist() if len(cut_spots) == 0: recomb_out = a return recomb_out recomb_out = [] for i in range(0, len(cut_spots)): if i == 0: recomb_out = a[0:cut_spots[0]] if i % 2 == 1: recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]]) last_a = False else: recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]]) last_a = True if last_a: recomb_out.extend(b[cut_spots[i]:len(b)]) else: recomb_out.extend(a[cut_spots[i]:len(a)]) return recomb_out def recomb_b_first(anc, fun_rand_fish, fun_spots): random.shuffle(fun_spots) num_of_recombs = 22 + np.random.poisson(fun_rand_fish) cut_spots = sorted(fun_spots[:num_of_recombs]) a = anc[:,0].tolist() b = anc[:,1].tolist() if len(cut_spots) == 0: recomb_out = b return recomb_out recomb_out = [] for i in range(0, len(cut_spots)): if i == 0: recomb_out = b[0:cut_spots[0]] if i % 2 == 1: recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]]) last_a = True else: recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]]) last_a = False if last_a: recomb_out.extend(b[cut_spots[i]:len(b)]) else: recomb_out.extend(a[cut_spots[i]:len(a)]) return recomb_out # Out of 1B Poisson values, the smallest I got was 28 and the largest was 94, so 99 (or 97+) genes should be fine. genes_num = 99 genes_list = list(range(0, genes_num)) spots = list(range(1, genes_num)) rand_fish = 33 # λ # Gen. = 2 simulates grandparents, 3 simulates great-grandparents, etc. gen = 2 trials = 500000 # Only enter a "1" for one of the following to see what % of DNA one shares w/ that relative only. aunt_bool = 0 cous_1st_bool = 0 great_aunt_bool = 0 par_of_cous_bool = 0 nth_cous_bool = 0 nth_anc_bool = 1 # Use this to change an nth relative to a half-relative half = 0 # Create the farthest back ancestors. Label homolouges; otherwise, aunts share 50%. for num in range(1, int((2**gen)/2) + 1): vars()['first_anc_' + str(num)] = np.column_stack(([str(num) + 'a' for i in genes_list], [str(num) + 'b' for i in genes_list])) if nth_cous_bool == 1 or par_of_cous_bool == 1: fill = np.column_stack(([0 for i in genes_list], [0 for i in genes_list])) start_time = time.time() if aunt_bool + cous_1st_bool + great_aunt_bool + par_of_cous_bool + nth_cous_bool + nth_anc_bool != 1: raise ValueError('One and only one relative must be compared.') if great_aunt_bool == 1 and gen != 3: raise ValueError('Great Aunts should only be compared when gen = 3.') percent_same = [] for i_trials in range(0, trials): if gen > 2: for num in range(1, int((2**gen)/4) + 1): vars()['new_' + str(num)] = recomb(vars()['first_anc_' + str(2*num-1)], vars()['first_anc_' + str(2*num)], rand_fish, spots) # Recombine for close relatives. These 2 loops get it down to two grandparents. # Each anc. in the tree is created, but some are eventually overwritten. for g in range(gen-1, 2, -1): for num in range(1, int((2**g)/4) + 1): vars()['new_' + str(num)] = recomb(vars()['new_' + str(2*num-1)], vars()['new_' + str(2*num)], rand_fish, spots) # Create a 'mother' if gen > 2: mom = recomb(new_1, new_2, rand_fish, spots) else: mom = recomb(first_anc_1, first_anc_2, rand_fish, spots) # Create oneself (only the applicable homologue, which is arbitrarily the maternal one) rand = random.random() if rand < 0.5: me = recomb_a_first(mom, rand_fish, spots) else: me = recomb_b_first(mom, rand_fish, spots) me = np.asarray(me) if nth_anc_bool == 1: percent_same.append(np.sum((me == first_anc_2[:,0]) | (me == first_anc_2[:,1]))/(2*genes_num)) elif aunt_bool == 1 or cous_1st_bool == 1: # Create an 'aunt' if gen > 2: aunt = recomb(new_1, new_2, rand_fish, spots) else: aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots) if aunt_bool == 1: percent_same.append(np.sum((me == aunt[:,0]) | (me == aunt[:,1]))/(2*genes_num)) else: rand = random.random() if rand < 0.5: cous_1st = recomb_a_first(aunt, rand_fish, spots) else: cous_1st = recomb_b_first(aunt, rand_fish, spots) cous_1st = np.asarray(cous_1st) percent_same.append(np.sum((me == cous_1st))/(2*genes_num)) elif gen == 3 and great_aunt_bool == 1: # To keep the programming simple and limit run-time, only allow calculations for great-aunts # and great-uncles (siblings of one's grandparents) when gen == 3 great_aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots) percent_same.append(np.sum((me == great_aunt[:,0]) | (me == great_aunt[:,1]))/(2*genes_num)) # Recombine for nth cousin or their parent elif nth_cous_bool == 1 or par_of_cous_bool == 1: # One only shares 2 close ancestors with a full nth cousin (unless they share more /:) if half == 0: new_1 = recomb(first_anc_1, first_anc_2, rand_fish, spots) else: new_1 = recomb(first_anc_1, fill, rand_fish, spots) # If gen > 3, dilute half of the genes with 'fill' for each extra generation for g in range(gen - 1, 2, -1): new_1 = recomb(new_1, fill, rand_fish, spots) # Create a parent of nth cousin (where n = gen - 1) if gen > 2: par_of_cous = recomb(new_1, fill, rand_fish, spots) else: par_of_cous = new_1 #par_of_cous = recomb(new_1, fill, rand_fish, spots) if par_of_cous_bool == 1: percent_same.append(np.sum((me == par_of_cous[:,0]) | (me == par_of_cous[:,1]))/(2*genes_num)) else: # Create an nth cousin (where n = gen - 1) rand = random.random() if rand < 0.5: nth_cous = recomb_a_first(par_of_cous, rand_fish, spots) else: nth_cous = recomb_b_first(par_of_cous, rand_fish, spots) percent_same.append(np.sum((me == nth_cous))/(2*genes_num)) # Calculate the fraction of DNA shared #percent_same.append(np.sum((me == mom[:,0]) | (me == mom[:,1]))/(2*genes_num)) if half == 1: print('This is for a half-relative!', "n") mean_out = np.mean(percent_same) std_dev_out = np.std(percent_same) print(mean_out) print(std_dev_out) print("n") print("Time to complete: %s seconds." % (time.time() - start_time)) import statistics def print_results(pv): print(':', str(round(100*statistics.mean(pv),2)) + '%,', 'min:', str(round(min(pv),5)) + ',', '95%-Conf.:', str(round(np.percentile(pv, lo),4)) + '-' + str(round(np.percentile(pv, hi),4)) + ',', 'max:', str(round(max(pv),5)) + ',', round(statistics.stdev(pv),5), 'std,', pv.count(0) / len(pv), '0%', pv.count(100) / len(pv), '100%') confid = 95 lo = (100 - confid) / 2 hi = 100 - lo print_results(percent_same)
Feel free to ask me about modeling & simulation, genetic genealogy, or genealogical research. To see my articles on Medium, click here. And try out a nifty calculator that’s based on the first of my three genetic models. It lets you find the amount of an ancestor’s DNA you have when combined with various relatives. And most importantly, check out these ranges of shared DNA percentages or shared centiMorgans, which are the only published values that match known standard deviations.
Recent Comments