## This notebook demonstrates the methods described in Barrett et al. 2017, "Accuracy of inference on the physics of binary evolution from gravitational-wave observations", https://arxiv.org/abs/1711.06287

In [ ]:
import numpy as np
import matplotlib.pyplot as plot
import h5py as h5

from COMPAS import cosmologicalIntegrator as cci
from COMPAS import selection_effects as cse

import scipy.special as spsp

%matplotlib inline


#### The standard output of COMPAS is a single HDF5 file. This data is large (~38GB) but is available on request. Here we do a bit of book-keeping: getting the column numbers for the relevant headers and determining the unique random seeds passed to the COMPAS code

In [ ]:
h5File = h5.File('./metallicityData.h5','r')

initials = h5File['initials']
mergers = h5File['mergers']
initialSeeds = h5File['initialsSeeds']

uniqueSeeds = np.unique(initialSeeds)
nSim = len(uniqueSeeds)

#make a dictionary of header names:indices
#(the loops building these were lost; we assume the column headers are
#stored as an attribute on each dataset)
ih = {h: i for i,h in enumerate(initials.attrs['headers'])}
mh = {h: i for i,h in enumerate(mergers.attrs['headers'])}

#column indices in the initial file
iKickInd = ih['sigma_kick']
iCeInd = ih['CE_Alpha']
iWrInd = ih['wolfRayetMultiplier']
iLbvInd = ih['LBV_multiplier']

#column indices in the mergers file
mKickInd = mh['sigmaKick']
mCeInd = mh['CEalpha']
mWrInd = mh['wolfRayetMultiplier']
mLbvInd = mh['flbv']


#### Next, we set all of the specifics of the calculations, such as the number of hypothetical observations, how to bin the chirp mass distribution, etc.

In [ ]:
#how many bins to use when approximating chirp mass PDFs with histograms
nBins = 30

#how many hypothetical observations have been made
nObs = 1000

#the SNR threshold for selection effects
snrThreshold = 8.0

#COMPAS fiducial model values: [sigma_kick (km/s), alpha_CE, f_WR, f_LBV]
fiducialValues = [250.,1.,1.,1.5]


#### Next, we calculate the total amount of mass represented by each unique combination of population parameters. We ran 25 different combinations at each of 12 metallicities (hence the magic number 300)

In [ ]:
#the total mass evolved for each combination of population parameters
#(25 parameter combinations x 12 metallicities = 300 grid points)
totalMassEvolvedPerGridPoint = np.sum(initials[:,ih['mass1']] + initials[:,ih['mass2']]) / 300.


#### Now we set up our cosmology. We use the astropy library for this

In [ ]:
import astropy.cosmology as apyc

#setup cosmology and redshift binning (Planck values)
cosmology = apyc.FlatLambdaCDM(H0=67.8,Om0=0.308)

maxRedshift = 2.
nRedshiftBins = 200

redshiftEdges = np.linspace(0,maxRedshift,nRedshiftBins+1)

#cumulative comoving volume enclosed at each redshift edge, converted from Mpc^3 to Gpc^3
comovEdges = [x.value*1e-9 for x in cosmology.comoving_volume(redshiftEdges)]

#comoving volume of each redshift shell (Gpc^3)
redsDistsDerivs = np.diff(comovEdges)

#bin centres and the luminosity distances to them (Mpc)
redshifts = 0.5*(redshiftEdges[:-1] + redshiftEdges[1:])
lumDists = [x.value for x in cosmology.luminosity_distance(redshifts)]
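
#### A quick sanity check: the shell volumes should sum to the total comoving volume out to maxRedshift

In [ ]:
#the shell volumes are differences of cumulative volumes, so their sum
#telescopes to the comoving volume enclosed at maxRedshift (in Gpc^3)
assert np.isclose(np.sum(redsDistsDerivs), cosmology.comoving_volume(maxRedshift).value*1e-9)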


#### We get the list of unique population parameter/metallicity combinations. This is basically 12 copies of Table 1 from the paper, one for each value of metallicity

In [ ]:
gridPoints = np.loadtxt('/home/jbarrett/fisherMatrices/dataEtc/metallicityIncluded.txt')

#popParams is the same list as Table 1 from the paper (with the metallicity column dropped)
popParams = (gridPoints[gridPoints[:,4] == gridPoints[-1,4]])[:,:-1]


#### Now we get the relevant mass and delay time information out of the data files

In [ ]:
allM1 = mergers[:,mh['M1']]
allM2 = mergers[:,mh['M2']]

#delay time from formation to merger: formation time of the DCO plus its coalescence time
allDelayTimes = mergers[:,mh['tc']] + mergers[:,mh['tform']]

allChirpMasses = ((allM1*allM2)**(3./5.))/((allM1+allM2)**(1./5.))
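
#### For reference, the last line above computes the chirp mass, $\mathcal{M} = \frac{(m_1 m_2)^{3/5}}{(m_1 + m_2)^{1/5}}$, the mass combination that is best measured from a gravitational-wave signal.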


#### And subselect the "interesting" ones, i.e. those that merge in a Hubble time, are binary black holes (stellar type 14), etc.

In [ ]:
# make a boolean mask for the 'interesting' DCOs
interestingMergerMask = ((mergers[:,mh['mergesInHubbleTimeFlag']] == 1)
    & (mergers[:,mh['stellarType1']] == 14) & (mergers[:,mh['stellarType2']] == 14)
    & (mergers[:,mh['RLOFSecondaryAfterCEE']] == 0) & (mergers[:,mh['optimisticCEFlag']] == 0))

# grab only those systems (these assignments were lost from the original;
# we assume the random seed column in the mergers file is named 'SEED')
interestingMergers = mergers[interestingMergerMask]
interestingMergerSeeds = interestingMergers[:,mh['SEED']]
interestingChirpMasses = allChirpMasses[interestingMergerMask]
interestingM1s = allM1[interestingMergerMask]
interestingM2s = allM2[interestingMergerMask]
interestingDelayTimes = allDelayTimes[interestingMergerMask]

#sort them according to seed for matching to the initial population seeds
inds = np.argsort(interestingMergerSeeds)
interestingMergers = interestingMergers[inds]
interestingMergerSeeds = interestingMergerSeeds[inds]
interestingChirpMasses = interestingChirpMasses[inds]
interestingM1s = interestingM1s[inds]
interestingM2s = interestingM2s[inds]
interestingDelayTimes = interestingDelayTimes[inds]

minCm, maxCm = np.min(interestingChirpMasses),np.max(interestingChirpMasses)

metallicities = np.unique(interestingMergers[:,mh['Metallicity1']])


#### Set up the "Cosmological Integrator" object. See the COMPAS python library for detailed documentation.

In [ ]:
ci = cci.CosmologicalIntegrator(massMultiplier=4.7845,metallicities=metallicities,
                                useRedshiftLookbackTable=True,nRedshiftsInTable=1000000)


#### Now we define one of the key functions for this method. For a provided set of population parameters (pp) and random seeds, this calculates the weighting of each system due to cosmic history and selection effects

In [ ]:
def getChirpMassesAndWeights(pp,seeds):

    #grab a mask on the merger arrays for the corresponding mergers
    #(the condition on CE alpha is reconstructed -- the original line was lost)
    ppMask = ((interestingMergers[:,mh['sigmaKick']] == pp[0])
        & (interestingMergers[:,mh['CEalpha']] == pp[1])
        & (interestingMergers[:,mh['wolfRayetMultiplier']] == pp[2])
        & (interestingMergers[:,mh['flbv']] == pp[3]))

    ppChirps = []
    ppWeights = []
    for Z in metallicities:

        #grab the mergers at this particular combination of population parameters/metallicity
        zMask = ppMask & (interestingMergers[:,mh['Metallicity1']] == Z)
        relevantSeeds = interestingMergerSeeds[zMask]
        relevantChirps = interestingChirpMasses[zMask]
        relevantM1s = interestingM1s[zMask]
        relevantM2s = interestingM2s[zMask]
        relevantDelayTimes = interestingDelayTimes[zMask]

        #it will be useful to hash the merger seeds against their index
        seedDict = dict(zip(relevantSeeds,range(len(relevantSeeds))))

        #track which of our set of initial seeds merge
        seedsThatMergeMask = np.isin(seeds,relevantSeeds)

        #grab the ones that correspond to this metallicity
        relevantInitialsSeeds = seeds[seedsThatMergeMask & (Zs == Z)]

        if len(relevantInitialsSeeds) == 0:
            continue

        #use the hash to get the array indices in our relevant mergers
        inds = [seedDict[s] for s in relevantInitialsSeeds]

        weights = np.zeros(len(inds))
        for z,ld,der in zip(redshifts,lumDists,redsDistsDerivs):

            #compute the cosmic integration rates
            rates,_ = ci.cosmologicalIntegration(totalMassEvolvedPerGridPoint/12.,relevantDelayTimes[inds],Z,z)

            #compute the selection effects
            detProbs = cse.detection_probability(relevantM1s[inds],relevantM2s[inds],z,ld,snrThreshold)

            #each shell contributes rate x detection probability x shell volume,
            #time-dilated by a factor (1+z)
            rateContribution = (1./(1.+z))*der*rates*detProbs

            weights += rateContribution

        ppWeights.append(weights)
        ppChirps.append(relevantChirps[inds])

    ppChirps = np.concatenate(ppChirps)
    ppWeights = np.concatenate(ppWeights)

    return ppChirps,ppWeights


#### Next, the function which redistributes counts between chirp mass bins due to measurement uncertainty (see Section 4.5 and Figure 3)
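
#### Explicitly, a system in the bin centred on $\mu_k$ is assumed to be measured with a Gaussian uncertainty of width $\sigma_k = 0.03\,\mu_k$, so the fraction of its counts landing in the bin $[e_j, e_{j+1}]$ is $\tfrac{1}{2}\left[\mathrm{erf}\!\left(\frac{e_{j+1}-\mu_k}{\sqrt{2}\,\sigma_k}\right) - \mathrm{erf}\!\left(\frac{e_j-\mu_k}{\sqrt{2}\,\sigma_k}\right)\right]$, renormalised so that no counts are lost off the ends of the histogram.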

In [ ]:
def applyMeasurementErrors(counts,edges,percentageError=0.03):

    binCentres = 0.5*(edges[1:] + edges[:-1])

    #fraction of a Gaussian centred on each bin that falls into every bin
    fractions = []
    for b in binCentres:

        sigma = percentageError*b

        term1 = spsp.erf((b - edges[:-1])/np.sqrt(2.*sigma**2))
        term2 = spsp.erf((b - edges[1:])/np.sqrt(2.*sigma**2))

        fractions.append(0.5*(term1-term2))

    #renormalise so no probability is lost off the ends of the histogram
    normedFractions = []
    for f in fractions:
        normedFractions.append(f/np.sum(f))
    normedFractions = np.array(normedFractions)

    #redistribute the counts among the bins
    newCounts = np.zeros_like(counts)
    for n,c in zip(normedFractions,counts):
        newCounts += n*c

    return newCounts


#### We start by subselecting metallicities and initial conditions (seeds)

In [ ]:
#draw a random initial system (seed) and a random metallicity for each hypothetical binary
seeds = np.random.choice(uniqueSeeds,size=nSim,replace=True)

Zs = np.random.choice(metallicities,size=nSim,replace=True)


#### Now we get the necessary statistics about the central (fiducial) model. These will be the same statistics that we compute at the perturbed models in a moment.

In [ ]:
fidChirps,fidWeights = getChirpMassesAndWeights(fiducialValues,seeds)

fidCounts,edges = np.histogram(fidChirps,bins=nBins,range=(minCm,maxCm),weights=fidWeights)

fidCounts = applyMeasurementErrors(fidCounts,edges)

#normalise for probability
fidTotalCount = float(np.sum(fidCounts))
fidProbs = fidCounts/fidTotalCount

#regularise empty bins to avoid zeros in later probability ratios
fidProbs[fidProbs == 0] = 1e-8

# and get the rate (per simulated binary)
fidRate = fidTotalCount


#### Now we define a function to compute the same statistics for each combination of population parameters, computing the difference with respect to the fiducial values calculated above. These are then used in a linear regression (via Equations 22 and 23 in the paper)

In [ ]:
def getGradients(popParamVals,varyingCol):

    #we already did the fiducial model, so drop it if it is present
    #(otherwise the design matrix and the regression targets would have
    #different numbers of rows)
    notFiducial = ~np.all(popParamVals == fiducialValues,axis=1)
    popParamVals = popParamVals[notFiducial]

    regressionPoints = []
    for ppv in popParamVals:

        print(ppv)

        otherChirps,otherWeights = getChirpMassesAndWeights(ppv,seeds)

        otherCounts,_ = np.histogram(otherChirps,bins=nBins,range=(minCm,maxCm),weights=otherWeights)

        otherCounts = applyMeasurementErrors(otherCounts,edges)

        otherTotalCount = float(np.sum(otherCounts))
        otherProbs = otherCounts/otherTotalCount

        otherProbs[otherProbs == 0] = 1e-8

        otherRate = otherTotalCount

        probDiffs = fidProbs - otherProbs
        rateDiff = fidRate - otherRate

        toRegress = np.concatenate(([rateDiff],probDiffs))

        regressionPoints.append(toRegress)

    regressionPoints = np.array(regressionPoints)

    normalisedPopParams = popParamVals/fiducialValues[varyingCol]
    hs = 1.-normalisedPopParams[:,varyingCol]

    desMat = np.column_stack((hs,(hs**2)/2.))

    firstDerivs = []
    secondDerivs = []
    for tr in regressionPoints.T:

        #solve the normal equations for the quadratic fit
        firstDeriv,secondDeriv = np.linalg.solve(np.dot(desMat.T,desMat),np.dot(desMat.T,tr))
        firstDerivs.append(firstDeriv)
        secondDerivs.append(secondDeriv)

    #the return statement was lost from the original; returning the derivative
    #estimates for each statistic is the natural completion
    return np.array(firstDerivs),np.array(secondDerivs)
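
#### Concretely, each statistic $s$ (the rate and each bin probability) is modelled by a second-order Taylor expansion about the fiducial model, $s(\lambda_0) - s(\lambda) \approx s'\,h + \tfrac{1}{2}\,s''\,h^2$, with $h$ the normalised offset of the varying parameter. Stacking the grid points gives a design matrix $X = \left[\,\mathbf{h} \;\; \mathbf{h}^2/2\,\right]$, and the least-squares derivative estimates solve the normal equations $(X^\mathsf{T}X)\,\hat{\boldsymbol{\beta}} = X^\mathsf{T}\mathbf{y}$ (cf. Equations 22 and 23 in the paper).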



#### We use the above function for each population parameter

In [ ]:
#grid points where each parameter differs from its fiducial value
kickPps = popParams[popParams[:,0] != 250.]
alphaPps = popParams[popParams[:,1] != 1.]
wrPps = popParams[popParams[:,2] != 1.]
lbvPps = popParams[popParams[:,3] != 1.5]

#the calls were lost from the original; the variable names below are illustrative
kickGrads = getGradients(kickPps,0)
alphaGrads = getGradients(alphaPps,1)
wrGrads = getGradients(wrPps,2)
lbvGrads = getGradients(lbvPps,3)



#### And finally we tie all of this information together in a calculation of the Fisher matrix, à la Section 4.3 in the paper

In [ ]:
fisherMatrix = np.zeros((4,4))
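
#what follows is a sketch of the assembly, assuming the standard Fisher matrix
#for a Poisson rate plus a multinomial shape distribution; the exact weighting
#of the two terms should be checked against Section 4.3 of the paper

#first-derivative vectors of [rate, bin probabilities], one per population
#parameter, from the (illustratively named) gradients computed above
firstDerivs = [kickGrads[0],alphaGrads[0],wrGrads[0],lbvGrads[0]]

for i in range(4):
    for j in range(4):

        #Poisson contribution from the overall detection rate
        rateTerm = firstDerivs[i][0]*firstDerivs[j][0]/fidRate

        #multinomial contribution from the shape of the chirp mass distribution
        shapeTerm = nObs*np.sum(firstDerivs[i][1:]*firstDerivs[j][1:]/fidProbs)

        fisherMatrix[i,j] = rateTerm + shapeTerm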