Note
Click here to download the full example code
Align datasets¶
This example shows how to combine information from two datasets that describe the same cases but do not necessarily list them in the same order.
# Author: Christian Brodbeck <christianbrodbeck@nyu.edu>
import random
import string
from eelbrain import *
# Build a dataset whose cases follow a known order: one lowercase letter each
ds = Dataset()
ds['ascii'] = Factor(string.ascii_lowercase)
# Attach an "index" variable so that each case can be identified later on
ds.index()
# Draw two random subsets of the cases (15 and 16 cases, respectively), each
# in shuffled order, and print the head of each to confirm the shuffling
case_ids = range(ds.n_cases)
ds1 = ds[random.sample(case_ids, 15)]
print(ds1.head())
ds2 = ds[random.sample(case_ids, 16)]
print(ds2.head())
Out:
ascii index
-------------
d 3
k 10
t 19
h 7
n 13
m 12
a 0
l 11
w 22
f 5
ascii index
-------------
d 3
g 6
v 21
n 13
z 25
t 19
m 12
o 14
b 1
a 0
Align the datasets¶
Use the "index" variable added above to identify cases and align the two datasets.
# Match the cases of ds1 and ds2 through the "index" variable they share
ds1_aligned, ds2_aligned = align(ds1, ds2, 'index')
# Show the ascii sequences of the two datasets next to each other to
# demonstrate that they are aligned: each row should contain the same letter
# in the "ascii" and "ascii_ds2" columns
ds1_aligned['ascii_ds2'] = ds2_aligned['ascii']
print(ds1_aligned)
Out:
ascii index ascii_ds2
-------------------------
d 3 d
t 19 t
n 13 n
m 12 m
a 0 a
f 5 f
q 16 q
g 6 g
v 21 v
o 14 o