library(data.table)
library(dplyr)
library(plyr)
library(stringr)
## Load the labelled kinship pairs (one pair per row, columns p1 and p2)
## and preview the first 20 rows.
relationship <- data.table::fread("train_relationships.csv")
head(relationship, 20)
##             p1         p2
##  1: F0002/MID1 F0002/MID3
##  2: F0002/MID2 F0002/MID3
##  3: F0005/MID1 F0005/MID2
##  4: F0005/MID3 F0005/MID2
##  5: F0009/MID1 F0009/MID4
##  6: F0009/MID1 F0009/MID3
##  7: F0009/MID1 F0009/MID2
##  8: F0009/MID1 F0009/MID6
##  9: F0009/MID2 F0009/MID4
## 10: F0009/MID2 F0009/MID6
## 11: F0009/MID2 F0009/MID3
## 12: F0009/MID3 F0009/MID4
## 13: F0009/MID3 F0009/MID6
## 14: F0009/MID4 F0009/MID6
## 15: F0009/MID5 F0009/MID1
## 16: F0009/MID5 F0009/MID2
## 17: F0009/MID5 F0009/MID4
## 18: F0009/MID5 F0009/MID6
## 19: F0009/MID5 F0009/MID3
## 20: F0009/MID7 F0009/MID3

0.1 Collapse from p1 to p2

## For every distinct p1, join all of its p2 partners into one
## comma-separated string (one output row per p1).
collaps_P1P2 <- plyr::ddply(
  relationship,
  "p1",
  function(group) paste(group$p2, collapse = ",")
)
## ddply names the computed column V1; restore the original column names.
names(collaps_P1P2) <- c("p1", "p2")
head(collaps_P1P2, 20)
##            p1                                                     p2
## 1  F0002/MID1                                             F0002/MID3
## 2  F0002/MID2                                             F0002/MID3
## 3  F0005/MID1                                             F0005/MID2
## 4  F0005/MID3                                             F0005/MID2
## 5  F0009/MID1            F0009/MID4,F0009/MID3,F0009/MID2,F0009/MID6
## 6  F0009/MID2                       F0009/MID4,F0009/MID6,F0009/MID3
## 7  F0009/MID3                                  F0009/MID4,F0009/MID6
## 8  F0009/MID4                                             F0009/MID6
## 9  F0009/MID5 F0009/MID1,F0009/MID2,F0009/MID4,F0009/MID6,F0009/MID3
## 10 F0009/MID7 F0009/MID3,F0009/MID6,F0009/MID4,F0009/MID1,F0009/MID2
## 11 F0010/MID1                                  F0010/MID3,F0010/MID4
## 12 F0010/MID2                                  F0010/MID3,F0010/MID4
## 13 F0010/MID3                                             F0010/MID4
## 14 F0016/MID1                                  F0016/MID5,F0016/MID2
## 15 F0016/MID2                                  F0016/MID4,F0016/MID3
## 16 F0016/MID3                                             F0016/MID4
## 17 F0017/MID1 F0017/MID3,F0017/MID4,F0017/MID7,F0017/MID5,F0017/MID6
## 18 F0017/MID2 F0017/MID5,F0017/MID4,F0017/MID3,F0017/MID7,F0017/MID6
## 19 F0017/MID3                       F0017/MID4,F0017/MID6,F0017/MID5
## 20 F0017/MID4                                  F0017/MID5,F0017/MID6

0.2 Concatenate subfamilies 1

## Prepend each p1 to its collapsed p2 list so that every string holds one
## complete subfamily.
subfamilies1 <- paste(collaps_P1P2$p1, collaps_P1P2$p2, sep = ",")
## Keep only subfamilies with more than two persons: the pattern needs two
## "family/member," tokens in a row, i.e. at least a third member after them.
has_three_or_more1 <- stringr::str_detect(
  subfamilies1,
  "\\w\\d*\\/\\w*\\d*,\\w\\d*\\/\\w*\\d*,"
)
subfamilies1_ <- subfamilies1[has_three_or_more1]
head(subfamilies1_, 20)
##  [1] "F0009/MID1,F0009/MID4,F0009/MID3,F0009/MID2,F0009/MID6"            
##  [2] "F0009/MID2,F0009/MID4,F0009/MID6,F0009/MID3"                       
##  [3] "F0009/MID3,F0009/MID4,F0009/MID6"                                  
##  [4] "F0009/MID5,F0009/MID1,F0009/MID2,F0009/MID4,F0009/MID6,F0009/MID3" 
##  [5] "F0009/MID7,F0009/MID3,F0009/MID6,F0009/MID4,F0009/MID1,F0009/MID2" 
##  [6] "F0010/MID1,F0010/MID3,F0010/MID4"                                  
##  [7] "F0010/MID2,F0010/MID3,F0010/MID4"                                  
##  [8] "F0016/MID1,F0016/MID5,F0016/MID2"                                  
##  [9] "F0016/MID2,F0016/MID4,F0016/MID3"                                  
## [10] "F0017/MID1,F0017/MID3,F0017/MID4,F0017/MID7,F0017/MID5,F0017/MID6" 
## [11] "F0017/MID2,F0017/MID5,F0017/MID4,F0017/MID3,F0017/MID7,F0017/MID6" 
## [12] "F0017/MID3,F0017/MID4,F0017/MID6,F0017/MID5"                       
## [13] "F0017/MID4,F0017/MID5,F0017/MID6"                                  
## [14] "F0020/MID1,F0020/MID10,F0020/MID6,F0020/MID5,F0020/MID3,F0020/MID2"
## [15] "F0020/MID11,F0020/MID3,F0020/MID10,F0020/MID6"                     
## [16] "F0020/MID2,F0020/MID6,F0020/MID10,F0020/MID3,F0020/MID5"           
## [17] "F0020/MID3,F0020/MID6,F0020/MID10,F0020/MID5"                      
## [18] "F0020/MID5,F0020/MID6,F0020/MID10"                                 
## [19] "F0020/MID7,F0020/MID5,F0020/MID1,F0020/MID3,F0020/MID6,F0020/MID10"
## [20] "F0020/MID8,F0020/MID10,F0020/MID6,F0020/MID3,F0020/MID2"

0.3 Collapse from p2 to p1

## Mirror of the p1 -> p2 collapse: for every distinct p2, join all of its
## p1 partners into one comma-separated string (one output row per p2).
collaps_P2P1 <- plyr::ddply(
  relationship,
  "p2",
  function(group) paste(group$p1, collapse = ",")
)
## ddply names the computed column V1; label it as the collapsed p1 list.
names(collaps_P2P1) <- c("p2", "p1")
head(collaps_P2P1, 20)
##             p2
## 1   F0002/MID3
## 2   F0005/MID2
## 3   F0009/MID1
## 4   F0009/MID2
## 5   F0009/MID3
## 6   F0009/MID4
## 7   F0009/MID6
## 8   F0010/MID3
## 9   F0010/MID4
## 10  F0016/MID2
## 11  F0016/MID3
## 12  F0016/MID4
## 13  F0016/MID5
## 14  F0017/MID3
## 15  F0017/MID4
## 16  F0017/MID5
## 17  F0017/MID6
## 18  F0017/MID7
## 19  F0020/MID1
## 20 F0020/MID10
##                                                                                          p1
## 1                                                                     F0002/MID1,F0002/MID2
## 2                                                                     F0005/MID1,F0005/MID3
## 3                                                                     F0009/MID5,F0009/MID7
## 4                                                          F0009/MID1,F0009/MID5,F0009/MID7
## 5                                               F0009/MID1,F0009/MID2,F0009/MID5,F0009/MID7
## 6                                    F0009/MID1,F0009/MID2,F0009/MID3,F0009/MID5,F0009/MID7
## 7                         F0009/MID1,F0009/MID2,F0009/MID3,F0009/MID4,F0009/MID5,F0009/MID7
## 8                                                                     F0010/MID1,F0010/MID2
## 9                                                          F0010/MID1,F0010/MID2,F0010/MID3
## 10                                                                               F0016/MID1
## 11                                                                               F0016/MID2
## 12                                                                    F0016/MID2,F0016/MID3
## 13                                                                               F0016/MID1
## 14                                                                    F0017/MID1,F0017/MID2
## 15                                                         F0017/MID1,F0017/MID2,F0017/MID3
## 16                                              F0017/MID1,F0017/MID2,F0017/MID3,F0017/MID4
## 17                                              F0017/MID1,F0017/MID2,F0017/MID3,F0017/MID4
## 18                                                         F0017/MID1,F0017/MID2,F0017/MID5
## 19                                                                               F0020/MID7
## 20 F0020/MID1,F0020/MID11,F0020/MID2,F0020/MID3,F0020/MID5,F0020/MID6,F0020/MID7,F0020/MID8

0.4 Concatenate subfamilies 2

## Append each p2 to its collapsed p1 list so that every string holds one
## complete subfamily.
subfamilies2 <- paste(collaps_P2P1$p1, collaps_P2P1$p2, sep = ",")
## Keep only subfamilies with more than two persons: the pattern needs two
## "family/member," tokens in a row, i.e. at least a third member after them.
has_three_or_more2 <- stringr::str_detect(
  subfamilies2,
  "\\w\\d*\\/\\w*\\d*,\\w\\d*\\/\\w*\\d*,"
)
subfamilies2_ <- subfamilies2[has_three_or_more2]
head(subfamilies2_, 20)
##  [1] "F0002/MID1,F0002/MID2,F0002/MID3"                                                                    
##  [2] "F0005/MID1,F0005/MID3,F0005/MID2"                                                                    
##  [3] "F0009/MID5,F0009/MID7,F0009/MID1"                                                                    
##  [4] "F0009/MID1,F0009/MID5,F0009/MID7,F0009/MID2"                                                         
##  [5] "F0009/MID1,F0009/MID2,F0009/MID5,F0009/MID7,F0009/MID3"                                              
##  [6] "F0009/MID1,F0009/MID2,F0009/MID3,F0009/MID5,F0009/MID7,F0009/MID4"                                   
##  [7] "F0009/MID1,F0009/MID2,F0009/MID3,F0009/MID4,F0009/MID5,F0009/MID7,F0009/MID6"                        
##  [8] "F0010/MID1,F0010/MID2,F0010/MID3"                                                                    
##  [9] "F0010/MID1,F0010/MID2,F0010/MID3,F0010/MID4"                                                         
## [10] "F0016/MID2,F0016/MID3,F0016/MID4"                                                                    
## [11] "F0017/MID1,F0017/MID2,F0017/MID3"                                                                    
## [12] "F0017/MID1,F0017/MID2,F0017/MID3,F0017/MID4"                                                         
## [13] "F0017/MID1,F0017/MID2,F0017/MID3,F0017/MID4,F0017/MID5"                                              
## [14] "F0017/MID1,F0017/MID2,F0017/MID3,F0017/MID4,F0017/MID6"                                              
## [15] "F0017/MID1,F0017/MID2,F0017/MID5,F0017/MID7"                                                         
## [16] "F0020/MID1,F0020/MID11,F0020/MID2,F0020/MID3,F0020/MID5,F0020/MID6,F0020/MID7,F0020/MID8,F0020/MID10"
## [17] "F0020/MID1,F0020/MID8,F0020/MID9,F0020/MID2"                                                         
## [18] "F0020/MID1,F0020/MID11,F0020/MID2,F0020/MID7,F0020/MID8,F0020/MID3"                                  
## [19] "F0020/MID1,F0020/MID2,F0020/MID3,F0020/MID7,F0020/MID9,F0020/MID5"                                   
## [20] "F0020/MID1,F0020/MID11,F0020/MID2,F0020/MID3,F0020/MID5,F0020/MID7,F0020/MID8,F0020/MID6"
## Pool the subfamilies found in both directions (p1 -> p2 and p2 -> p1),
## dropping strings that occur more than once.
families <- unique(c(subfamilies1_, subfamilies2_))
## Preview with head() rather than families[1:20]: positional indexing pads
## the result with NA when fewer than 20 families exist, head() does not.
head(families, 20)
##  [1] "F0009/MID1,F0009/MID4,F0009/MID3,F0009/MID2,F0009/MID6"            
##  [2] "F0009/MID2,F0009/MID4,F0009/MID6,F0009/MID3"                       
##  [3] "F0009/MID3,F0009/MID4,F0009/MID6"                                  
##  [4] "F0009/MID5,F0009/MID1,F0009/MID2,F0009/MID4,F0009/MID6,F0009/MID3" 
##  [5] "F0009/MID7,F0009/MID3,F0009/MID6,F0009/MID4,F0009/MID1,F0009/MID2" 
##  [6] "F0010/MID1,F0010/MID3,F0010/MID4"                                  
##  [7] "F0010/MID2,F0010/MID3,F0010/MID4"                                  
##  [8] "F0016/MID1,F0016/MID5,F0016/MID2"                                  
##  [9] "F0016/MID2,F0016/MID4,F0016/MID3"                                  
## [10] "F0017/MID1,F0017/MID3,F0017/MID4,F0017/MID7,F0017/MID5,F0017/MID6" 
## [11] "F0017/MID2,F0017/MID5,F0017/MID4,F0017/MID3,F0017/MID7,F0017/MID6" 
## [12] "F0017/MID3,F0017/MID4,F0017/MID6,F0017/MID5"                       
## [13] "F0017/MID4,F0017/MID5,F0017/MID6"                                  
## [14] "F0020/MID1,F0020/MID10,F0020/MID6,F0020/MID5,F0020/MID3,F0020/MID2"
## [15] "F0020/MID11,F0020/MID3,F0020/MID10,F0020/MID6"                     
## [16] "F0020/MID2,F0020/MID6,F0020/MID10,F0020/MID3,F0020/MID5"           
## [17] "F0020/MID3,F0020/MID6,F0020/MID10,F0020/MID5"                      
## [18] "F0020/MID5,F0020/MID6,F0020/MID10"                                 
## [19] "F0020/MID7,F0020/MID5,F0020/MID1,F0020/MID3,F0020/MID6,F0020/MID10"
## [20] "F0020/MID8,F0020/MID10,F0020/MID6,F0020/MID3,F0020/MID2"

0.5 Index of person in the same family

## Every distinct person id that appears in any retained subfamily.
member_ids <- unique(unlist(strsplit(families, ",")))
## Leading family code of each id, e.g. "F0009" from "F0009/MID1".
family_codes <- str_extract(member_ids, "\\w\\d*")
## Turn the codes into numeric factor level codes, then shift them by 2.
## NOTE(review): with this offset the first two factor levels map to -1 and 0
## (see the printed vector below) -- confirm that this numbering is intended.
family_idx <- as.numeric(as.factor(family_codes)) - 2

family_idx
##    [1]   1   1   1   1   1   1   1   2   2   2   2   3   3   3   3   3   4
##   [18]   4   4   4   4   4   4   5   5   5   5   5   5   5   5   5   5   6
##   [35]   6   6   6   7   7   7   7   7   7   8   8   8   9   9   9  10  10
##   [52]  10  10  10  11  11  11  11  11  11  11  12  12  12  12  12  12  13
##   [69]  13  13  14  14  14  17  17  17  17  17  17  18  18  18  18  20  20
##   [86]  20  20  20  20  21  21  21  21  22  22  22  22  22  22  23  23  23
##  [103]  23  23  24  24  24  24  24  24  24  25  25  25  25  25  26  26  26
##  [120]  27  27  27  27  27  28  28  28  28  29  29  29  29  29  29  29  29
##  [137]  30  30  30  30  31  31  31  32  32  32  32  32  32  32  32  32  32
##  [154]  33  33  33  33  33  33  33  33  33  33  33  33  33  33  33  34  34
##  [171]  34  34  34  35  35  35  35  35  36  36  36  38  38  38  38  39  39
##  [188]  39  40  40  40  40  40  40  40  41  41  41  42  42  42  42  42  42
##  [205]  43  43  43  43  43  43  43  46  46  46  46  46  46  46  46  47  47
##  [222]  47  47  47  47  47  47  48  48  48  48  49  49  49  50  50  50  51
##  [239]  51  51  51  51  51  51  52  52  52  52  52  52  52  54  54  54  54
##  [256]  55  55  55  55  55  56  56  56  56  57  57  57  57  57  57  57  59
##  [273]  59  59  59  59  61  61  61  61  62  62  62  63  63  63  63  63  63
##  [290]  63  63  63  64  64  64  64  64  64  64  65  65  65  66  66  66  66
##  [307]  67  67  67  67  68  68  68  68  68  68  68  70  70  70  70  71  71
##  [324]  71  71  71  72  72  72  72  72  72  73  73  73  74  74  74  74  74
##  [341]  74  74  74  75  75  75  75  77  77  77  77  77  77  78  78  78  78
##  [358]  78  79  79  79  79  79  80  80  80  80  80  81  81  81  81  81  81
##  [375]  81  81  81  81  82  82  82  82  82  82  82  82  82  82  82  82  82
##  [392]  82  83  83  83  83  83  83  83  85  85  85  85  86  86  86  86  88
##  [409]  88  88  88  88  88  88  88  89  89  89  89  89  89  89  90  90  90
##  [426]  90  91  91  91  91  91  92  92  92  92  93  93  93  93  93  93  93
##  [443]  93  93  93  93  93  93  94  94  94  95  95  95  96  96  96  96  96
##  [460]  96  97  97  97  97  97  97  97  98  98  98 100 100 100 100 100 100
##  [477] 100 103 103 103 105 105 105 105 105 105 105 106 106 106 106 106 106
##  [494] 106 106 106 107 107 107 108 108 108 109 109 109 109 109 109 109 109
##  [511] 109 109 110 110 110 110 110 111 111 111 111 111 112 112 112 112 112
##  [528] 112 113 113 113 113 114 114 114 114 114 114 115 115 115 115 115 116
##  [545] 116 116 116 117 117 117 117 117 117 118 118 118 118 118 119 119 119
##  [562] 119 120 120 120 120 120 120 120 120 120 121 121 121 121 121 122 122
##  [579] 122 123 123 123 124 124 124 124 124 124 124 124 125 125 125 125 125
##  [596] 125 126 126 126 126 126 126 127 127 127 127 127 128 128 128 129 129
##  [613] 129 129 130 130 130 130 130 131 131 131 131 131 131 131 131 131 131
##  [630] 131 131 131 132 132 132 132 133 133 133 133 133 133 134 134 134 134
##  [647] 135 135 135 135 135 135 135 135 136 136 136 137 137 137 137 138 138
##  [664] 138 138 138 138 140 140 140 140 141 141 141 141 141 141 141 141 141
##  [681] 141 142 142 142 144 144 144 144 144 144 144 145 145 145 145 145 145
##  [698] 145 145 145 145 145 146 146 146 146 146 146 147 147 147 147 149 149
##  [715] 149 149 149 149 149 149 150 150 150 150 150 151 151 151 151 152 152
##  [732] 152 152 154 154 154 154 154 155 155 155 156 156 156 157 157 157 157
##  [749] 157 157 157 157 157 157 158 158 158 158 158 158 160 160 160 160 160
##  [766] 160 160 160 160 160 161 161 161 162 162 162 163 163 163 163 163 163
##  [783] 164 164 164 164 164 165 165 165 165 165 165 165 166 166 166 166 166
##  [800] 166 166 166 166 167 167 167 169 169 169 169 170 170 170 170 170 170
##  [817] 172 172 172 172 173 173 173 173 173 173 173 173 174 174 174 174 174
##  [834] 174 174 174 176 176 176 177 177 177 179 179 179 179 179 179 180 180
##  [851] 180 180 180 180 180 182 182 182 183 183 183 184 184 184 184 186 186
##  [868] 186 187 187 187 187 187 189 189 189 189 189 191 191 191 191 191 192
##  [885] 192 192 192 192 193 193 193 193 193 193 193 193 193 193 194 194 194
##  [902] 194 196 196 196 196 197 197 197 197 197 200 200 200 200 201 201 201
##  [919] 201 202 202 202 203 203 203 203 203 203 204 204 204 204 204 204 205
##  [936] 205 205 205 206 206 206 206 207 207 207 207 208 208 208 208 208 209
##  [953] 209 209 209 209 210 210 210 210 211 211 211 211 211 211 211 211 211
##  [970] 211 211 211 211 212 212 212 212 213 213 213 214 214 214 214 215 215
##  [987] 215 215 215 216 216 216 217 217 217 217 217 217 217 217 217 220 220
## [1004] 220 220 220 221 221 221 222 222 222 222 222 222 222 222 224 224 224
## [1021] 224 225 225 225 225 225 226 226 226 226 226 227 227 227 227 228 228
## [1038] 228 228 228 228 229 229 229 229 229 229 229 229 229 229 229 229 229
## [1055] 229 229 229 229 229 229 229 229 229 229 229 229 229 229 229 229 229
## [1072] 229 229 229 229 229 231 231 231 231 232 232 232 233 233 233 233 233
## [1089] 233 234 234 234 234 234 234 234 235 235 235 235 235 236 236 236 236
## [1106] 236 236 238 238 238 239 239 239 239 240 240 240 240 241 241 241 241
## [1123] 242 242 242 242 243 243 243 243 243 243 243 244 244 244 244 244 244
## [1140] 245 245 245 245 245 246 246 246 246 246 246 249 249 249 250 250 250
## [1157] 250 250 250 250 250 251 251 251 252 252 252 252 252 252 252 252 252
## [1174] 253 253 253 253 253 254 254 254 254 255 255 255 256 256 256 256 256
## [1191] 256 257 257 257 257 257 257 258 258 258 258 258 259 259 259 259 259
## [1208] 260 260 260 260 260 261 261 261 262 262 262 262 262 262 264 264 264
## [1225] 267 267 267 267 267 267 267 267 267 267 268 268 268 269 269 269 270
## [1242] 270 270 270 270 271 271 271 271 271 272 272 272 272 272 272 272 272
## [1259] 272 273 273 273 273 274 274 274 274 275 275 275 275 275 275 275 275
## [1276] 275 275 277 277 277 278 278 278 278 279 279 279 279 279 280 280 280
## [1293] 280 281 281 281 282 282 282 283 283 283 283 284 284 284 284 284 285
## [1310] 285 285 285 285 286 286 286 287 287 287 287 287 288 288 288 288 289
## [1327] 289 289 289 289 289 289 289 289 290 290 290 290 291 291 291 291 291
## [1344] 291 292 292 292 292 292 292 292 293 293 293 293 294 294 294 294 295
## [1361] 295 295 295 295 295 295 295 295 295 296 296 296 296 296 296 297 297
## [1378] 297 297 297 298 298 298 298 300 300 300 300 300 300 301 301 301 301
## [1395] 301 301 301 301 301 302 302 302 302 303 303 303 303 303 303 304 304
## [1412] 304 304 304 304 304 305 305 305 305 306 306 306 306 306 306 307 307
## [1429] 307 307 307 307 308 308 308 308 308 308 308 310 310 310 310 310 312
## [1446] 312 312 312 312 312 312 312 312 313 313 313 313 315 315 315 315 315
## [1463] 315 315 316 316 316 316 316 317 317 317 317 317 318 318 318 318 319
## [1480] 319 319 319 320 320 320 320 320 320 320 320 321 321 321 321 321 322
## [1497] 322 322 322 322 323 323 323 324 324 324 324 324 324 324 324 326 326
## [1514] 326 326 327 327 327 327 327 328 328 328 328 328 328 328 328 328 329
## [1531] 329 329 329 329 330 330 330 330 330 330 330 330 331 331 331 332 332
## [1548] 332 332 332 332 332 332 332 332 332 333 333 333 334 334 334 336 336
## [1565] 336 336 336 336 336 337 337 337 337 337 338 338 338 338 338 338 339
## [1582] 339 339 339 339 339 339 339 339 340 340 340 340 341 341 341 342 342
## [1599] 342 342 343 343 343 343 343 344 344 344 345 345 345 345 346 346 346
## [1616] 346 346 346 347 347 347 349 349 349 350 350 350 353 353 353 353 353
## [1633] 353 353 353 354 354 354 354 354 355 355 355 355 355 355 356 356 356
## [1650] 356 356 356 357 357 357 357 357 359 359 359 359 360 360 360 360 361
## [1667] 361 361 361 363 363 363 363 363 364 364 364 364 364 364 364 367 367
## [1684] 367 367 369 369 369 369 369 370 370 370 370 371 371 371 371 372 372
## [1701] 372 372 373 373 373 373 373 374 374 374 375 375 375 375 375 375 376
## [1718] 376 376 376 376 377 377 377 377 378 378 378 378 378 379 379 379 379
## [1735] 380 380 380 380 381 381 381 381 382 382 382 382 387 387 387 387 388
## [1752] 388 388 388 389 389 389 389 389 389 389 389 390 390 390 390 390 391
## [1769] 391 391 391 391 392 392 392 392 392 392 392 393 393 393 394 394 394
## [1786] 396 396 396 398 398 398 398 398 399 399 399 399 399 399 400 400 400
## [1803] 400 400 400 400 400 401 401 401 403 403 403 403 404 404 404 404 404
## [1820] 404 404 405 405 405 405 406 406 406 406 406 406 406 406 407 407 407
## [1837] 407 408 408 408 408 408 409 409 409 409 410 410 410 410 410 410 411
## [1854] 411 411 411 411 411 411 411 411  -1  -1  -1   0   0   0   6   8   8
## [1871]   9  14  15  15  15  16  16  16  19  19  19  25  30  31  34  35  37
## [1888]  37  37  39  39  44  44  44  45  45  45  45  45  48  50  51  53  53
## [1905]  53  57  58  58  58  60  60  60  61  61  62  64  65  69  69  69  73
## [1922]  75  75  76  76  76  79  84  84  84  84  86  87  87  87  93  94  95
## [1939]  98  99  99  99 100 101 101 101 102 102 102 103 104 104 104 107 108
## [1956] 111 120 120 122 138 139 139 139 142 143 143 143 148 148 148 153 153
## [1973] 153 154 158 159 159 159 161 161 162 162 162 167 168 168 168 170 171
## [1990] 171 171 174 174 175 175 175 177 178 178 178 181 181 181 182 182 182
## [2007] 185 185 185 186 188 188 188 190 190 190 193 194 195 195 195 198 198
## [2024] 198 199 199 199 199 199 202 205 212 213 216 218 218 218 219 219 219
## [2041] 220 223 223 223 224 229 229 229 229 229 229 230 230 230 233 237 237
## [2058] 237 239 240 241 241 241 242 242 244 247 247 247 247 248 248 248 249
## [2075] 251 256 259 261 261 263 263 263 263 263 265 265 265 266 266 266 269
## [2092] 271 276 276 276 277 278 279 279 280 285 286 290 292 296 296 299 299
## [2109] 299 309 309 309 311 311 311 314 314 314 314 314 315 316 321 322 325
## [2126] 325 325 329 331 333 334 334 335 335 335 339 340 341 348 348 348 348
## [2143] 348 350 351 351 351 352 352 352 358 358 358 362 362 362 365 365 365
## [2160] 366 366 366 367 368 368 368 373 375 377 378 379 383 383 383 384 384
## [2177] 384 385 385 385 386 386 386 389 392 393 394 395 395 395 395 396 397
## [2194] 397 397 398 401 401 402 402 402 406 407

0.6 Collapse Persons by family index

## Pair every person id with the index of the family it belongs to.
## The ids are derived exactly as they were for family_idx, so both vectors
## line up element by element.
fam_persons <- unique(unlist(strsplit(families, ",")))
## Guard against silent recycling if the two vectors ever get out of sync.
stopifnot(length(fam_persons) == length(family_idx))
## Build the table with data.frame() instead of as.data.frame(cbind(...)):
## cbind() of a character and a numeric vector yields a character matrix, which
## would turn family_idx into text and make later grouping sort "10" before "2".
fam <- data.frame(
  Person = fam_persons,
  family_idx = family_idx,
  stringsAsFactors = FALSE
)
head(fam, 20)
##        Person family_idx
## 1  F0009/MID1          1
## 2  F0009/MID4          1
## 3  F0009/MID3          1
## 4  F0009/MID2          1
## 5  F0009/MID6          1
## 6  F0009/MID5          1
## 7  F0009/MID7          1
## 8  F0010/MID1          2
## 9  F0010/MID3          2
## 10 F0010/MID4          2
## 11 F0010/MID2          2
## 12 F0016/MID1          3
## 13 F0016/MID5          3
## 14 F0016/MID2          3
## 15 F0016/MID4          3
## 16 F0016/MID3          3
## 17 F0017/MID1          4
## 18 F0017/MID3          4
## 19 F0017/MID4          4
## 20 F0017/MID7          4
## One row per family index, listing all of its members as a single
## comma-separated string.
unique_families <- plyr::ddply(
  fam,
  "family_idx",
  function(members) paste(members$Person, collapse = ",")
)
names(unique_families) <- c("index", "Persons with kinship relationships")
## Render as an interactive table with black text in every column.
DT::formatStyle(
  DT::datatable(unique_families),
  colnames(unique_families),
  color = "black"
)

There are 413 families, i.e. groups of persons linked by kinship relationships (one per row of the table above).