Skip to content

Commit

Permalink
Improve group search method for connectivity
Browse files Browse the repository at this point in the history
  • Loading branch information
pw0908 committed Oct 18, 2024
1 parent 9292a61 commit b164ace
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 25 deletions.
59 changes: 36 additions & 23 deletions src/group_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ function _get_groups_from_smiles(smiles::String,groups::Vector{GCPair},connectiv
end

if connectivity
return (smiles,gcpairs,get_connectivity(mol,group_id,groups))
return (smiles,gcpairs,get_connectivity(mol,group_id,group_id_expanded,groups, bond_mat_minimum, __bonds))
else
return (smiles,gcpairs)
end
Expand Down Expand Up @@ -204,33 +204,38 @@ function find_covered_atoms(mol, groups, atoms, __bonds, check)
return smatches_idx_expanded, bond_mat
end

function get_connectivity(mol,group_id,groups)
function get_connectivity(mol, group_id, group_id_expanded, groups, mapping, bondlist)
ngroups = length(group_id)
A = zeros(ngroups,ngroups)
connectivity = Pair{NTuple{2,String},Int}[]
for i in 1:ngroups
gci = groups[group_id[i]]
smart1 = smarts(gci)
smart2 = smarts(gci)
querie = get_qmol(smart1*smart2)
smatch = get_substruct_matches(mol,querie)
name_i = name(gci)
A[i,i] = length(unique(smatch))
if A[i,i]!=0
append!(connectivity,[(name_i,name_i)=>Int(A[i,i])])

bond_mat = zeros(Int64, size(mapping,1), size(mapping,1))

for i in 1:length(bondlist)
bond = bondlist[i]
group_1 = mapping[:,bond[1]].==1
group_2 = mapping[:,bond[2]].==1
if group_1 == group_2
continue
else
gc1 = group_id_expanded[group_1]
gc2 = group_id_expanded[group_2]
idx1 = group_id .== gc1
idx2 = group_id .== gc2
A[idx1,idx2] .+= 1
bond_mat[group_1, group_2] .+= 1
bond_mat[group_2, group_1] .+= 1
end
end

for j in i+1:ngroups
gcj = groups[group_id[j]]
smart2 = smarts(gcj)
querie = get_qmol(smart1*smart2)
smatch = get_substruct_matches(mol,querie)
querie = get_qmol(smart2*smart1)
append!(smatch,get_substruct_matches(mol,querie))
A[i,j] = length(unique(smatch))
name_j = name(gcj)
if A[i,j]!=0
append!(connectivity,[(name_i,name_j)=>Int(A[i,j])])
for i in 1:ngroups
for j in i:ngroups
if A[i,j] != 0 || A[j,i] != 0
if i != j
append!(connectivity,[(name(groups[group_id[i]]),name(groups[group_id[j]]))=>Int(A[i,j]+A[j,i])])
else
append!(connectivity,[(name(groups[group_id[i]]),name(groups[group_id[j]]))=>Int(A[i,j])])
end
end
end
end
Expand Down Expand Up @@ -348,4 +353,12 @@ function group_replace(grouplist,group_keys...)
return [k => v for (k,v) in pairs(res)]

end

# function check_coverage(smiles, groups)
# mol = get_mol(smiles)
# atoms = get_atoms(mol)
# __bonds = __getbondlist(mol)
# group_id_expanded, bond_mat_minimum = get_expanded_groups(mol, groups, atoms, __bonds, true, smiles)
# return true
# end
export get_groups_from_name, get_groups_from_smiles, group_replace
10 changes: 8 additions & 2 deletions test/test_group_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,19 @@ end
(component, groups, connectivity) = get_groups_from_smiles(smiles, gcPPCSAFTGroups; connectivity=true)

@test isequal(["CH2" => 7, "aCH" => 5, "CH3" => 3, "aC" => 1, "C" => 1] |> Set,Set(groups))
@test isequal([("aCH", "aCH") => 4, ("CH2", "C") => 2, ("C", "aC") => 1, ("aCH", "aC") => 2, ("CH3", "CH2") => 3, ("CH2", "CH2") => 5, ("CH3", "C") => 1] |> Set,Set(connectivity))
@test isequal([("aCH", "aCH") => 4, ("CH2", "C") => 2, ("C", "aC") => 1, ("aCH", "aC") => 2, ("CH3", "CH2") => 2, ("CH2", "CH2") => 5, ("CH3", "C") => 1] |> Set,Set(connectivity))

# Test case where order matters
# Test a highly branched hydrocarbon
smiles = "CC(=O)OC"
(component, groups, connectivity) = get_groups_from_smiles(smiles, gcPPCSAFTGroups; connectivity=true)

@test isequal(["CH3" => 2, "COO" => 1] |> Set,Set(groups))
@test isequal([("CH3", "COO") => 2] |> Set,Set(connectivity))

# Test case where there is overlap between groups
smiles = "C(COCCOCCOCCOCCO)O"
(component, groups, connectivity) = get_groups_from_smiles(smiles, gcPPCSAFTGroups; connectivity=true)

@test isequal(["CH2" => 6, "OH" => 2, "OCH2" => 4] |> Set,Set(groups))
@test isequal([("CH2", "CH2") => 2, ("CH2", "OH") => 2, ("CH2", "OCH2") => 6, ("OCH2", "OCH2") => 1] |> Set,Set(connectivity))
end

0 comments on commit b164ace

Please sign in to comment.