NGL@1.0.0-beta.7 Home Manual Reference Source Gallery

src/parser/mmtf-parser.js

/**
 * @file Mmtf Parser
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 * @private
 */

import { Matrix4 } from '../../lib/three.es6.js'

import { Debug, Log, ParserRegistry } from '../globals.js'
import StructureParser from './structure-parser.js'
import {
    buildUnitcellAssembly, calculateBondsBetween, calculateBondsWithin
} from '../structure/structure-utils.js'
import { ChemCompHetero } from '../structure/structure-constants.js'
import Entity from '../structure/entity.js'
import Unitcell from '../symmetry/unitcell.js'
import Assembly from '../symmetry/assembly.js'

import { decodeMsgpack, decodeMmtf } from '../../lib/mmtf.es6.js'

const SstrucMap = {
  '0': 'i'.charCodeAt(0),  // pi helix
  '1': 's'.charCodeAt(0),  // bend
  '2': 'h'.charCodeAt(0),  // alpha helix
  '3': 'e'.charCodeAt(0),  // extended
  '4': 'g'.charCodeAt(0),  // 3-10 helix
  '5': 'b'.charCodeAt(0),  // bridge
  '6': 't'.charCodeAt(0),  // turn
  '7': 'l'.charCodeAt(0),  // coil
  '-1': ''.charCodeAt(0)   // NA
}

class MmtfParser extends StructureParser {
  get type () { return 'mmtf' }
  get isBinary () { return true }

  _parse () {
    // https://github.com/rcsb/mmtf

    if (Debug) Log.time('MmtfParser._parse ' + this.name)

    var i, il, j, jl, groupData

    var s = this.structure
    var sd = decodeMmtf(decodeMsgpack(this.streamer.data))

        // structure header
    var headerFields = [
      'depositionDate', 'releaseDate', 'resolution',
      'rFree', 'rWork', 'experimentalMethods'
    ]
    headerFields.forEach(function (name) {
      if (sd[ name ] !== undefined) {
        s.header[ name ] = sd[ name ]
      }
    })

    var numBonds, numAtoms, numGroups, numChains, numModels
    var chainsPerModel

    s.id = sd.structureId
    s.title = sd.title

    if (this.firstModelOnly || this.asTrajectory) {
      numModels = 1
      numChains = sd.chainsPerModel[ 0 ]

      numGroups = 0
      for (i = 0, il = numChains; i < il; ++i) {
        numGroups += sd.groupsPerChain[ i ]
      }

      numAtoms = 0
      for (i = 0, il = numGroups; i < il; ++i) {
        groupData = sd.groupList[ sd.groupTypeList[ i ] ]
        numAtoms += groupData.atomNameList.length
      }

      numBonds = sd.numBonds

      chainsPerModel = [ numChains ]
    } else {
      numBonds = sd.numBonds
      numAtoms = sd.numAtoms
      numGroups = sd.numGroups
      numChains = sd.numChains
      numModels = sd.numModels

      chainsPerModel = sd.chainsPerModel
    }

    numBonds += numGroups  // add numGroups to have space for polymer bonds

    //

    if (this.asTrajectory) {
      for (i = 0, il = sd.numModels; i < il; ++i) {
        var frame = new Float32Array(numAtoms * 3)
        var frameAtomOffset = numAtoms * i

        for (j = 0; j < numAtoms; ++j) {
          var j3 = j * 3
          var offset = j + frameAtomOffset
          frame[ j3 ] = sd.xCoordList[ offset ]
          frame[ j3 + 1 ] = sd.yCoordList[ offset ]
          frame[ j3 + 2 ] = sd.zCoordList[ offset ]
        }

        s.frames.push(frame)
      }
    }

    // bondStore
    var bAtomIndex1 = new Uint32Array(numBonds)
    var bAtomIndex2 = new Uint32Array(numBonds)
    var bBondOrder = new Uint8Array(numBonds)

    var aGroupIndex = new Uint32Array(numAtoms)

    var gChainIndex = new Uint32Array(numGroups)
    var gAtomOffset = new Uint32Array(numGroups)
    var gAtomCount = new Uint16Array(numGroups)

    var cModelIndex = new Uint16Array(numChains)
    var cGroupOffset = new Uint32Array(numChains)
    var cGroupCount = new Uint32Array(numChains)

    var mChainOffset = new Uint32Array(numModels)
    var mChainCount = new Uint32Array(numModels)

    // set-up model-chain relations
    var chainOffset = 0
    for (i = 0, il = numModels; i < il; ++i) {
      var modelChainCount = chainsPerModel[ i ]
      mChainOffset[ i ] = chainOffset
      mChainCount[ i ] = modelChainCount
      for (j = 0; j < modelChainCount; ++j) {
        cModelIndex[ j + chainOffset ] = i
      }
      chainOffset += modelChainCount
    }

    // set-up chain-residue relations
    var groupsPerChain = sd.groupsPerChain
    var groupOffset = 0
    for (i = 0, il = numChains; i < il; ++i) {
      var chainGroupCount = groupsPerChain[ i ]
      cGroupOffset[ i ] = groupOffset
      cGroupCount[ i ] = chainGroupCount
      for (j = 0; j < chainGroupCount; ++j) {
        gChainIndex[ j + groupOffset ] = i
      }
      groupOffset += chainGroupCount
    }

    /// ///
    // get data from group map

    var atomOffset = 0
    var bondOffset = 0

    for (i = 0, il = numGroups; i < il; ++i) {
      groupData = sd.groupList[ sd.groupTypeList[ i ] ]
      var groupAtomCount = groupData.atomNameList.length

      var groupBondAtomList = groupData.bondAtomList
      var groupBondOrderList = groupData.bondOrderList

      for (j = 0, jl = groupBondOrderList.length; j < jl; ++j) {
        bAtomIndex1[ bondOffset ] = atomOffset + groupBondAtomList[ j * 2 ]
        bAtomIndex2[ bondOffset ] = atomOffset + groupBondAtomList[ j * 2 + 1 ]
        bBondOrder[ bondOffset ] = groupBondOrderList[ j ]
        bondOffset += 1
      }

      //

      gAtomOffset[ i ] = atomOffset
      gAtomCount[ i ] = groupAtomCount

      for (j = 0; j < groupAtomCount; ++j) {
        aGroupIndex[ atomOffset ] = i
        atomOffset += 1
      }
    }

    // extra bonds

    var bondAtomList = sd.bondAtomList
    if (bondAtomList) {
      if (sd.bondOrderList) {
        bBondOrder.set(sd.bondOrderList, bondOffset)
      }

      for (i = 0, il = bondAtomList.length; i < il; i += 2) {
        var atomIndex1 = bondAtomList[ i ]
        var atomIndex2 = bondAtomList[ i + 1 ]
        if (atomIndex1 < numAtoms && atomIndex2 < numAtoms) {
          bAtomIndex1[ bondOffset ] = atomIndex1
          bAtomIndex2[ bondOffset ] = atomIndex2
          bondOffset += 1
        }
      }
    }

    //

    s.bondStore.length = bBondOrder.length
    s.bondStore.count = bondOffset
    s.bondStore.atomIndex1 = bAtomIndex1
    s.bondStore.atomIndex2 = bAtomIndex2
    s.bondStore.bondOrder = bBondOrder

    s.atomStore.length = numAtoms
    s.atomStore.count = numAtoms
    s.atomStore.residueIndex = aGroupIndex
    s.atomStore.atomTypeId = new Uint16Array(numAtoms)
    s.atomStore.x = sd.xCoordList.subarray(0, numAtoms)
    s.atomStore.y = sd.yCoordList.subarray(0, numAtoms)
    s.atomStore.z = sd.zCoordList.subarray(0, numAtoms)
    s.atomStore.serial = sd.atomIdList.subarray(0, numAtoms)
    s.atomStore.bfactor = sd.bFactorList.subarray(0, numAtoms)
    s.atomStore.altloc = sd.altLocList.subarray(0, numAtoms)
    s.atomStore.occupancy = sd.occupancyList.subarray(0, numAtoms)

    s.residueStore.length = numGroups
    s.residueStore.count = numGroups
    s.residueStore.chainIndex = gChainIndex
    s.residueStore.residueTypeId = sd.groupTypeList
    s.residueStore.atomOffset = gAtomOffset
    s.residueStore.atomCount = gAtomCount
    s.residueStore.resno = sd.groupIdList.subarray(0, numGroups)
    s.residueStore.sstruc = sd.secStructList.subarray(0, numGroups)
    s.residueStore.inscode = sd.insCodeList.subarray(0, numGroups)

    s.chainStore.length = numChains
    s.chainStore.count = numChains
    s.chainStore.entityIndex = new Uint16Array(numChains)
    s.chainStore.modelIndex = cModelIndex
    s.chainStore.residueOffset = cGroupOffset
    s.chainStore.residueCount = cGroupCount
    s.chainStore.chainname = sd.chainNameList.subarray(0, numChains * 4)
    s.chainStore.chainid = sd.chainIdList.subarray(0, numChains * 4)

    s.modelStore.length = numModels
    s.modelStore.count = numModels
    s.modelStore.chainOffset = mChainOffset
    s.modelStore.chainCount = mChainCount

    //

    var groupTypeDict = {}
    for (i = 0, il = sd.groupList.length; i < il; ++i) {
      var groupType = sd.groupList[ i ]
      var atomTypeIdList = []
      for (j = 0, jl = groupType.atomNameList.length; j < jl; ++j) {
        var element = groupType.elementList[ j ].toUpperCase()
        var atomname = groupType.atomNameList[ j ]
        atomTypeIdList.push(s.atomMap.add(atomname, element))
      }
      var chemCompType = groupType.chemCompType.toUpperCase()
      var hetFlag = ChemCompHetero.includes(chemCompType)

      var numGroupBonds = groupType.bondOrderList.length
      var atomIndices1 = new Array(numGroupBonds)
      var atomIndices2 = new Array(numGroupBonds)
      for (j = 0; j < numGroupBonds; ++j) {
        atomIndices1[ j ] = groupType.bondAtomList[ j * 2 ]
        atomIndices2[ j ] = groupType.bondAtomList[ j * 2 + 1 ]
      }
      var bonds = {
        atomIndices1: atomIndices1,
        atomIndices2: atomIndices2,
        bondOrders: groupType.bondOrderList
      }

      groupTypeDict[ i ] = s.residueMap.add(
        groupType.groupName, atomTypeIdList, hetFlag, chemCompType, bonds
      )
    }

    for (i = 0, il = numGroups; i < il; ++i) {
      s.residueStore.residueTypeId[ i ] = groupTypeDict[ s.residueStore.residueTypeId[ i ] ]
    }

    for (i = 0, il = s.atomStore.count; i < il; ++i) {
      var residueIndex = s.atomStore.residueIndex[ i ]
      var residueType = s.residueMap.list[ s.residueStore.residueTypeId[ residueIndex ] ]
      var resAtomOffset = s.residueStore.atomOffset[ residueIndex ]
      s.atomStore.atomTypeId[ i ] = residueType.atomTypeIdList[ i - resAtomOffset ]
    }

    if (sd.secStructList) {
      var secStructLength = sd.secStructList.length
      for (i = 0, il = s.residueStore.count; i < il; ++i) {
        // with ( i % secStructLength ) secStruct entries are reused
        var sstruc = SstrucMap[ s.residueStore.sstruc[ i % secStructLength ] ]
        if (sstruc !== undefined) s.residueStore.sstruc[ i ] = sstruc
      }
    }

    //

    if (sd.entityList) {
      sd.entityList.forEach(function (e, i) {
        s.entityList[ i ] = new Entity(
                    s, i, e.description, e.type, e.chainIndexList
                )
      })
    }

    if (sd.bioAssemblyList) {
      sd.bioAssemblyList.forEach(function (_assembly, k) {
        var id = k + 1
        var assembly = new Assembly(id)
        s.biomolDict[ 'BU' + id ] = assembly
        var chainToPart = {}
        _assembly.transformList.forEach(function (_transform) {
          var matrix = new Matrix4().fromArray(_transform.matrix).transpose()
          var chainList = _transform.chainIndexList.map(function (chainIndex) {
            var chainname = ''
            for (var k = 0; k < 4; ++k) {
              var code = sd.chainNameList[ chainIndex * 4 + k ]
              if (code) {
                chainname += String.fromCharCode(code)
              } else {
                break
              }
            }
            return chainname
          })
          var part = chainToPart[ chainList ]
          if (part) {
            part.matrixList.push(matrix)
          } else {
            chainToPart[ chainList ] = assembly.addPart([ matrix ], chainList)
          }
        })
      })
    }

    if (sd.ncsOperatorList) {
      var ncsName = 'NCS'
      var ncsAssembly = new Assembly(ncsName)
      var ncsPart = ncsAssembly.addPart()
      sd.ncsOperatorList.forEach(function (_operator) {
        var matrix = new Matrix4().fromArray(_operator).transpose()
        ncsPart.matrixList.push(matrix)
      })
      if (ncsPart.matrixList.length > 0) {
        s.biomolDict[ ncsName ] = ncsAssembly
      }
    }

    var uc = sd.unitCell
    if (uc && Array.isArray(uc) && uc[ 0 ]) {
      s.unitcell = new Unitcell({
        a: uc[ 0 ],
        b: uc[ 1 ],
        c: uc[ 2 ],
        alpha: uc[ 3 ],
        beta: uc[ 4 ],
        gamma: uc[ 5 ],
        spacegroup: sd.spaceGroup
      })
    } else {
      s.unitcell = undefined
    }

    // calculate backbone bonds
    calculateBondsBetween(s, true)

    // calculate rung bonds
    calculateBondsWithin(s, true)

    s.finalizeAtoms()
    s.finalizeBonds()

    buildUnitcellAssembly(s)

    if (Debug) Log.timeEnd('MmtfParser._parse ' + this.name)
  }
}

ParserRegistry.add('mmtf', MmtfParser)

export default MmtfParser