import TokenTransformingMixins from 'causal-net/packages/causality-preprocessing/src/NLP/tokenTransforming.mixins.js'
TokenTransformingMixins
Extends:
Method Summary
| Public Methods | ||
| public |
badWordsFilter(tokens: Array , badWordList: Array ): Array Remove bad words from tokens |
|
| public |
tokenize(sentence: String ): Array Tokenize a sentence |
|
| public |
wordDuplicateRemove(tokens: Array ): * Remove duplicated tokens |
|
| public |
wordFreqCount(tokens: Array , freqCount: Object ): Object Count the number of appearances of each token in the token list |
|
Public Methods
public badWordsFilter(tokens: Array , badWordList: Array ): Array source
Remove bad words from tokens
Params:
| Name | Type | Attribute | Description |
| tokens | Array | array of string tokens |
|
| badWordList | Array | Array of bad words |
Return:
| Array | tokens without bad words |
public tokenize(sentence: String ): Array source
Tokenize a sentence
Params:
| Name | Type | Attribute | Description |
| sentence | String | the sentence string to tokenize |
Return:
| Array | array of tokenized sentences |
public wordDuplicateRemove(tokens: Array ): * source
Remove duplicated tokens
Params:
| Name | Type | Attribute | Description |
| tokens | Array | array of string tokens |
Return:
| * |
public wordFreqCount(tokens: Array , freqCount: Object ): Object source
Count the number of appearances of each token in the token list
Params:
| Name | Type | Attribute | Description |
| tokens | Array | array of string tokens |
|
| freqCount | Object | object from a previous wordFreqCount call; an empty object is used if not provided |
Return:
| Object | json object with token as key and counting number as corresponding value |