Source: set/xmm-set.js

  1. // An xmm-compatible training set must have the following fields :
  2. // - bimodal (boolean)
  3. // - column_names (array of strings)
  4. // - dimension (integer)
  5. // - dimension_input (integer < dimension)
  6. // - phrases (array of phrases)
  7. // - on export, each phrase must have an extra "index" field
  8. // => when the class returns a set with getPhrasesOfLabel or getTrainingSet,
  9. // it should add these index fields before returning the result.
  10. // => when a set is added with addTrainingSet, the indexes must be removed
  11. // from the phrases before they are added to the internal array
  12. /**
  13. * XMM compatible training set manager utility <br />
  14. * Class to ease the creation of XMM compatible training sets. <br />
  15. * Phrases should be generated with the PhraseMaker class or the original XMM library.
  16. */
  17. class SetMaker {
  18. constructor() {
  19. this._config = {};
  20. this._phrases = [];
  21. }
  22. /***
  23. * The current total number of phrases in the set.
  24. * @readonly
  25. */
  26. // get size() {
  27. // return this._phrases.length;
  28. // }
  29. /**
  30. * A valid XMM training set, ready to be processed by the XMM library.
  31. * @typedef xmmTrainingSet
  32. * @type {Object}
  33. * @name xmmTrainingSet
  34. * @property {Boolean} bimodal - Indicates wether the set's phrases data should be considered bimodal.
  35. * If true, the <code>dimension_input</code> property will be taken into account.
  36. * @property {Number} dimension - Size of a vector element of the set's phrases.
  37. * @property {Number} dimension_input - Size of the part of an input vector element that should be used for training.
  38. * This implies that the rest of the vector (of size <code>dimension - dimension_input</code>)
  39. * will be used for regression. Only taken into account if <code>bimodal</code> is true.
  40. * @property {Array.String} column_names - Array of string identifiers describing each scalar of a phrase's vector elements.
  41. * Typically of size <code>dimension</code>.
  42. * @property {Array.xmmPhrase} phrases - Array of valid XMM phrases containing an extra "index" field.
  43. */
  44. /**
  45. * Get the total number of phrases actually in the set.
  46. * @returns {Number}
  47. */
  48. getSize() {
  49. return this._phrases.length;
  50. }
  51. /**
  52. * Add an XMM phrase to the current set.
  53. * @param {xmmPhrase} phrase - An XMM compatible phrase (ie created with the PhraseMaker class)
  54. */
  55. addPhrase(phrase) {
  56. if (this._phrases.length === 0) {
  57. this._setConfigFrom(phrase);
  58. } else if (!this._checkCompatibility(phrase)) {
  59. throw new Error('Bad phrase format: added phrase must match current set configuration');
  60. }
  61. this._phrases.push(JSON.parse(JSON.stringify(phrase)));
  62. }
  63. /**
  64. * Add all phrases from another training set.
  65. * @param {xmmTrainingSet} set - An XMM compatible training set.
  66. */
  67. addTrainingSet(set) {
  68. if (this._phrases.length === 0) {
  69. this._setConfigFrom(set);
  70. } else if (!this._checkCompatibility(set)) {
  71. throw new Error('Bad set format: added set must match current set configuration');
  72. }
  73. const phrases = set['phrases'];
  74. for (let phrase of phrases) {
  75. this._phrases.push(phrase);
  76. }
  77. }
  78. /**
  79. * Get phrase at a particular index.
  80. * @param {Number} index - The index of the phrase to retrieve.
  81. * @returns {xmmPhrase}
  82. */
  83. getPhrase(index) {
  84. if (index > -1 && index < this._phrases.length) {
  85. // return a new copy of the phrase :
  86. return JSON.parse(JSON.stringify(this._phrases[index]));
  87. }
  88. return null;
  89. }
  90. /**
  91. * Remove phrase at a particular index.
  92. * @param {Number} index - The index of the phrase to remove.
  93. */
  94. removePhrase(index) {
  95. if (index > -1 && index < this._phrases.length) {
  96. this._phrases.splice(index, 1);
  97. }
  98. }
  99. /**
  100. * Return the subset of phrases of a particular label.
  101. * @param {String} label - The label of the phrases from which to generate the sub-training set.
  102. * @returns {xmmTrainingSet}
  103. */
  104. getPhrasesOfLabel(label) {
  105. const res = {};
  106. for (let prop in this._config) {
  107. res[prop] = this._config[prop];
  108. }
  109. res['phrases'] = [];
  110. let index = 0;
  111. for (let phrase of this._phrases) {
  112. if (phrase['label'] === label) {
  113. let p = JSON.parse(JSON.stringify(phrase));
  114. p['index'] = index++;
  115. res['phrases'].push(p);
  116. }
  117. }
  118. return res;
  119. }
  120. /**
  121. * Remove all phrases of a particular label.
  122. * @param {String} label - The label of the phrases to remove.
  123. */
  124. removePhrasesOfLabel(label) {
  125. for (let i = this._phrases.length - 1; i >= 0; i--) {
  126. if (this._phrases[i]['label'] === label) {
  127. this._phrases.splice(i, 1);
  128. }
  129. }
  130. }
  131. /**
  132. * Return the current training set.
  133. * @returns {xmmTrainingSet}
  134. */
  135. getTrainingSet() {
  136. let res = {};
  137. for (let prop in this._config) {
  138. res[prop] = this._config[prop];
  139. }
  140. res['phrases'] = [];
  141. let index = 0;
  142. for (let phrase of this._phrases) {
  143. let p = JSON.parse(JSON.stringify(phrase));
  144. p['index'] = index++;
  145. res['phrases'].push(p);
  146. }
  147. return res;
  148. }
  149. /**
  150. * Clear the whole set.
  151. */
  152. clear() {
  153. this._config = {};
  154. this._phrases = [];
  155. }
  156. /**
  157. * Check the config of a phrase or training set before applying it
  158. * to the current class.
  159. * Throw errors if not valid ?
  160. * @private
  161. */
  162. _setConfigFrom(obj) {
  163. for (let prop in obj) {
  164. if (prop === 'bimodal' && typeof(obj['bimodal']) === 'boolean') {
  165. this._config[prop] = obj[prop];
  166. } else if (prop === 'column_names' && Array.isArray(obj[prop])) {
  167. this._config[prop] = obj[prop].slice(0);
  168. } else if (prop === 'dimension' && Number.isInteger(obj[prop])) {
  169. this._config[prop] = obj[prop];
  170. } else if (prop === 'dimension_input' && Number.isInteger(obj[prop])) {
  171. this._config[prop] = obj[prop];
  172. }
  173. }
  174. }
  175. /**
  176. * Check if the phrase or set is compatible with the current settings.
  177. * @private
  178. */
  179. _checkCompatibility(obj) {
  180. if (obj['bimodal'] !== this._config['bimodal']
  181. || obj['dimension'] !== this._config['dimension']
  182. || obj['dimension_input'] !== this._config['dimension_input']) {
  183. return false;
  184. }
  185. const ocn = obj['column_names'];
  186. const ccn = this._config['column_names'];
  187. if (ocn && ccn) {
  188. if (ocn.length !== ccn.length) {
  189. return false;
  190. } else {
  191. for (let i = 0; i < ocn.length; i++) {
  192. if (ocn[i] !== ccn[i]) {
  193. return false;
  194. }
  195. }
  196. }
  197. }
  198. return true;
  199. }
  200. };
  201. export default SetMaker;