1
1
import { CodeTokens } from './CodeTokens' ;
2
2
import { PythonIdentifiers } from './identifiers/PythonIdentifiers' ;
3
3
import { CommonLangIdentifiers } from './identifiers/CommonLangIdentifiers' ;
4
+ import { GolangIdentifiers } from './identifiers/GolangIdentifiers' ;
4
5
5
6
export class CodeTokenizer {
6
7
private pyIdentifiers = new PythonIdentifiers ( ) ;
7
8
8
- public tokenizeCode ( code : string , language : string ) : CodeTokens [ ] {
9
+ private golangIdentifiers = new GolangIdentifiers ( ) ;
10
+
11
/**
 * Converts source code into a list of tokens for the given language.
 *
 * Supported language keys are 'python' and 'go'; each one selects the
 * matching identifier set and delegates to `tokenizeCode`. Any other
 * language yields an empty token list.
 *
 * @param code - Source text to tokenize.
 * @param language - Language key selecting the identifier set.
 * @returns The tokens produced by `tokenizeCode`, or `[]` when the
 *   language is not supported.
 */
public tokenize(code: string, language: string): CodeTokens[] {
  let identifiers: CommonLangIdentifiers | undefined;

  if (language === 'python') {
    identifiers = this.pyIdentifiers;
  } else if (language === 'go') {
    identifiers = this.golangIdentifiers;
  }

  // No identifier set means the language is unsupported.
  if (identifiers === undefined) {
    return [];
  }
  return this.tokenizeCode(code, identifiers);
}
16
21
17
- private tokenizePython (
22
+ private tokenizeCode (
18
23
code : string ,
19
24
languageIdentifier : CommonLangIdentifiers ,
20
25
) : CodeTokens [ ] {
@@ -24,68 +29,71 @@ export class CodeTokenizer {
24
29
lines . forEach ( ( line ) => {
25
30
const trimedLine = line . trim ( ) ;
26
31
27
- // Function detection
28
- if (
29
- trimedLine . startsWith ( languageIdentifier . functionIdentifier ( ) ) &&
30
- trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
31
- ) {
32
- tokenizedCode . push ( CodeTokens . FUNC ) ;
33
- const numberOfParams =
34
- CodeTokenizer . numberOfFunctionParameters ( trimedLine ) ;
35
- tokenizedCode = CodeTokenizer . pushTokensInTab (
36
- tokenizedCode ,
37
- CodeTokens . FUNCPARAM ,
38
- numberOfParams ,
39
- ) ;
32
+ if ( trimedLine !== '' && trimedLine !== '}' )
33
+ if (
34
+ trimedLine . startsWith ( languageIdentifier . functionIdentifier ( ) ) &&
35
+ trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
36
+ ) {
37
+ // Function detection
38
+ tokenizedCode . push ( CodeTokens . FUNC ) ;
39
+ const numberOfParams =
40
+ CodeTokenizer . numberOfFunctionParameters ( trimedLine ) ;
41
+ tokenizedCode = CodeTokenizer . pushTokensInTab (
42
+ tokenizedCode ,
43
+ CodeTokens . FUNCPARAM ,
44
+ numberOfParams ,
45
+ ) ;
46
+
47
+ // For line
48
+ } else if (
49
+ trimedLine . startsWith ( languageIdentifier . forIdentifier ( ) ) &&
50
+ trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
51
+ ) {
52
+ tokenizedCode . push ( CodeTokens . FOR ) ;
53
+ // While line
54
+ } else if (
55
+ trimedLine . startsWith ( languageIdentifier . whileIdentifier ( ) ) &&
56
+ trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
57
+ ) {
58
+ tokenizedCode . push ( CodeTokens . WHILE ) ;
59
+ // If line
60
+ } else if (
61
+ trimedLine . startsWith ( languageIdentifier . ifIdentifier ( ) ) &&
62
+ trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
63
+ ) {
64
+ tokenizedCode . push ( CodeTokens . IF ) ;
65
+ const andOccurrences = CodeTokenizer . numberTokenPresentInLine (
66
+ languageIdentifier . andIdentifier ( ) ,
67
+ trimedLine ,
68
+ ) ;
69
+ tokenizedCode = CodeTokenizer . pushTokensInTab (
70
+ tokenizedCode ,
71
+ CodeTokens . AND ,
72
+ andOccurrences ,
73
+ ) ;
40
74
41
- // For line
42
- } else if (
43
- trimedLine . startsWith ( languageIdentifier . forIdentifier ( ) ) &&
44
- trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
45
- ) {
46
- tokenizedCode . push ( CodeTokens . FOR ) ;
47
- // While line
48
- } else if (
49
- trimedLine . startsWith ( languageIdentifier . whileIdentifier ( ) ) &&
50
- trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
51
- ) {
52
- tokenizedCode . push ( CodeTokens . WHILE ) ;
53
- // If line
54
- } else if (
55
- trimedLine . startsWith ( languageIdentifier . ifIdentifier ( ) ) &&
56
- trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
57
- ) {
58
- tokenizedCode . push ( CodeTokens . IF ) ;
59
- const andOccurrences = CodeTokenizer . numberTokenPresentInLine (
60
- languageIdentifier . andIdentifier ( ) ,
61
- trimedLine ,
62
- ) ;
63
- tokenizedCode = CodeTokenizer . pushTokensInTab (
64
- tokenizedCode ,
65
- CodeTokens . AND ,
66
- andOccurrences ,
67
- ) ;
75
+ const orOccurrences = CodeTokenizer . numberTokenPresentInLine (
76
+ languageIdentifier . orIdentifier ( ) ,
77
+ trimedLine ,
78
+ ) ;
79
+ tokenizedCode = CodeTokenizer . pushTokensInTab (
80
+ tokenizedCode ,
81
+ CodeTokens . OR ,
82
+ orOccurrences ,
83
+ ) ;
68
84
69
- const orOccurrences = CodeTokenizer . numberTokenPresentInLine (
70
- languageIdentifier . orIdentifier ( ) ,
71
- trimedLine ,
72
- ) ;
73
- tokenizedCode = CodeTokenizer . pushTokensInTab (
74
- tokenizedCode ,
75
- CodeTokens . OR ,
76
- orOccurrences ,
77
- ) ;
85
+ // Case line
86
+ } else if (
87
+ trimedLine . startsWith ( languageIdentifier . caseIdentifier ( ) ) &&
88
+ trimedLine . endsWith ( ':' )
89
+ ) {
90
+ tokenizedCode . push ( CodeTokens . CASE ) ;
91
+ // Other lines
92
+ } else {
93
+ tokenizedCode . push ( CodeTokens . LINE ) ;
94
+ }
78
95
79
- // Case line
80
- } else if (
81
- trimedLine . startsWith ( languageIdentifier . caseIdentifier ( ) ) &&
82
- trimedLine . endsWith ( languageIdentifier . endLoopAndCondIdentifier ( ) )
83
- ) {
84
- tokenizedCode . push ( CodeTokens . CASE ) ;
85
- // Other lines
86
- } else {
87
- tokenizedCode . push ( CodeTokens . LINE ) ;
88
- }
96
+ console . log ( trimedLine ) ;
89
97
} ) ;
90
98
91
99
console . log ( tokenizedCode ) ;
@@ -95,7 +103,8 @@ export class CodeTokenizer {
95
103
96
104
/**
 * Counts the parameters declared on a function-definition line.
 *
 * Only the text between the first `(` and the first `)` after it is
 * inspected (to the end of the line when no `)` follows). An empty or
 * whitespace-only list counts as 0 parameters; otherwise the result is the
 * comma count reported by `numberTokenPresentInLine` plus one, so a single
 * parameter is counted as 1.
 *
 * Limitation: nested parentheses (e.g. tuple defaults) and a leading
 * parenthesised group that is not the parameter list are not distinguished.
 *
 * @param line - A single (trimmed) line containing a function signature.
 * @returns The number of parameters found, or 0 when the line has no `(`.
 */
private static numberOfFunctionParameters(line: string): number {
  const openIndex = line.indexOf('(');
  if (openIndex === -1) {
    // No parameter list at all.
    return 0;
  }

  const closeIndex = line.indexOf(')', openIndex + 1);
  // Exclude the opening parenthesis itself from the inspected text.
  const parameterList = line.slice(
    openIndex + 1,
    closeIndex === -1 ? line.length : closeIndex,
  );

  if (parameterList.trim() === '') {
    return 0;
  }
  // N commas separate N + 1 parameters.
  return this.numberTokenPresentInLine(',', parameterList) + 1;
}
100
109
101
110
private static numberTokenPresentInLine ( token : string , line : string ) : number {
0 commit comments