/* A user-defined record type: an integer ID bundled with a fixed-size name buffer */
struct mystruct_st {
    int  id;        /* numeric identifier (an int is typically 4 bytes) */
    char name[16];  /* 16-byte buffer: up to 15 characters plus the '\0' terminator */
};
int main(void) {
    /* Declare a struct, then fill in its members one at a time */
    struct mystruct_st person;
    person.id = 12345;
    /*
     * strncpy() writes no '\0' when the source fills the whole buffer,
     * so copy at most size-1 bytes and terminate explicitly. Using sizeof
     * keeps the bound tied to the array's declared size.
     */
    strncpy(person.name, "Alice", sizeof person.name - 1);
    person.name[sizeof person.name - 1] = '\0';

    /* Declare and initialize in one step; values are listed in member order */
    struct mystruct_st another = {67890, "Bob"};
    return 0;
}
```
---
## Unions
```c
/* A union overlays its members: x and y occupy the same bytes of storage */
union number {
    int   x;  /* integer view of the storage (typically 4 bytes) */
    float y;  /* float view of the same storage (typically 4 bytes) */
};
int main() {
    union number n;

    /* Store through the int member and read it back */
    n.x = 42;
    printf("As int: %d\n", n.x);

    /* Storing through the float member overwrites the same bytes */
    n.y = 3.14f;
    printf("As float: %f\n", n.y);

    /* Reading n.x here would reinterpret the float's bit pattern as an int, not give 42 */
    return 0;
}
```
Key difference: Struct members have separate memory; union members share the same location.
---
## Memory Layout
```mermaid
flowchart TB
    subgraph Memory["Virtual Memory Layout"]
        K["Kernel Space<br/>(inaccessible)"]
        S["Stack<br/>(local vars, function calls)<br/>Grows downward ↓"]
        H["Heap<br/>(malloc/free)<br/>Grows upward ↑"]
        G["Global/Static Data"]
        C["Code (Text)<br/>(program instructions)"]
    end
```
---
## Pointers
A **pointer** is a variable that holds a memory address.
```c
int main() {
    int x = 42;    /* an ordinary int variable holding 42 */
    int *p = &x;   /* a pointer to int, set to the address of x (& = "address of") */

    /* The variable itself */
    printf("Value of x: %d\n", x);                  /* 42 */
    printf("Address of x: %p\n", (void *)&x);       /* 0x7ffd... */

    /* The pointer: its own value is an address; *p follows it (* = dereference) */
    printf("Value of p: %p\n", (void *)p);          /* same address */
    printf("Value at p: %d\n", *p);                 /* 42 */
    return 0;
}
```
---
## Pointer Operators
| Operator | Name | Purpose |
|----------|------|---------|
| `&` | Address-of | Get the memory address of a variable |
| `*` | Dereference | Access the value at a memory address |
```c
int x = 42;
int *p = &x; /* & takes the address of x; p now holds that address */
int y = *p; /* * reads the int stored at the address in p, so y is 42 */
*p = 100; /* writing through p stores into x: x becomes 100 (y is a separate copy and stays 42) */
```
---
## Strings as Character Arrays
In C, strings are arrays of characters terminated by `'\0'`:
```
+---+---+---+---+---+----+
| 1 | 2 | 3 | + | 4 | \0 |
+---+---+---+---+---+----+
  ^                   ^
  |                   |
  s                  end
```
```c
char str[] = "123+4"; /* array sized by its initializer: 5 characters plus the '\0' */
printf("Length: %zu\n", strlen(str)); /* 5: strlen counts the characters before the '\0' */
printf("Size: %zu\n", sizeof(str)); /* 6: sizeof counts the whole array, '\0' included */
```
---
## Scanner Data Structures
```c
/* Size in bytes of a token's text buffer, including the terminating '\0' */
#define SCAN_TOKEN_LEN 33
/* Number of token slots in the token table */
#define SCAN_TOKEN_TABLE_LEN 1024
/* Token types: one named constant for each kind of token the scanner produces */
enum scan_token_enum {
    TK_INTLIT,  /* integer literal */
    TK_PLUS,    /* '+' */
    TK_MINUS,   /* '-' */
    TK_EOT,     /* end of text */
    /* ... more types ... */
};
/* One scanned token: its type plus the text it was scanned from */
struct scan_token_st {
    enum scan_token_enum id;     /* which kind of token this is */
    char value[SCAN_TOKEN_LEN];  /* the token's text, stored as a '\0'-terminated string */
};
```
---
## Token Table
```c
/* The token table: every token produced by the scanner, in scan order */
struct scan_table_st {
    struct scan_token_st table[SCAN_TOKEN_TABLE_LEN];  /* token storage */
    int len;   /* how many entries of table are in use */
    int next;  /* index of the next token for the parser */
};
```
After scanning `"1 + 2"`:
```
+--------+--------+--------+--------+
| INTLIT | PLUS | INTLIT | EOT |
| "1" | "+" | "2" | "" |
+--------+--------+--------+--------+
```
---
## Helper Functions
```c
#include <stdbool.h>

/*
 * Check if character is a decimal digit ('0' through '9').
 * Returns true for a digit, false for any other character.
 */
bool scan_is_digit(char c) {
    return c >= '0' && c <= '9';
}
/* Check if character is a hexadecimal digit: 0-9, a-f, or A-F */
bool scan_is_hexdigit(char c) {
    if (scan_is_digit(c)) {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
/* Check if character is whitespace; only space and tab count */
bool scan_is_whitespace(char c) {
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
```
---
## Scanning Integer Literals
```c
/* Scan an integer literal: digit (digit)* */
char *scan_int(char *p, char *end, struct scan_token_st *tp) {
int i = 0;
/* Read digits until non-digit or end */
while (scan_is_digit(*p) && p < end) {
tp->value[i] = *p;
p += 1;
i += 1;
}
tp->value[i] = '\0'; /* Null-terminate */
tp->id = TK_INTLIT;
return p;
}
```
---
## Reading Single Tokens
```c
/*
 * Read a token of known length: copy len characters starting at p into
 * tp->value, terminate the string, and tag the token with id.
 * Returns the position just past the copied characters.
 */
char *scan_read_token(struct scan_token_st *tp,
                      char *p, int len,
                      enum scan_token_enum id) {
    int copied = 0;

    while (copied < len) {
        tp->value[copied] = *p;
        p += 1;
        copied += 1;
    }
    tp->value[copied] = '\0';
    tp->id = id;
    return p;
}
```
---
## Main Scan Function (Part 1)
```c
/*
 * Scan one token starting at p and store it in tp.
 * end is the limit of the input: p == end means nothing is left to scan,
 * and characters at or beyond end are never read.
 */
char *scan_token(char *p, char *end, struct scan_token_st *tp) {
    if (p == end) {
        /* End of input */
        p = scan_read_token(tp, p, 0, TK_EOT);
    } else if (scan_is_whitespace(*p)) {
        /* Skip whitespace; test p < end first so *p is never read at end */
        while ((p < end) && scan_is_whitespace(*p)) {
            p += 1;
        }
        /* Scan whatever follows the whitespace (TK_EOT if the input ended) */
        p = scan_token(p, end, tp);
    } else if (*p == '0' && (p + 1 < end) && *(p + 1) == 'x') {
        /* Hexadecimal literal: scan_hex() is handed the text after "0x" */
        p = scan_hex(p + 2, end, tp);
    } else if (*p == '0' && (p + 1 < end) && *(p + 1) == 'b') {
        /* Binary literal: scan_bin() is handed the text after "0b" */
        p = scan_bin(p + 2, end, tp);
    }
    /* continued... */
```
---
## Main Scan Function (Part 2)
```c
/* ...continued: the remaining branches of scan_token()'s if/else chain */
    else if (scan_is_digit(*p)) {
        /* Decimal integer */
        p = scan_int(p, end, tp);
    } else if (*p == '+') {
        p = scan_read_token(tp, p, 1, TK_PLUS);
    } else if (*p == '-') {
        p = scan_read_token(tp, p, 1, TK_MINUS);
    } else if (*p == '*') {
        p = scan_read_token(tp, p, 1, TK_MULT);
    } else if (*p == '>' && (p + 1 < end) && *(p + 1) == '>') {
        /* Two-char token; confirm p + 1 is inside the input before looking ahead */
        p = scan_read_token(tp, p, 2, TK_LSR);
    } else {
        /* No rule matched: report the offending character and stop */
        printf("scan_error: Invalid char '%c'\n", *p);
        exit(-1);
    }
    return p;
}
```
---
## Scanning All Tokens
```c
void scan_table_scan(struct scan_table_st *st,
char *input, int len) {
struct scan_token_st *tp;
char *p = input;
char *end = p + len;
while (true) {
tp = &st->table[st->len];
st->len += 1;
p = scan_token(p, end, tp);
if (tp->id == TK_EOT) {
break;
}
}
}
```
---
## Complete Example
```c
int main(int argc, char **argv) {
    char *input = "10 + 0xFF * 0b1010";
    int input_len = strlen(input);
    struct scan_table_st scan_table;
    int idx;

    /* Start from an empty table, then scan the whole input into it */
    scan_table.next = 0;
    scan_table.len = 0;
    scan_table_scan(&scan_table, input, input_len);

    /* Print all tokens: index, numeric type, and text */
    idx = 0;
    while (idx < scan_table.len) {
        struct scan_token_st *tok = &scan_table.table[idx];
        printf("[%d] type=%d value=\"%s\"\n", idx, tok->id, tok->value);
        idx += 1;
    }
    return 0;
}
```
---
## Output
```text
Input: "10 + 0xFF * 0b1010"
Tokens:
[0] type=0 value="10"
[1] type=5 value="+"
[2] type=1 value="FF"
[3] type=7 value="*"
[4] type=2 value="1010"
[5] type=16 value=""
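```
Note: the numeric `type` values are enum constants from the complete `scan_token_enum`. The listing under "Scanner Data Structures" is abbreviated, so positions in that listing do not correspond to these numbers.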
---
## Key Concepts
| Concept | Description |
|---------|-------------|
| Pointer | Variable storing a memory address |
| `&` operator | Gets address of a variable |
| `*` operator | Dereferences (accesses value at address) |
| `struct` | Groups related data together |
| `enum` | Named integer constants |
| Token | Type + value from scanner |
---
## Summary
1. **C Compilation**: Source → Preprocessing → Compilation → Linking → Executable
2. **Memory**: Stack (local vars), Heap (dynamic), Global data, Code
3. **Pointers**: `&` gets address, `*` dereferences
4. **Scanner**: Converts characters to tokens using char pointer traversal
5. **Pattern**: Check character, consume it, advance pointer