Storage and Buffer Manager

Data structure

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#define MAXPAGES 100000
#define FRAMESIZE 4096
#define PAGESIZE 4096
#define DEFBUFSIZE 1024

struct bFrame
{
char field[FRAMESIZE];
};

struct BCB
{
int page_id;
int frame_id;
int count; // number of times the page is accessed (incomplete)
int dirty;
BCB* nextBCB;
BCB* nextLRU;
BCB* prevLRU;
BCB() {
page_id = -1;
frame_id = -1;
count = 0;
dirty = 0;
nextBCB = NULL;
nextLRU = NULL;
prevLRU = NULL;
}
BCB(int page_id, int frame_id, int count, int dirty, BCB* nextBCB, BCB* nextLRU, BCB* prevLRU) {
this->page_id = page_id;
this->frame_id = frame_id;
this->count = count;
this->dirty = dirty;
this->nextBCB = nextBCB;
this->nextLRU = nextLRU;
this->prevLRU = prevLRU;
}
};

struct Page
{
int page_id;
int frame_id;
Page() {
page_id = -1;
frame_id = -1;
}
};

Buffer Manager

  • FixPage(int page_id, int cmd)

    该函数查看所需 page 是否已经在 buffer 中,若存在返回它对应的 frame_id。若该页不在内存,该函数选择一个 victim page,当一个被选中的页面 dirty 位被标记,需要将选中页面写回磁盘。并且若有需要加载所需 page。更新 LRU 队列,pageframe 之间的 hash table

  • FixNewPage()

    当产生插入,目录分割或创建对象等操作时,该函数被调用,用来产生一个新的 page 及对应的 frame。返回一个 page_idframe_id。更新 LRU 队列,pageframe 之间的 hash table

  • UnfixPage(int page_id)

    该函数是与 FixPageFixNewPage 配合使用。这个函数每次将 frame 上的 fix_count1。 当数量为 0 时,对 page 的占用就被解除,若被选中 frame 可以被移除。page_id 可以被转化成 frame_id 并且它可以被解除占用。当一个页面上的 fix_count 等于 0 时,该页面可以被选为 victim page

  • NumFreeFrames()

    查看 buffer 返回空闲可用的 buffer page 数量。

  • SelectVictim()

    选择一个空的 frame 进行替换。若没有则调用 **SelectLRUVictim()**。

  • SelectLRUVictim()

    LRU 队列中选择 frame 进行替换。

  • Hash(int page_id)

    该函数使用 page_id 作为参数并返回 frame id

  • RemoveBCB(int page_id)

    该函数从队列里移除 page_id 对应的 BCB。只有在 SelectVictim() 函数需要替换一个 frame 时才被调用。

  • SetDirty(int frame_id)

    frame_id 设置 dirty 位。dirty 位用于指示是否需要写回 frame。一个 frame 若被修改过,则需要被写回。

  • UnsetDirty(int frame_id)

    该函数将对应 frame_iddirty 位设置为 0。当调用了 setDirty() 函数但,实际上该页只是临时改动,不需要被写回,也不想被存储到磁盘,调用 UnsetDirty 函数。

  • WriteDirtys()

    该函数在系统结束前必须被调用。函数的目的是写回 buffer 中的所有 dirty_bit1page 到磁盘。

  • PrintFrame(int frame_id)

    打印 frame 内容。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class BMgr
{
public:
BMgr(DSMgr& dsmgr) {
memset(ftop, -1, sizeof(ftop));
memset(ptof, NULL, sizeof(ptof));
lru_head = NULL;
lru_tail = NULL;
ds = &dsmgr;
hit_cnt = 0;
}
// Interface functions
int FixPage(int page_id, int cmd) {
int frame_id = -1;
int hash_id = Hash(page_id);
BCB* bcb = ptof[hash_id];
while (bcb != NULL) {
if (bcb->page_id == page_id) {
frame_id = bcb->frame_id;
break;
}
bcb = bcb->nextBCB;
}
if (frame_id == -1) { // page not in buffer
frame_id = SelectVictim(); // choose a frame to replace
if (ftop[frame_id] != -1) { // frame is occupied
BCB tmp = RemoveBCB(ftop[frame_id]);
if (tmp.dirty == 1) { // write back if dirty
ds->WritePage(tmp.page_id, buf[tmp.frame_id]);
}
}
buf[frame_id] = ds->ReadPage(page_id);
}
else { // page in buffer
RemoveBCB(page_id);
hit_cnt++;
}
// insert into LRU
BCB* newbcb = new BCB(page_id, frame_id, 0, 0, ptof[hash_id], lru_head, NULL);
newbcb->count++;
lru_head = newbcb;
if (lru_tail == NULL) { // initiate lru at first time
lru_tail = newbcb;
}
if (cmd == 1) { // if write, set dirty
newbcb->dirty = 1;
}
// update hash table
ptof[hash_id] = newbcb;
ftop[frame_id] = page_id;
return frame_id;
}
Page FixNewPage() {
Page page;
for (int i = 0; i < MAXPAGES; i++) {
if (!ds->GetUse(i)) {
ds->SetUse(i, 1);
page.page_id = i;
break;
}
}
int page_id = page.page_id;
int hash_id = Hash(page_id);
int frame_id = SelectVictim();
if (ftop[frame_id] != -1) { // frame is occupied
BCB tmp = RemoveBCB(ftop[frame_id]);
if (tmp.dirty == 1) { // write back if dirty
ds->WritePage(tmp.page_id, buf[tmp.frame_id]);
}
}
page.frame_id = frame_id;
BCB* newbcb = new BCB(page_id, frame_id, 0, 0, NULL, lru_head, NULL);
lru_head = newbcb;
if (lru_tail == NULL) { // initiate lru at first time
lru_tail = newbcb;
}
// update hash table
ptof[hash_id] = newbcb;
ftop[frame_id] = page_id;
return page;
}
int UnfixPage(int page_id) {
int hash_id = Hash(page_id);
BCB* bcb = ptof[hash_id];
while (bcb != NULL) {
if (bcb->page_id == page_id) {
break;
}
bcb = bcb->nextBCB;
}
bcb->count--;
return bcb->count;
}
int NumFreeFrames() {
int cnt = 0;
for (int i = 0; i < DEFBUFSIZE; i++) {
if (ftop[i] == -1) {
cnt++;
}
}
return cnt;
}
// Internal Functions
int SelectVictim() {
int frame_id = -1;
if (NumFreeFrames() > 0) {
for (int i = 0; i < DEFBUFSIZE; i++) {
if (ftop[i] == -1) {
frame_id = i;
break;
}
}
}
else {
frame_id = SelectLRUVictim();
}
return frame_id;
}
int SelectLRUVictim() {
BCB* tmp = lru_tail;
while (tmp->count > 0) {
tmp = tmp->prevLRU;
}
return tmp->frame_id;
}
int Hash(int page_id) {
return page_id % DEFBUFSIZE;
}
BCB RemoveBCB(int page_id) {
int hash_id = Hash(page_id);
BCB* tmp = new BCB();
BCB* bcb = ptof[hash_id];
if (bcb->page_id == page_id) {
tmp = bcb;
bcb = bcb->nextBCB;
}
else {
while (bcb->nextBCB != NULL) {
if (bcb->nextBCB->page_id == page_id) {
tmp = bcb->nextBCB;
bcb->nextBCB = tmp->nextBCB;
break;
}
bcb = bcb->nextBCB;
}
}
if (tmp->nextLRU != NULL) {
tmp->nextLRU->prevLRU = tmp->prevLRU;
}
else { // the last one of LRU
lru_tail = tmp->prevLRU;
}
if (tmp->prevLRU != NULL) {
tmp->prevLRU->nextLRU = tmp->nextLRU;
}
else { // the first one of LRU
lru_head = tmp->nextLRU;
}
ptof[hash_id] = bcb;
BCB ret = *tmp;
delete tmp;
return ret;
}
void SetDirty(int frame_id) {
int page_id = ftop[frame_id];
BCB *bcb = ptof[Hash(page_id)];
while (bcb != NULL)
{
if (bcb->page_id == page_id)
{
bcb->dirty = 1;
break;
}
bcb = bcb->nextBCB;
}
}
void UnsetDirty(int frame_id) {
int page_id = ftop[frame_id];
BCB *bcb = ptof[Hash(page_id)];
while (bcb != NULL)
{
if (bcb->page_id == page_id)
{
bcb->dirty = 0;
break;
}
bcb = bcb->nextBCB;
}
}
void WriteDirtys() {
BCB* bcb = NULL;
for (int i = 0; i < DEFBUFSIZE; i++) {
bcb = ptof[i];
while (bcb != NULL) {
if (bcb->dirty == 1) {
ds->WritePage(bcb->page_id, buf[bcb->frame_id]);
}
bcb = bcb->nextBCB;
}
}
}
void WriteBuffer(int frame_id, bFrame& frm) {
buf[frame_id] = frm;
}
void PrintFrame(int frame_id) {
cout<<buf[frame_id].field<<endl;
}
public:
int hit_cnt; // hit count
private:
// Hash Table
int ftop[DEFBUFSIZE]; // frame to page
BCB* ptof[DEFBUFSIZE]; // page to frame
DSMgr* ds; // data storage manager
bFrame buf[DEFBUFSIZE]; // buffer pool
BCB* lru_head; // recently used
BCB* lru_tail; // least recently used
};

Data Storage Manager

  • OpenFile(string filename)

  • CloseFile()

  • ReadPage(int page_id)

    Buffer Manager 中,ReadPage 函数被 FixPage 调用。返回值为它读取的内容。

  • WritePage(int frame_id, bFrame frm)

    当一个 pagebuffer 中取出时,调用 WritePage 函数。

  • Seek(int offset, int pos)

    移动文件指针

  • GetFile()

  • IncNumPages()

  • GetNumPages()

  • SetUse(int page_id, int use_bit)

    该函数在 page 数组中查找 set 位。这个数组用于记录 page 使用情况的变化。如果这个 page 中所有的记录都被删除,该页面即不再被使用。为了了解一个页面是否被重复使用过,该数组所有的 use_bit 位都需要被检查是否被置为 0FixNewPage 先检查数组的 use_bit 是否被置为 0。如果找到 1,说明 page 已经被重用。否则需要分配一个新的 page

  • GetUse(int page_id)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
class DSMgr
{
public:
DSMgr() {
currFile = NULL;
int i = 0;
iCnt = 0;
oCnt = 0;
numPages = 0;
memset(pages, 0, 100000 * sizeof(bool));
}
~DSMgr() {
if (currFile != NULL) {
fclose(currFile);
}
}
int OpenFile(string filename) {
currFile = fopen(filename.c_str(), "r+");
if (currFile == NULL) {
currFile = fopen(filename.c_str(), "w+");
if (currFile == NULL) {
return 0;
}
}
// set page_use and numPages
fseek(currFile, 0, SEEK_END);
long fileSize = ftell(currFile);
int tmp_numPages = numPages;
numPages += (fileSize + PAGESIZE - 1) / PAGESIZE; // round up
memset(&pages[tmp_numPages], 1, numPages - tmp_numPages); // set use bits
return 1;
}
int CloseFile() {
if (currFile == NULL) {
return 0;
}
fclose(currFile);
currFile = NULL;
return 1;
}
// read a page from the file
bFrame ReadPage(int page_id) {
bFrame frm;
if (currFile == NULL) {
return frm;
}
fseek(currFile, page_id * PAGESIZE, SEEK_SET);
fread(frm.field, sizeof(char), PAGESIZE, currFile);
iCnt++;
return frm;
}
// write a page to the file
int WritePage(int page_id, bFrame& frm) {
if (currFile == NULL) {
return 0;
}
fseek(currFile, page_id * PAGESIZE, SEEK_SET);
fwrite(frm.field, sizeof(char), PAGESIZE, currFile);
oCnt++;
return 1;
}
// seek to the page_id
int Seek(int offset, int pos) {
if (currFile == NULL) {
return 0;
}
fseek(currFile, offset, pos);
return 1;
}
FILE* GetFile() {
return currFile;
}
void IncNumPages() {
numPages++;
}
int GetNumPages() {
return numPages;
}
void SetUse(int index, int use_bit) {
pages[index] = use_bit;
}
int GetUse(int index) {
return pages[index];
}
public:
int iCnt;
int oCnt;
private:
FILE* currFile;
int numPages; // number of pages in the file
bool pages[MAXPAGES]; // array of use bits
};

Main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
void generateData() {
FILE* fp = fopen("data.dbf", "w+");
char bu[50000];
memset(bu, 'a', sizeof(bu)); // 'a': raw data
for (int i = 0; i < 4096; i++) {
fwrite(bu, sizeof(char), sizeof(bu), fp);
}
fclose(fp);
}

int main()
{
//generateData(); // page 0~49999
// 'a': raw data
// 'b': modified data
// 'c': new data

// init
DSMgr* dsmgr = new DSMgr;
dsmgr->OpenFile("data.dbf");
BMgr* bmgr = new BMgr(*dsmgr);

/* ------------------write a new page------------------

Page page = bmgr->FixNewPage();
bmgr->FixPage(page.page_id, 1);
bFrame frm;
memset(frm.field, 'c', sizeof(frm.field));
bmgr->WriteBuffer(page.frame_id, frm);
bmgr->UnfixPage(page.page_id);

-----if succeed, cmd 1,50000 will read this page----- */

// read cmd
ifstream infile("data-5w-50w-zipf.txt");
int x = 0;
int y = -1;
string line;
string token;

// ------------------------ time ------------------------
// ------------------------------------------------------
auto beginTime = chrono::high_resolution_clock::now();
while (getline(infile, line)) {
istringstream iss(line);
if (std::getline(iss, token, ',')) {
x = std::stoi(token);
}
if (std::getline(iss, token, ',')) {
y = std::stoi(token);
}
int tmp_frame_id = bmgr->FixPage(y, x);
/*
read or write page
x=0: read
x=1: write
y: page_id
*/

/* -----------------------------------------------
if (x == 1) {
bFrame frm;
memset(frm.field, 'b', sizeof(frm.field));
bmgr->WriteBuffer(tmp_frame_id, frm);
}
-------------------------------------------------*/
bmgr->UnfixPage(y);
}
bmgr->WriteDirtys();
auto endTime = chrono::high_resolution_clock::now();
// ------------------------------------------------------
// ------------------------ time ------------------------

auto duration = chrono::duration_cast<chrono::milliseconds>(endTime - beginTime);
cout << "Time: " << duration.count() << " ms" << endl;
cout << "Read: " << dsmgr->iCnt << " times" << endl;
cout << "Write: " << dsmgr->oCnt << " times" << endl;
cout << "Hit: " << bmgr->hit_cnt << " times" << endl;
cout << "Hit Rate:" << bmgr->hit_cnt / 5000 << "%" << endl;
}

Input

1
2
3
4
5
500000 cmds
each line: x,y
x=0: read
x=1: write
y: page_id

Output