2014-12-05 50 views
1

我試圖把一個較大的字符串(例如"https://cdn.sstatic.net/stackexchange/img/logos/so/so-logo.png")複製到一個更小的數組中,使其只包含url和NULL終止符(https://cdn.sstatic.net/stackexchange/img/logos/so/so-logo.png\0),但數組的末尾卻出現了垃圾字符。在C中複製數組,並擺脫字符數組中的垃圾

問題應該出在這裏:

/*
 * Excerpt: read characters from fp into link until a 'g' immediately followed
 * by a double quote is seen, then copy the text between link[0] and that
 * closing quote into link2 and print it.
 */
pos2 = 0; 
while (found == 0) 
{ 
    c = fgetc (fp); // get char from file 
    link[pos2] = c; 
    // NOTE: on the first iteration pos2 is 0, so link[pos2-1] reads link[-1],
    // which is outside the array.
    if (link[pos2-1] == 'g' && link[pos2] == '\"') // png and jpg files 
    { 
    found = 1; 
    } 
    ++pos2; 
} 
// Undo the final increment: pos2 is now the index of the closing quote.
--pos2; 
found = 0; 
// Variable-length array holding exactly pos2 chars (valid indices 0 .. pos2-1).
char link2[pos2]; 
// Copy link[1 .. pos2-1] into link2[0 .. pos2-2]; link[0] is skipped
// (presumably the opening quote of the attribute value).
for (j = 1; j < pos2; ++j) 
{ 
    link2[j-1] = link[j]; 
} 
// NOTE: the loop exits with j == pos2, so this writes link2[pos2], one element
// past the end of link2, and link2[pos2-1] is never assigned. The string is
// therefore not terminated where the copied text ends.
link2[j] = '\0'; 
//sprintf(cmd, "wget -O /home/arturo/Dropbox/Digital_Renders/%d \'%s\'", ++num, link2); 
//system(cmd); 
printf("%s\n", link2);// checking 
sleep(1); 

下面是代碼:

#include <stdio.h> 
#include <stdlib.h> // for using system calls 
#include <unistd.h> // for sleep 

/*
 * Fetch each page of the forum thread with wget, scan the saved HTML, and
 * print the URL of every image found inside a post body.
 *
 * A post body starts after the text "forum-post-body-content" and ends at the
 * text "p-comment-notes". Inside it, every `img src=` attribute value that
 * ends in 'g' followed by a double quote (e.g. .png / .jpg) is printed on its
 * own line. The downloaded page file is removed once it has been scanned.
 *
 * Returns 0 once every page has been processed.
 */
int main(void)
{
    const char body[] = "forum-post-body-content";
    const char notes[] = "p-comment-notes";
    const char img[] = "img src=";
    /* Index of the last character of each marker (sizeof also counts the NUL). */
    const int body_last = (int)sizeof body - 2;
    const int notes_last = (int)sizeof notes - 2;
    const int img_last = (int)sizeof img - 2;
    char link[200];
    char cmd[200] = {0};
    /* "page499.txt" needs 12 bytes; a 10-byte buffer overflows from page 10 on. */
    char file[32];
    int c, pos, pos2, fin, found, i;
    FILE *fp;

    for (i = 1; i < 500; ++i)
    {
        snprintf(cmd, sizeof cmd, "wget -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'", i, i);
        system(cmd);
        snprintf(file, sizeof file, "page%d.txt", i);
        fp = fopen(file, "r");
        if (fp == NULL)
        {
            /* Nothing to scan for this page; move on instead of crashing in fgetc. */
            fprintf(stderr, "Can't open %s\n", file);
            continue;
        }

        /* Start every page with a clean matcher state. */
        pos = 0;
        while ((c = fgetc(fp)) != EOF)
        {
            if (body[pos] != c)
            {
                pos = 0;
                continue;
            }
            if (pos != body_last)
            {
                ++pos;
                continue;
            }

            /* The whole body marker matched: scan the post until the notes marker. */
            pos = 0;
            pos2 = 0;
            fin = 0;
            while (!fin && (c = fgetc(fp)) != EOF)
            {
                /* Track progress through the end-of-post marker. */
                if (notes[pos] == c)
                {
                    if (pos == notes_last)
                    {
                        fin = 1;
                        pos = 0;
                    }
                    else
                    {
                        ++pos;
                    }
                }
                else
                {
                    pos = 0;
                }

                /* Track progress through the image attribute marker. */
                if (img[pos2] != c)
                {
                    pos2 = 0;
                    continue;
                }
                if (pos2 != img_last)
                {
                    ++pos2;
                    continue;
                }

                /*
                 * "img src=" matched: read the attribute value into link until
                 * a 'g' followed by a double quote. Stop on EOF or when link is
                 * full so the loop can neither spin forever nor overflow.
                 */
                pos2 = 0;
                found = 0;
                while (!found && pos2 < (int)sizeof link - 1 && (c = fgetc(fp)) != EOF)
                {
                    link[pos2] = (char)c;
                    if (pos2 > 0 && link[pos2 - 1] == 'g' && c == '"')
                    {
                        found = 1;
                    }
                    else
                    {
                        ++pos2;
                    }
                }
                if (found)
                {
                    /* Replace the closing quote with the terminator and skip link[0]. */
                    link[pos2] = '\0';
                    /* The download step is disabled; the URL is only printed for checking. */
                    printf("%s\n", link + 1);
                    sleep(1);
                }
                pos2 = 0;
            }
            /* Restart the body-marker search from scratch after each post. */
            pos = 0;
        }
        /* closing file */
        fclose(fp);
        if (remove(file))
        {
            fprintf(stderr, "Can't remove file\n");
        }
    }
    return 0;
}
+1

在使用它之前將數組memset爲全零。 – 2014-12-05 16:11:20

+1

你以'pos2 = 0'開始並訪問'link [pos2-1]',這是超出界限和未定義的行爲。 – mch 2014-12-05 16:13:09

+0

在慣用的C寫法中,你以'while(1){c = fgetc(fp); if(feof(fp))break;'開頭的循環會寫成'while((c = fgetc(fp))!= EOF){'。不過,你的寫法本身是正確的,只是使用'feof()'這個習慣最好現在就改掉,而不是留到以後。 – 2014-12-05 16:16:51

回答

0

眼前的問題似乎出在這一行:

link2[j] = '\0'; 

這應該是:

link2[j-1] = '\0'; 

(與循環內的索引一致)。

此外,正如@mch在註釋中指出的那樣,在while循環的第一次迭代中訪問link[pos2-1]會讀取數組邊界之外的元素。

快速掃描剩下的代碼,似乎還存在一些潛在的問題,所以您可能想仔細看看整個代碼。

1

鑑於聲明:

char cmd[50]; 

這一行不好:

sprintf(cmd,"wget -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'",i,i); 

您試圖將超過120個字符打包到50字節的緩衝區中;這將無法可靠地工作。

0

這裏還有另一處越界讀取:

pos2 = 0;   /* pos2 = 0 */ 
while (found == 0) 
{ 
    c = fgetc (fp); 
    link[pos2] = c; 
    if (link[pos2-1] == 'g' && link[pos2] == '\"') /* reading pos2 - 1 (bad) */ 

請特別注意所有的數組索引。在少數確實需要對索引做偏移(例如i-1等)的情況下,您必須驗證索引範圍不會導致越界讀取。