空のフィールドをサポートしながら、sscanfを使用してコンマ区切りの文字列でフィールドを解析するにはどうすればよいですか？

Question

空のフィールドが含まれている可能性のあるカンマ区切りの文字列があります。例えば：

1,2,,4

基本的な使用

sscanf(string,"%[^,],%[^,],%[^,],%[^,],%[^,]", &val1, &val2, &val3, &val4);

空のフィールドの前にすべての値を取得し、空のフィールド以降で予期しない結果が発生します。

空のフィールドの式をsscanf（）から削除すると、

sscanf(string,"%[^,],%[^,],,%[^,],%[^,]", &val1, &val2, &val3, &val4);

すべてがうまくいきます。

空のフィールドをいつ取得するかわからないので、空のフィールドをエレガントに処理するように式を書き直す方法はありますか？

ChrisF · Accepted Answer

カンマを区切り文字としてstrtokを使用すると、文字列のリストが得られ、そのうちの1つ以上がnull（長さゼロ）になります。

詳細については、こちらの私の回答をご覧ください。

Sinan Ünür · Answer

man sscanf ：

[ ： 指定された受け入れ可能な文字の集合に含まれる文字からなる、空でない（nonempty）文字列に一致します。

（強調は引用者によるものです）。

xerostomus · Answer

正しく動作するように、このコードを少し変更する必要がありました。

// Build and run:
//   rm token_pure; gcc -Wall -O3 -o token_pure token_pure.c; ./token_pure
//
// Splits a comma-separated string into fields and reports each field as an
// integer value, as empty, or as a syntax error.
#include <stdio.h>
#include <string.h>

int main(void)
{
    char str[] = " 1 , 2 x, , 4 ";
    char *rest = str;  // unparsed remainder; NULL once the last field is taken

    do {
        // Skip leading blanks so that a blank-only field is reported as empty.
        while (*rest == ' ' || *rest == '\t') {
            rest++;
        }

        // Cut the current field at the next comma. strchr is used rather than
        // strsep because strsep is a BSD/glibc extension and is not declared
        // when compiling in strict ISO C mode.
        char *field = rest;
        char *comma = strchr(rest, ',');
        if (comma != NULL) {
            *comma = '\0';
            rest = comma + 1;
        } else {
            rest = NULL;
        }

        if (*field == '\0') {
            printf("val: (empty)\n");
        } else {
            int val;
            char ch;
            // The trailing " %c" converts only when something other than
            // white space follows the number, so a result of exactly 1
            // means the field held a single clean integer.
            int ret = sscanf(field, " %i %c", &val, &ch);
            if (ret != 1) {
                printf("val: (syntax error)\n");
            } else {
                printf("val: %i\n", val);
            }
        }
    } while (rest != NULL);

    return 0;
}

および出力：

val: 1
val: (syntax error)
val: (empty)
val: 4

Jonathan Leffler · Answer

これは、現在CSV値を扱っているようです。引用符で囲まれた文字列を処理するように拡張する必要がある場合（たとえば、フィールドにコンマを含めることができるようにするため）、scanf-ファミリは形式のすべての複雑さを処理できないことがわかります。したがって、CSV形式（のバリアント）を処理するために特別に設計されたコードを使用する必要があります。

一連のCSVライブラリ実装のコードが、『プログラミング作法』（The Practice of Programming）にCとC++の両方で掲載されています。他にも利用できるものが間違いなくたくさんあります。

sambowry · Answer

これは、コンマで区切られたint値をスキャンする私のバージョンです。このコードは、空のフィールドと非整数フィールドを検出します。

#include <stdio.h>
#include <string.h>

/*
 * Detaches the next comma-delimited field from the string at *cursor.
 *
 * The field is NUL-terminated in place (the comma is overwritten) and
 * *cursor is advanced to the character after the comma, or set to NULL when
 * the field was the last one. This is a portable ISO C stand-in for
 * strsep(&s, ","), which is a BSD/glibc extension.
 */
static char *next_field(char **cursor)
{
    char *field = *cursor;
    char *comma = strchr(field, ',');

    if (comma != NULL) {
        *comma = '\0';
        *cursor = comma + 1;
    } else {
        *cursor = NULL;
    }
    return field;
}

/*
 * Scans comma-separated int values, reporting empty fields and fields that
 * are not a single integer.
 */
int main(void)
{
    char str[] = " 1 , 2 x, , 4 ";

    printf("str: '%s'\n", str);

    for (char *rest = str; rest != NULL; ) {
        /* Drop leading blanks so a blank-only field is seen as empty. */
        while (*rest == ' ' || *rest == '\t') {
            rest++;
        }

        char *field = next_field(&rest);
        if (*field == '\0') {
            printf("val: (empty)\n");
            continue;
        }

        int val;
        char ch;
        /* " %c" converts only if non-blank text follows the number, so a
         * return value other than 1 means the field is not a lone integer. */
        int ret = sscanf(field, " %i %c", &val, &ch);
        if (ret != 1) {
            printf("val: (syntax error)\n");
        } else {
            printf("val: %i\n", val);
        }
    }

    return 0;
}

結果：

str: ' 1 , 2 x, , 4 '
val: 1
val: (syntax error)
val: (empty)
val: 4

Sashank Bhogu · Answer

strtok()のいくつかの問題点が、次のページに挙げられています： http://benpfaff.org/writings/clc/strtok.html

したがって、strtokを回避することをお勧めします。

ここで、空のフィールドを含む文字列を次のように考えます。

char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here

CSV形式の文字列を変換してfloat配列に読み込む単純な関数：

int strCSV2Float(float *strFloatArray , char *myCSVStringing);

使用例は以下のとおりです。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int strCSV2Float(float *strFloatArray , char *myCSVStringing);

int main(void)
{
    char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here
    float floatArr[10];                               // specify size here

    printf("myCSVString == %s \n", myCSVString);

    int totalValues = strCSV2Float(floatArr, myCSVString); // call the function here

    for (int i = 0; i < totalValues; i++) {
        printf("floatArr[%d] = %f\n", i, floatArr[i]);
    }
    return 0;
}

/*
 * Converts a comma-separated string into an array of floats.
 *
 * strFloatArray   receives one value per field; the caller must provide room
 *                 for every field in the string (no capacity is passed in).
 * myCSVStringing  NUL-terminated input; it is only read, never modified.
 *
 * An empty or non-numeric field is stored as 0.0. A comma at the very end of
 * the string does not produce an additional trailing field, and an empty
 * string yields no fields.
 *
 * Returns the number of values stored (0 if either argument is NULL).
 */
int strCSV2Float(float *strFloatArray , char *myCSVStringing)
{
    if (strFloatArray == NULL || myCSVStringing == NULL) {
        return 0;
    }

    int count = 0;
    const char *field = myCSVStringing;

    while (*field != '\0') {
        // strtod stops at the first character that cannot belong to a number,
        // which is at the latest the field's terminating comma, so the field
        // can be parsed in place without copying it into a fixed-size buffer.
        strFloatArray[count++] = (float)strtod(field, NULL);

        const char *comma = strchr(field, ',');
        if (comma == NULL) {
            break;              // that was the last field
        }
        field = comma + 1;
    }

    return count;
}

出力は次のとおりです：

myCSVString == -1.4,2.6,,-0.24,1.26
floatArr[0] = -1.400000
floatArr[1] = 2.600000
floatArr[2] = 0.000000
floatArr[3] = -0.240000
floatArr[4] = 1.260000

dmcontador · Answer

私は同じ質問に対する答えを探してここに到着しました。私もscanf関数を残したくありませんでした。最後に、私は自分でzsscanfを作成し、フォーマットを解析し、すべてのデータを1つずつsscanfして、sscanfの戻り値をチェックして、空の読み取りが行われたかどうかを確認しました。これはやや私の特定のケースでした。フィールドの一部だけが必要で、一部は空で、セパレーターを想定できませんでした。

#include <stdarg.h>
#include <stdio.h>

/*
 * sscanf wrapper that tolerates empty fields.
 *
 * The format is cut into pieces at each '%' and every piece is applied to
 * the data with its own sscanf call, with "%n" appended to learn how many
 * characters it consumed. When a storing conversion matches nothing, the
 * destination is set to an empty string and scanning continues with the
 * next piece instead of stopping.
 *
 * Limitations inherited from the piecewise approach: each piece must fit in
 * 28 characters, a scanset must not contain '%', and destinations are
 * treated as char buffers when a field is empty.
 *
 * Returns the number of destinations written (empty ones included). If none
 * was written, returns the status of the last step, which is negative when
 * the data did not match a "%%" or a conversion specification was malformed.
 */
int zsscanf(char *data, char *format, ...)
{
    va_list argp;
    int fptr = 0;    /* offset of the next unprocessed character in format */
    int sptr = 0;    /* offset of the next unread character in data */
    int iptr = 0;    /* length of the format piece just extracted */
    int isptr = 0;   /* data characters consumed by the latest piece */
    int ok = 0;
    int saved = 0;
    char def[32];    /* one format piece plus the appended "%n" */

    va_start(argp, format);

    /* Stop at the end of the format; otherwise the next piece would be
     * built from stale offsets and read past the end of the string. */
    while (format[fptr] != '\0') {
        if (format[fptr] != '%') {
            /* Literal text up to the next '%'. */
            ok = sscanf(&format[fptr], "%28[^%]%n", def, &iptr);
            if (ok != 1)
                break;
            fptr += iptr;
            def[iptr] = '%';
            def[iptr + 1] = 'n';
            def[iptr + 2] = 0;

            /* A literal-only format makes sscanf return 0 whether or not it
             * matched, so success is detected by "%n" having been reached. */
            isptr = -1;
            ok = sscanf(&data[sptr], def, &isptr);
            if (isptr < 0)
                break;
            sptr += isptr;
        } else if (format[fptr + 1] == '%') {
            /* "%%" in the format matches a single '%' in the data. */
            if (data[sptr] == '%') {
                fptr += 2;
                sptr += 1;
            } else {
                ok = -1;
                break;
            }
        } else {
            /* One conversion specification, up to the next '%'. */
            void *savehere = NULL;

            ok = sscanf(&format[fptr], "%%%28[^%]%n", &def[1], &iptr);
            if (ok != 1)
                break;
            fptr += iptr;
            def[0] = '%';
            def[iptr] = '%';
            def[iptr + 1] = 'n';
            def[iptr + 2] = 0;
            isptr = 0;

            if (def[1] != '*') {
                savehere = va_arg(argp, void *);
                ok = sscanf(&data[sptr], def, savehere, &isptr);
                if (ok == 0 && isptr == 0) {
                    // Let's assume only char types. Won't hurt in other cases.
                    ((char *)savehere)[0] = 0;
                    ok = 1;
                }
                if (ok > 0) {
                    saved++;
                }
            } else {
                /* Suppressed conversion: nothing is stored, so a return of 0
                 * is the normal outcome whether or not the field was empty. */
                ok = sscanf(&data[sptr], def, &isptr) == 0;
            }
            if (ok < 0)
                break;
            sptr += isptr;
        }
    }

    va_end(argp);
    return saved == 0 ? ok : saved;
}

int main(void)
{
    char *format = "%15[^\t;,]%*1[\t;,]"   // NameId
                   "%*[^\t;,]%*1[\t;,]"    // Name
                   "%*[^\t;,]%*1[\t;,]"    // Abbreviation
                   "%*[^\t;,]%*1[\t;,]"    // Description
                   "%31[^\t;,]";           // Electrical Line
    char nameId[16];
    char elect[32];
    char *line1 = "TVC-CCTV-0002\tTVC-CCTV-0002\tTVC-CCTV-0002\tCCTV DOMO CAMERA 21-32-29\tELECTRICAL_TopoLine_823\tfoo\tbar";
    char *line2 = "TVC-CCTV-0000;;;;;foo;bar;";

    int ok = zsscanf(line1, format, nameId, elect);
    printf ("%d: |%s|%s|\n", ok, nameId, elect);

    ok = zsscanf(line2, format, nameId, elect);
    printf ("%d: |%s|%s|\n", ok, nameId, elect);

    return 0;
}

出力：

2: |TVC-CCTV-0002|ELECTRICAL_TopoLine_823|
2: |TVC-CCTV-0000||

注意してください、それは完全にテストされておらず、厳しい制限があります（最も明白なもの：%...s、%...c、%...[...]のみを受け入れ、%...[...]としてセパレータを必要とします;そうでなければ私は本当にフォーマット文字列を気にする必要がありました。このように私は%だけを気にします）。

hanoo · Answer

読み取った値の代入をスキップするには、「%」の後に「*」を付けます。さらに、たとえば「%3s」と指定すれば、3文字だけを読み取ることができます。