ValueError: cannot copy sequence with size 2 to array axis with dimension 4（サイズ2のシーケンスを次元4の配列軸にコピーできません）

Question

どなたか、このエラーの原因を説明していただけますか？これはどういう意味で、どうすれば修正できるのでしょうか？質問が漠然としすぎているかもしれません。申し訳ありませんが、ほかに何を書き足せばよいのかわかりません。:P

エラー：

 Traceback (most recent call last):
   File "C:\test\7.4.3.bench.py", line 9, in <module>
     print imagesearch.compute_ukbench_score(src,imlist[:100])
   File "C:\test\imagesearch.py", line 168, in compute_ukbench_score
     pos[i] = [w[1]-1 for w in src.query(imlist[i])[:4]]
   File "C:\test\imagesearch.py", line 128, in query
     h = self.get_imhistogram(imname)
   File "C:\test\imagesearch.py", line 91, in get_imhistogram
     "select rowid from imlist where filename='%s'" % imname).fetchone()
 ValueError: cannot copy sequence with size 2 to array axis with dimension 4

これがimagesearch.pyです：

from numpy import *
import pickle
from pysqlite2 import dbapi2 as sqlite


class Indexer(object):
    """ Writes images, their visual words and word histograms to the database. """

    def __init__(self,db,voc):
        """ Initialize with the name of the database
            and a vocabulary object. """
        self.con = sqlite.connect(db)
        self.voc = voc

    def __del__(self):
        self.con.close()

    def db_commit(self):
        self.con.commit()

    def get_id(self,imname):
        """ Get an entry id and add if not present. """
        cur = self.con.execute(
            "select rowid from imlist where filename='%s'" % imname)
        res = cur.fetchone()
        if res is None:
            cur = self.con.execute(
                "insert into imlist(filename) values ('%s')" % imname)
            return cur.lastrowid
        else:
            return res[0]

    def is_indexed(self,imname):
        """ Returns True if imname has been indexed. """
        im = self.con.execute(
            "select rowid from imlist where filename='%s'" % imname).fetchone()
        return im is not None

    def add_to_index(self,imname,descr):
        """ Take an image with feature descriptors,
            project on vocabulary and add to database. """
        if self.is_indexed(imname):
            return
        # single-argument form prints the same text on Python 2 and 3
        print('indexing %s' % imname)

        # get the imid
        imid = self.get_id(imname)

        # get the words
        imwords = self.voc.project(descr)
        nbr_words = imwords.shape[0]

        # link each word to image
        for i in range(nbr_words):
            word = imwords[i]
            # wordid is the word number itself
            self.con.execute(
                "insert into imwords(imid,wordid,vocname) values (?,?,?)",
                (imid,word,self.voc.name))

        # store word histogram for image
        # use pickle to encode NumPy arrays as strings
        self.con.execute(
            "insert into imhistograms(imid,histogram,vocname) values (?,?,?)",
            (imid,pickle.dumps(imwords),self.voc.name))

    def create_tables(self):
        """ Create the database tables. """
        self.con.execute('create table imlist(filename)')
        self.con.execute('create table imwords(imid,wordid,vocname)')
        self.con.execute('create table imhistograms(imid,histogram,vocname)')
        self.con.execute('create index im_idx on imlist(filename)')
        self.con.execute('create index wordid_idx on imwords(wordid)')
        self.con.execute('create index imid_idx on imwords(imid)')
        self.con.execute('create index imidhist_idx on imhistograms(imid)')
        self.db_commit()


class Searcher(object):
    """ Queries the database for images similar to a given image. """

    def __init__(self,db,voc):
        """ Initialize with the name of the database. """
        self.con = sqlite.connect(db)
        self.voc = voc

    def __del__(self):
        self.con.close()

    def get_imhistogram(self,imname):
        """ Return the word histogram for an image. """
        # fetchone() gives a one-element row; the % formatting below
        # consumes that row directly as its argument tuple
        im_id = self.con.execute(
            "select rowid from imlist where filename='%s'" % imname).fetchone()
        s = self.con.execute(
            "select histogram from imhistograms where rowid='%d'" % im_id).fetchone()

        # use pickle to decode NumPy arrays from string
        # NOTE(review): pickle.loads is only safe on databases you created yourself
        return pickle.loads(str(s[0]))

    def candidates_from_Word(self,imword):
        """ Get list of images containing imword. """
        im_ids = self.con.execute(
            "select distinct imid from imwords where wordid=%d" % imword).fetchall()
        return [i[0] for i in im_ids]

    def candidates_from_histogram(self,imwords):
        """ Get list of images with similar words. """
        # get the word ids
        words = imwords.nonzero()[0]

        # find candidates
        candidates = []
        for word in words:
            c = self.candidates_from_Word(word)
            candidates += c

        # take all unique words and reverse sort on occurrence
        tmp = [(w,candidates.count(w)) for w in set(candidates)]
        # stable ascending sort on the count, then reverse, gives the
        # same order as the former cmp-based sort
        tmp.sort(key=lambda x: x[1])
        tmp.reverse()

        # return sorted list, best matches first
        return [w[0] for w in tmp]

    def query(self,imname):
        """ Find a list of matching images for imname.
            Returns (distance, imid) tuples sorted by increasing distance;
            the list has one entry per candidate, so it may be shorter
            than the number of results a caller wants. """
        h = self.get_imhistogram(imname)
        candidates = self.candidates_from_histogram(h)

        matchscores = []
        for imid in candidates:
            # get the name (a one-element row, formatted by get_imhistogram)
            cand_name = self.con.execute(
                "select filename from imlist where rowid=%d" % imid).fetchone()
            cand_h = self.get_imhistogram(cand_name)
            cand_dist = sqrt( sum( self.voc.idf*(h-cand_h)**2 ) )
            matchscores.append( (cand_dist,imid) )

        # return a sorted list of distances and database ids
        matchscores.sort()
        return matchscores

    def get_filename(self,imid):
        """ Return the filename for an image id. """
        s = self.con.execute(
            "select filename from imlist where rowid='%d'" % imid).fetchone()
        return s[0]


def tf_idf_dist(voc,v1,v2):
    """ Return the idf-weighted Euclidean distance between v1 and v2.
        Both vectors are normalized in place by their sums. """
    v1 /= sum(v1)
    v2 /= sum(v2)

    return sqrt( sum( voc.idf*(v1-v2)**2 ) )


def compute_ukbench_score(src,imlist):
    """ Returns the average number of correct images on the
        top four results of queries. """
    nbr_images = len(imlist)
    pos = zeros((nbr_images,4))

    # get first four results for each image
    # NOTE: each row of pos holds exactly four values, so this assignment
    # raises ValueError when a query returns fewer than four matches
    for i in range(nbr_images):
        pos[i] = [w[1]-1 for w in src.query(imlist[i])[:4]]

    # compute score and return average
    score = array([ (pos[i]//4)==(i//4) for i in range(nbr_images)])*1.0
    return sum(score) / (nbr_images)


# import PIL and pylab for plotting
from PIL import Image
from pylab import *

def plot_results(src,res):
    """ Show images in result list 'res'. """
    figure()
    nbr_results = len(res)
    for i in range(nbr_results):
        imname = src.get_filename(res[i])
        subplot(1,nbr_results,i+1)
        imshow(array(Image.open(imname)))
        axis('off')
    show()

Ankur Agarwal · Accepted Answer

imagesearch.pyの168行目に問題があるようです：

 pos[i] = [ w[1]-1 for w in src.query(imlist[i]) [:4] ]

ここで起きている可能性があるのは、DBクエリが返す結果が4件未満だということです。リスト内包表記の結果を、4列あるposの1行に代入しようとしているため、要素が4つ必要ですが、実際には足りていません。そのため「ValueError: cannot copy sequence with size 2 to array axis with dimension 4」（サイズ2のシーケンスを次元4の配列軸にコピーできません）というエラーになります。

クエリの出力（o/p）を表示して、この仮説を確かめてください。本当に4つの項目が返ってきているかどうかを確認します。

そして、項目が4つに満たない場合（例えば2つしか見つからない場合）は、posへの代入をスキップします。

l = [ w[1]-1 for w in src.query(imlist[i]) [:4] ] if len(l) == 4: pos[i] = l